This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Changes.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
56953603
IZ
45#endif
46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
51371543
GS
126static void restore_pos(pTHXo_ void *arg);
127
128
76e3520e 129STATIC CHECKPOINT
cea2e8a9 130S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 131{
3280af22 132 int retval = PL_savestack_ix;
b1ce53c5
JH
133#define REGCP_PAREN_ELEMS 4
134 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
135 int p;
136
b1ce53c5
JH
137#define REGCP_OTHER_ELEMS 5
138 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 139 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 140/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
141 SSPUSHINT(PL_regendp[p]);
142 SSPUSHINT(PL_regstartp[p]);
3280af22 143 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
144 SSPUSHINT(p);
145 }
b1ce53c5 146/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
147 SSPUSHINT(PL_regsize);
148 SSPUSHINT(*PL_reglastparen);
149 SSPUSHPTR(PL_reginput);
b1ce53c5
JH
150 SSPUSHINT(paren_elems_to_push + (REGCP_PAREN_ELEMS - 1));
151 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
a0d0e21e
LW
152 return retval;
153}
154
c277df42 155/* These are needed since we do not localize EVAL nodes: */
02db2b7b 156# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 157 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 158 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 159
02db2b7b 160# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 161 PerlIO_printf(Perl_debug_log, \
faccc32b 162 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 163 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 164
76e3520e 165STATIC char *
cea2e8a9 166S_regcppop(pTHX)
a0d0e21e 167{
b1ce53c5 168 I32 i;
a0d0e21e
LW
169 U32 paren = 0;
170 char *input;
cf93c79d 171 I32 tmps;
b1ce53c5
JH
172
173 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 174 i = SSPOPINT;
b1ce53c5
JH
175 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
176 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 177 input = (char *) SSPOPPTR;
3280af22
NIS
178 *PL_reglastparen = SSPOPINT;
179 PL_regsize = SSPOPINT;
b1ce53c5
JH
180
181 /* Now restore the parentheses context. */
182 for (i -= (REGCP_PAREN_ELEMS - 1); i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 183 paren = (U32)SSPOPINT;
3280af22 184 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
185 PL_regstartp[paren] = SSPOPINT;
186 tmps = SSPOPINT;
3280af22
NIS
187 if (paren <= *PL_reglastparen)
188 PL_regendp[paren] = tmps;
c277df42 189 DEBUG_r(
c3464db5 190 PerlIO_printf(Perl_debug_log,
b900a521
JH
191 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
192 (UV)paren, (IV)PL_regstartp[paren],
193 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
194 (IV)PL_regendp[paren],
3280af22 195 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 196 );
a0d0e21e 197 }
c277df42 198 DEBUG_r(
3280af22 199 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 200 PerlIO_printf(Perl_debug_log,
faccc32b
JH
201 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
202 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
203 }
204 );
daf18116 205#if 1
dafc8851
JH
206 /* It would seem that the similar code in regtry()
207 * already takes care of this, and in fact it is in
208 * a better location to since this code can #if 0-ed out
209 * but the code in regtry() is needed or otherwise tests
210 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
211 * (as of patchlevel 7877) will fail. Then again,
212 * this code seems to be necessary or otherwise
213 * building DynaLoader will fail:
214 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
215 * --jhi */
3280af22
NIS
216 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
217 if (paren > PL_regsize)
cf93c79d
IZ
218 PL_regstartp[paren] = -1;
219 PL_regendp[paren] = -1;
a0d0e21e 220 }
dafc8851 221#endif
a0d0e21e
LW
222 return input;
223}
224
0f5d15d6 225STATIC char *
cea2e8a9 226S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
227{
228 I32 tmp = PL_savestack_ix;
229
230 PL_savestack_ix = ss;
231 regcppop();
232 PL_savestack_ix = tmp;
942e002e 233 return Nullch;
0f5d15d6
IZ
234}
235
236typedef struct re_cc_state
237{
238 I32 ss;
239 regnode *node;
240 struct re_cc_state *prev;
241 CURCUR *cc;
242 regexp *re;
243} re_cc_state;
244
02db2b7b 245#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 246
29d1e993
HS
247#define TRYPAREN(paren, n, input) { \
248 if (paren) { \
249 if (n) { \
250 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
251 PL_regendp[paren] = input - PL_bostr; \
252 } \
253 else \
254 PL_regendp[paren] = -1; \
255 } \
256 if (regmatch(next)) \
257 sayYES; \
258 if (paren && n) \
259 PL_regendp[paren] = -1; \
260}
261
262
a687059c 263/*
e50aee73 264 * pregexec and friends
a687059c
LW
265 */
266
267/*
c277df42 268 - pregexec - match a regexp against a string
a687059c 269 */
c277df42 270I32
864dbfa3 271Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 272 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
273/* strend: pointer to null at end of string */
274/* strbeg: real beginning of string */
275/* minend: end of match must be >=minend after stringarg. */
276/* nosave: For optimizations. */
277{
278 return
279 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
280 nosave ? 0 : REXEC_COPY_STR);
281}
0f5d15d6
IZ
282
283STATIC void
cea2e8a9 284S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
285{
286 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
287#ifdef DEBUGGING
288 PL_regprogram = prog->program;
289#endif
290 PL_regnpar = prog->nparens;
291 PL_regdata = prog->data;
292 PL_reg_re = prog;
293}
22e551b9 294
cad2e5aa
JH
295/*
296 * Need to implement the following flags for reg_anch:
297 *
298 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
299 * USE_INTUIT_ML
300 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
301 * INTUIT_AUTORITATIVE_ML
302 * INTUIT_ONCE_NOML - Intuit can match in one location only.
303 * INTUIT_ONCE_ML
304 *
305 * Another flag for this function: SECOND_TIME (so that float substrs
306 * with giant delta may be not rechecked).
307 */
308
309/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
310
2c2d71f5 311/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
312 Otherwise, only SvCUR(sv) is used to get strbeg. */
313
314/* XXXX We assume that strpos is strbeg unless sv. */
315
6eb5f6b9
JH
316/* XXXX Some places assume that there is a fixed substring.
317 An update may be needed if optimizer marks as "INTUITable"
318 RExen without fixed substrings. Similarly, it is assumed that
319 lengths of all the strings are no more than minlen, thus they
320 cannot come from lookahead.
321 (Or minlen should take into account lookahead.) */
322
2c2d71f5
JH
323/* A failure to find a constant substring means that there is no need to make
324 an expensive call to REx engine, thus we celebrate a failure. Similarly,
325 finding a substring too deep into the string means that less calls to
30944b6d
IZ
326 regtry() should be needed.
327
328 REx compiler's optimizer found 4 possible hints:
329 a) Anchored substring;
330 b) Fixed substring;
331 c) Whether we are anchored (beginning-of-line or \G);
332 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 333 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
334 string which does not contradict any of them.
335 */
2c2d71f5 336
6eb5f6b9
JH
337/* Most of decisions we do here should have been done at compile time.
338 The nodes of the REx which we used for the search should have been
339 deleted from the finite automaton. */
340
cad2e5aa
JH
341char *
342Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
343 char *strend, U32 flags, re_scream_pos_data *data)
344{
2c2d71f5 345 register I32 start_shift;
cad2e5aa 346 /* Should be nonnegative! */
2c2d71f5
JH
347 register I32 end_shift;
348 register char *s;
349 register SV *check;
a1933d95 350 char *strbeg;
cad2e5aa
JH
351 char *t;
352 I32 ml_anch;
2c2d71f5 353 char *tmp;
6eb5f6b9
JH
354 register char *other_last = Nullch; /* other substr checked before this */
355 char *check_at; /* check substr found at this pos */
30944b6d
IZ
356#ifdef DEBUGGING
357 char *i_strpos = strpos;
358#endif
cad2e5aa
JH
359
360 DEBUG_r( if (!PL_colorset) reginitcolors() );
361 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 362 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
363 PL_colors[4],PL_colors[5],PL_colors[0],
364 prog->precomp,
365 PL_colors[1],
366 (strlen(prog->precomp) > 60 ? "..." : ""),
367 PL_colors[0],
b900a521 368 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
369 strpos, PL_colors[1],
370 (strend - strpos > 60 ? "..." : ""))
371 );
372
1aa99e6b
IH
373 if (prog->reganch & ROPT_UTF8)
374 PL_reg_flags |= RF_utf8;
375
376 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 377 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 378 goto fail;
2c2d71f5 379 }
a1933d95 380 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 381 PL_regeol = strend;
653099ff 382 check = prog->check_substr;
2c2d71f5 383 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
384 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
385 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 386 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 387
5675c1a6
IZ
388 if (!ml_anch) {
389 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
390 /* SvCUR is not set on references: SvRV and SvPVX overlap */
391 && sv && !SvROK(sv)
a1933d95 392 && (strpos != strbeg)) {
5675c1a6
IZ
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
394 goto fail;
395 }
396 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 397 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
398 I32 slen;
399
1aa99e6b 400 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
401 if (SvTAIL(check)) {
402 slen = SvCUR(check); /* >= 1 */
cad2e5aa 403
2c2d71f5
JH
404 if ( strend - s > slen || strend - s < slen - 1
405 || (strend - s == slen && strend[-1] != '\n')) {
406 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
407 goto fail_finish;
cad2e5aa
JH
408 }
409 /* Now should match s[0..slen-2] */
410 slen--;
653099ff 411 if (slen && (*SvPVX(check) != *s
cad2e5aa 412 || (slen > 1
653099ff 413 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
414 report_neq:
415 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
416 goto fail_finish;
417 }
cad2e5aa 418 }
653099ff
GS
419 else if (*SvPVX(check) != *s
420 || ((slen = SvCUR(check)) > 1
421 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
422 goto report_neq;
423 goto success_at_start;
5675c1a6 424 }
cad2e5aa 425 }
2c2d71f5 426 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 427 s = strpos;
2c2d71f5 428 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 429 end_shift = prog->minlen - start_shift -
653099ff 430 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 431 if (!ml_anch) {
653099ff
GS
432 I32 end = prog->check_offset_max + CHR_SVLEN(check)
433 - (SvTAIL(check) != 0);
1aa99e6b 434 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
435
436 if (end_shift < eshift)
437 end_shift = eshift;
438 }
cad2e5aa 439 }
2c2d71f5 440 else { /* Can match at random position */
cad2e5aa
JH
441 ml_anch = 0;
442 s = strpos;
2c2d71f5
JH
443 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
444 /* Should be nonnegative! */
445 end_shift = prog->minlen - start_shift -
653099ff 446 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
447 }
448
2c2d71f5 449#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 450 if (end_shift < 0)
6bbae5e6 451 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
452#endif
453
2c2d71f5
JH
454 restart:
455 /* Find a possible match in the region s..strend by looking for
456 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 457 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
458 I32 p = -1; /* Internal iterator of scream. */
459 I32 *pp = data ? data->scream_pos : &p;
460
2c2d71f5
JH
461 if (PL_screamfirst[BmRARE(check)] >= 0
462 || ( BmRARE(check) == '\n'
463 && (BmPREVIOUS(check) == SvCUR(check) - 1)
464 && SvTAIL(check) ))
465 s = screaminstr(sv, check,
466 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 467 else
2c2d71f5 468 goto fail_finish;
cad2e5aa
JH
469 if (data)
470 *data->scream_olds = s;
471 }
472 else
1aa99e6b
IH
473 s = fbm_instr(HOP3(s, start_shift, strend),
474 HOP3(strend, -end_shift, strbeg),
2c2d71f5 475 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
476
477 /* Update the count-of-usability, remove useless subpatterns,
478 unshift s. */
2c2d71f5
JH
479
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
481 (s ? "Found" : "Did not find"),
482 ((check == prog->anchored_substr) ? "anchored" : "floating"),
483 PL_colors[0],
7b0972df
JH
484 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
485 SvPVX(check),
2c2d71f5
JH
486 PL_colors[1], (SvTAIL(check) ? "$" : ""),
487 (s ? " at offset " : "...\n") ) );
488
489 if (!s)
490 goto fail_finish;
491
6eb5f6b9
JH
492 check_at = s;
493
2c2d71f5 494 /* Finish the diagnostic message */
30944b6d 495 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
496
497 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
498 Start with the other substr.
499 XXXX no SCREAM optimization yet - and a very coarse implementation
500 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
501 *always* match. Probably should be marked during compile...
502 Probably it is right to do no SCREAM here...
503 */
504
505 if (prog->float_substr && prog->anchored_substr) {
30944b6d 506 /* Take into account the "other" substring. */
2c2d71f5
JH
507 /* XXXX May be hopelessly wrong for UTF... */
508 if (!other_last)
6eb5f6b9 509 other_last = strpos;
2c2d71f5 510 if (check == prog->float_substr) {
30944b6d
IZ
511 do_other_anchored:
512 {
1aa99e6b 513 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
514 char *s1 = s;
515
2c2d71f5
JH
516 t = s - prog->check_offset_max;
517 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
518 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 519 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 520 && t > strpos)))
30944b6d 521 /* EMPTY */;
2c2d71f5
JH
522 else
523 t = strpos;
1aa99e6b 524 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
525 if (t < other_last) /* These positions already checked */
526 t = other_last;
1aa99e6b 527 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
528 if (last < last1)
529 last1 = last;
530 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
531 /* On end-of-str: see comment below. */
532 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
533 HOP3(HOP3(last1, prog->anchored_offset, strend)
534 + SvCUR(prog->anchored_substr),
535 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
536 prog->anchored_substr,
537 PL_multiline ? FBMrf_MULTILINE : 0);
538 DEBUG_r(PerlIO_printf(Perl_debug_log,
539 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
540 (s ? "Found" : "Contradicts"),
541 PL_colors[0],
7b0972df
JH
542 (int)(SvCUR(prog->anchored_substr)
543 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
544 SvPVX(prog->anchored_substr),
545 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
546 if (!s) {
547 if (last1 >= last2) {
548 DEBUG_r(PerlIO_printf(Perl_debug_log,
549 ", giving up...\n"));
550 goto fail_finish;
551 }
552 DEBUG_r(PerlIO_printf(Perl_debug_log,
553 ", trying floating at offset %ld...\n",
1aa99e6b
IH
554 (long)(HOP3c(s1, 1, strend) - i_strpos)));
555 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
556 s = HOP3c(last, 1, strend);
2c2d71f5
JH
557 goto restart;
558 }
559 else {
560 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 561 (long)(s - i_strpos)));
1aa99e6b
IH
562 t = HOP3c(s, -prog->anchored_offset, strbeg);
563 other_last = HOP3c(s, 1, strend);
30944b6d 564 s = s1;
2c2d71f5
JH
565 if (t == strpos)
566 goto try_at_start;
2c2d71f5
JH
567 goto try_at_offset;
568 }
30944b6d 569 }
2c2d71f5
JH
570 }
571 else { /* Take into account the floating substring. */
572 char *last, *last1;
573 char *s1 = s;
574
1aa99e6b
IH
575 t = HOP3c(s, -start_shift, strbeg);
576 last1 = last =
577 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
578 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
579 last = HOP3c(t, prog->float_max_offset, strend);
580 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
581 if (s < other_last)
582 s = other_last;
2c2d71f5
JH
583 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
584 /* fbm_instr() takes into account exact value of end-of-str
585 if the check is SvTAIL(ed). Since false positives are OK,
586 and end-of-str is not later than strend we are OK. */
587 s = fbm_instr((unsigned char*)s,
588 (unsigned char*)last + SvCUR(prog->float_substr)
589 - (SvTAIL(prog->float_substr)!=0),
590 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
591 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
592 (s ? "Found" : "Contradicts"),
593 PL_colors[0],
7b0972df
JH
594 (int)(SvCUR(prog->float_substr)
595 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
596 SvPVX(prog->float_substr),
597 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
598 if (!s) {
599 if (last1 == last) {
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 ", giving up...\n"));
602 goto fail_finish;
603 }
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", trying anchored starting at offset %ld...\n",
30944b6d 606 (long)(s1 + 1 - i_strpos)));
803ff556 607 other_last = last;
1aa99e6b 608 s = HOP3c(t, 1, strend);
2c2d71f5
JH
609 goto restart;
610 }
611 else {
612 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 613 (long)(s - i_strpos)));
803ff556 614 other_last = s; /* Fix this later. --Hugo */
30944b6d 615 s = s1;
2c2d71f5
JH
616 if (t == strpos)
617 goto try_at_start;
2c2d71f5
JH
618 goto try_at_offset;
619 }
620 }
cad2e5aa 621 }
2c2d71f5
JH
622
623 t = s - prog->check_offset_max;
2c2d71f5
JH
624 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
625 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
626 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
627 && t > strpos))) {
2c2d71f5
JH
628 /* Fixed substring is found far enough so that the match
629 cannot start at strpos. */
630 try_at_offset:
cad2e5aa 631 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
632 /* Eventually fbm_*() should handle this, but often
633 anchored_offset is not 0, so this check will not be wasted. */
634 /* XXXX In the code below we prefer to look for "^" even in
635 presence of anchored substrings. And we search even
636 beyond the found float position. These pessimizations
637 are historical artefacts only. */
638 find_anchor:
2c2d71f5 639 while (t < strend - prog->minlen) {
cad2e5aa 640 if (*t == '\n') {
4ee3650e 641 if (t < check_at - prog->check_offset_min) {
30944b6d 642 if (prog->anchored_substr) {
4ee3650e
GS
643 /* Since we moved from the found position,
644 we definitely contradict the found anchored
30944b6d
IZ
645 substr. Due to the above check we do not
646 contradict "check" substr.
647 Thus we can arrive here only if check substr
648 is float. Redo checking for "other"=="fixed".
649 */
650 strpos = t + 1;
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
652 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
653 goto do_other_anchored;
654 }
4ee3650e
GS
655 /* We don't contradict the found floating substring. */
656 /* XXXX Why not check for STCLASS? */
cad2e5aa 657 s = t + 1;
2c2d71f5 658 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 659 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
660 goto set_useful;
661 }
4ee3650e
GS
662 /* Position contradicts check-string */
663 /* XXXX probably better to look for check-string
664 than for "\n", so one should lower the limit for t? */
665 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 666 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 667 other_last = strpos = s = t + 1;
cad2e5aa
JH
668 goto restart;
669 }
670 t++;
671 }
2c2d71f5
JH
672 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
673 PL_colors[0],PL_colors[1]));
674 goto fail_finish;
cad2e5aa 675 }
f5952150
GS
676 else {
677 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
678 PL_colors[0],PL_colors[1]));
679 }
cad2e5aa
JH
680 s = t;
681 set_useful:
2c2d71f5 682 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
683 }
684 else {
f5952150 685 /* The found string does not prohibit matching at strpos,
2c2d71f5 686 - no optimization of calling REx engine can be performed,
f5952150
GS
687 unless it was an MBOL and we are not after MBOL,
688 or a future STCLASS check will fail this. */
2c2d71f5
JH
689 try_at_start:
690 /* Even in this situation we may use MBOL flag if strpos is offset
691 wrt the start of the string. */
05b4157f 692 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 693 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
694 /* May be due to an implicit anchor of m{.*foo} */
695 && !(prog->reganch & ROPT_IMPLICIT))
696 {
cad2e5aa
JH
697 t = strpos;
698 goto find_anchor;
699 }
30944b6d 700 DEBUG_r( if (ml_anch)
f5952150
GS
701 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
702 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 703 );
2c2d71f5 704 success_at_start:
30944b6d 705 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 706 && prog->check_substr /* Could be deleted already */
cad2e5aa 707 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
708 && prog->check_substr == prog->float_substr)
709 {
cad2e5aa 710 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 711 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
712 SvREFCNT_dec(prog->check_substr);
713 prog->check_substr = Nullsv; /* disable */
714 prog->float_substr = Nullsv; /* clear */
5e39e1e5 715 check = Nullsv; /* abort */
cad2e5aa 716 s = strpos;
3cf5c195
IZ
717 /* XXXX This is a remnant of the old implementation. It
718 looks wasteful, since now INTUIT can use many
6eb5f6b9 719 other heuristics. */
cad2e5aa
JH
720 prog->reganch &= ~RE_USE_INTUIT;
721 }
722 else
723 s = strpos;
724 }
725
6eb5f6b9
JH
726 /* Last resort... */
727 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
728 if (prog->regstclass) {
729 /* minlen == 0 is possible if regstclass is \b or \B,
730 and the fixed substr is ''$.
731 Since minlen is already taken into account, s+1 is before strend;
732 accidentally, minlen >= 1 guaranties no false positives at s + 1
733 even for \b or \B. But (minlen? 1 : 0) below assumes that
734 regstclass does not come from lookahead... */
735 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
736 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 737 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 738 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 739 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 740 : 1);
6eb5f6b9 741 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
742 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
743 : (prog->float_substr
744 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
745 cl_l, strend)
746 : strend);
a1933d95 747 char *startpos = strbeg;
6eb5f6b9
JH
748
749 t = s;
76384e4a 750 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 751 PL_regdata = prog->data;
76384e4a
GS
752 PL_bostr = startpos;
753 }
6eb5f6b9
JH
754 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
755 if (!s) {
756#ifdef DEBUGGING
757 char *what;
758#endif
759 if (endpos == strend) {
760 DEBUG_r( PerlIO_printf(Perl_debug_log,
761 "Could not match STCLASS...\n") );
762 goto fail;
763 }
66e933ab
GS
764 DEBUG_r( PerlIO_printf(Perl_debug_log,
765 "This position contradicts STCLASS...\n") );
653099ff
GS
766 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
767 goto fail;
6eb5f6b9
JH
768 /* Contradict one of substrings */
769 if (prog->anchored_substr) {
6eb5f6b9
JH
770 if (prog->anchored_substr == check) {
771 DEBUG_r( what = "anchored" );
772 hop_and_restart:
1aa99e6b 773 s = HOP3c(t, 1, strend);
66e933ab
GS
774 if (s + start_shift + end_shift > strend) {
775 /* XXXX Should be taken into account earlier? */
776 DEBUG_r( PerlIO_printf(Perl_debug_log,
777 "Could not match STCLASS...\n") );
778 goto fail;
779 }
5e39e1e5
HS
780 if (!check)
781 goto giveup;
6eb5f6b9 782 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 783 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
784 what, (long)(s + start_shift - i_strpos)) );
785 goto restart;
786 }
66e933ab 787 /* Have both, check_string is floating */
6eb5f6b9
JH
788 if (t + start_shift >= check_at) /* Contradicts floating=check */
789 goto retry_floating_check;
790 /* Recheck anchored substring, but not floating... */
791 s = check_at;
5e39e1e5
HS
792 if (!check)
793 goto giveup;
6eb5f6b9 794 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 795 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
796 (long)(other_last - i_strpos)) );
797 goto do_other_anchored;
798 }
60e71179
GS
799 /* Another way we could have checked stclass at the
800 current position only: */
801 if (ml_anch) {
802 s = t = t + 1;
5e39e1e5
HS
803 if (!check)
804 goto giveup;
60e71179 805 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
806 "Looking for /%s^%s/m starting at offset %ld...\n",
807 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 808 goto try_at_offset;
66e933ab 809 }
60e71179
GS
810 if (!prog->float_substr) /* Could have been deleted */
811 goto fail;
6eb5f6b9
JH
812 /* Check is floating subtring. */
813 retry_floating_check:
814 t = check_at - start_shift;
815 DEBUG_r( what = "floating" );
816 goto hop_and_restart;
817 }
818 DEBUG_r( if (t != s)
819 PerlIO_printf(Perl_debug_log,
820 "By STCLASS: moving %ld --> %ld\n",
821 (long)(t - i_strpos), (long)(s - i_strpos));
822 else
823 PerlIO_printf(Perl_debug_log,
824 "Does not contradict STCLASS...\n") );
825 }
5e39e1e5
HS
826 giveup:
827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
828 PL_colors[4], (check ? "Guessed" : "Giving up"),
829 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 830 return s;
2c2d71f5
JH
831
832 fail_finish: /* Substring not found */
66e933ab
GS
833 if (prog->check_substr) /* could be removed already */
834 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 835 fail:
2c2d71f5 836 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
837 PL_colors[4],PL_colors[5]));
838 return Nullch;
839}
9661b544 840
6eb5f6b9 841/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
842STATIC char *
843S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 844{
6eb5f6b9
JH
845 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
846 char *m;
d8093b23
G
847 STRLEN ln;
848 unsigned int c1;
849 unsigned int c2;
6eb5f6b9
JH
850 char *e;
851 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 852 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 853
6eb5f6b9
JH
854 /* We know what class it must start with. */
855 switch (OP(c)) {
6eb5f6b9 856 case ANYOF:
a0ed51b3 857 while (s < strend) {
ffc61ed2 858 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
859 if (tmp && (norun || regtry(prog, s)))
860 goto got_it;
861 else
862 tmp = doevery;
a0ed51b3 863 }
6eb5f6b9
JH
864 else
865 tmp = 1;
ffc61ed2 866 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 867 }
6eb5f6b9
JH
868 break;
869 case EXACTF:
870 m = STRING(c);
871 ln = STR_LEN(c);
1aa99e6b
IH
872 if (UTF) {
873 c1 = to_utf8_lower((U8*)m);
874 c2 = to_utf8_upper((U8*)m);
875 }
876 else {
877 c1 = *(U8*)m;
878 c2 = PL_fold[c1];
879 }
6eb5f6b9
JH
880 goto do_exactf;
881 case EXACTFL:
882 m = STRING(c);
883 ln = STR_LEN(c);
d8093b23 884 c1 = *(U8*)m;
6eb5f6b9
JH
885 c2 = PL_fold_locale[c1];
886 do_exactf:
887 e = strend - ln;
b3c9acc1 888
6eb5f6b9
JH
889 if (norun && e < s)
890 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
891
892 if (do_utf8) {
893 STRLEN len;
894 if (c1 == c2)
895 while (s <= e) {
896 if ( utf8_to_uv_simple((U8*)s, &len) == c1
897 && regtry(prog, s) )
898 goto got_it;
899 s += len;
900 }
901 else
902 while (s <= e) {
903 UV c = utf8_to_uv_simple((U8*)s, &len);
904 if ( (c == c1 || c == c2) && regtry(prog, s) )
905 goto got_it;
906 s += len;
907 }
908 }
909 else {
910 if (c1 == c2)
911 while (s <= e) {
912 if ( *(U8*)s == c1
913 && (ln == 1 || !(OP(c) == EXACTF
914 ? ibcmp(s, m, ln)
915 : ibcmp_locale(s, m, ln)))
916 && (norun || regtry(prog, s)) )
917 goto got_it;
918 s++;
919 }
920 else
921 while (s <= e) {
922 if ( (*(U8*)s == c1 || *(U8*)s == c2)
923 && (ln == 1 || !(OP(c) == EXACTF
924 ? ibcmp(s, m, ln)
925 : ibcmp_locale(s, m, ln)))
926 && (norun || regtry(prog, s)) )
927 goto got_it;
928 s++;
929 }
b3c9acc1
IZ
930 }
931 break;
bbce6d69 932 case BOUNDL:
3280af22 933 PL_reg_flags |= RF_tainted;
bbce6d69 934 /* FALL THROUGH */
a0d0e21e 935 case BOUND:
ffc61ed2
JH
936 if (do_utf8) {
937 if (s == startpos)
938 tmp = '\n';
939 else {
1aa99e6b 940 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
941
942 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
943 }
944 tmp = ((OP(c) == BOUND ?
945 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
946 while (s < strend) {
947 if (tmp == !(OP(c) == BOUND ?
948 swash_fetch(PL_utf8_alnum, (U8*)s) :
949 isALNUM_LC_utf8((U8*)s)))
950 {
951 tmp = !tmp;
952 if ((norun || regtry(prog, s)))
953 goto got_it;
954 }
955 s += UTF8SKIP(s);
a687059c 956 }
a0d0e21e 957 }
667bb95a 958 else {
ffc61ed2
JH
959 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
960 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
961 while (s < strend) {
962 if (tmp ==
963 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
964 tmp = !tmp;
965 if ((norun || regtry(prog, s)))
966 goto got_it;
967 }
968 s++;
a0ed51b3 969 }
a0ed51b3 970 }
6eb5f6b9 971 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
972 goto got_it;
973 break;
bbce6d69 974 case NBOUNDL:
3280af22 975 PL_reg_flags |= RF_tainted;
bbce6d69 976 /* FALL THROUGH */
a0d0e21e 977 case NBOUND:
ffc61ed2
JH
978 if (do_utf8) {
979 if (s == startpos)
980 tmp = '\n';
981 else {
1aa99e6b 982 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
983
984 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
985 }
986 tmp = ((OP(c) == NBOUND ?
987 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
988 while (s < strend) {
989 if (tmp == !(OP(c) == NBOUND ?
990 swash_fetch(PL_utf8_alnum, (U8*)s) :
991 isALNUM_LC_utf8((U8*)s)))
992 tmp = !tmp;
993 else if ((norun || regtry(prog, s)))
994 goto got_it;
995 s += UTF8SKIP(s);
996 }
a0d0e21e 997 }
667bb95a 998 else {
ffc61ed2
JH
999 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
1000 tmp = ((OP(c) == NBOUND ?
1001 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1002 while (s < strend) {
1003 if (tmp ==
1004 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1005 tmp = !tmp;
1006 else if ((norun || regtry(prog, s)))
1007 goto got_it;
1008 s++;
1009 }
a0ed51b3 1010 }
6eb5f6b9 1011 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1012 goto got_it;
1013 break;
a0d0e21e 1014 case ALNUM:
ffc61ed2
JH
1015 if (do_utf8) {
1016 while (s < strend) {
1017 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1018 if (tmp && (norun || regtry(prog, s)))
1019 goto got_it;
1020 else
1021 tmp = doevery;
1022 }
bbce6d69 1023 else
ffc61ed2
JH
1024 tmp = 1;
1025 s += UTF8SKIP(s);
bbce6d69 1026 }
bbce6d69 1027 }
ffc61ed2
JH
1028 else {
1029 while (s < strend) {
1030 if (isALNUM(*s)) {
1031 if (tmp && (norun || regtry(prog, s)))
1032 goto got_it;
1033 else
1034 tmp = doevery;
1035 }
a0ed51b3 1036 else
ffc61ed2
JH
1037 tmp = 1;
1038 s++;
a0ed51b3 1039 }
a0ed51b3
LW
1040 }
1041 break;
bbce6d69 1042 case ALNUML:
3280af22 1043 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1044 if (do_utf8) {
1045 while (s < strend) {
1046 if (isALNUM_LC_utf8((U8*)s)) {
1047 if (tmp && (norun || regtry(prog, s)))
1048 goto got_it;
1049 else
1050 tmp = doevery;
1051 }
a687059c 1052 else
ffc61ed2
JH
1053 tmp = 1;
1054 s += UTF8SKIP(s);
a0d0e21e 1055 }
a0d0e21e 1056 }
ffc61ed2
JH
1057 else {
1058 while (s < strend) {
1059 if (isALNUM_LC(*s)) {
1060 if (tmp && (norun || regtry(prog, s)))
1061 goto got_it;
1062 else
1063 tmp = doevery;
1064 }
a0ed51b3 1065 else
ffc61ed2
JH
1066 tmp = 1;
1067 s++;
a0ed51b3 1068 }
a0ed51b3
LW
1069 }
1070 break;
a0d0e21e 1071 case NALNUM:
ffc61ed2
JH
1072 if (do_utf8) {
1073 while (s < strend) {
1074 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1075 if (tmp && (norun || regtry(prog, s)))
1076 goto got_it;
1077 else
1078 tmp = doevery;
1079 }
bbce6d69 1080 else
ffc61ed2
JH
1081 tmp = 1;
1082 s += UTF8SKIP(s);
bbce6d69 1083 }
bbce6d69 1084 }
ffc61ed2
JH
1085 else {
1086 while (s < strend) {
1087 if (!isALNUM(*s)) {
1088 if (tmp && (norun || regtry(prog, s)))
1089 goto got_it;
1090 else
1091 tmp = doevery;
1092 }
a0ed51b3 1093 else
ffc61ed2
JH
1094 tmp = 1;
1095 s++;
a0ed51b3 1096 }
a0ed51b3
LW
1097 }
1098 break;
bbce6d69 1099 case NALNUML:
3280af22 1100 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1101 if (do_utf8) {
1102 while (s < strend) {
1103 if (!isALNUM_LC_utf8((U8*)s)) {
1104 if (tmp && (norun || regtry(prog, s)))
1105 goto got_it;
1106 else
1107 tmp = doevery;
1108 }
a687059c 1109 else
ffc61ed2
JH
1110 tmp = 1;
1111 s += UTF8SKIP(s);
a687059c 1112 }
a0d0e21e 1113 }
ffc61ed2
JH
1114 else {
1115 while (s < strend) {
1116 if (!isALNUM_LC(*s)) {
1117 if (tmp && (norun || regtry(prog, s)))
1118 goto got_it;
1119 else
1120 tmp = doevery;
1121 }
a0ed51b3 1122 else
ffc61ed2
JH
1123 tmp = 1;
1124 s++;
a0ed51b3 1125 }
a0ed51b3
LW
1126 }
1127 break;
a0d0e21e 1128 case SPACE:
ffc61ed2
JH
1129 if (do_utf8) {
1130 while (s < strend) {
1131 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a0d0e21e 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s += UTF8SKIP(s);
2304df62 1140 }
a0d0e21e 1141 }
ffc61ed2
JH
1142 else {
1143 while (s < strend) {
1144 if (isSPACE(*s)) {
1145 if (tmp && (norun || regtry(prog, s)))
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
a0ed51b3 1150 else
ffc61ed2
JH
1151 tmp = 1;
1152 s++;
a0ed51b3 1153 }
a0ed51b3
LW
1154 }
1155 break;
bbce6d69 1156 case SPACEL:
3280af22 1157 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1158 if (do_utf8) {
1159 while (s < strend) {
1160 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
bbce6d69 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s += UTF8SKIP(s);
bbce6d69 1169 }
bbce6d69 1170 }
ffc61ed2
JH
1171 else {
1172 while (s < strend) {
1173 if (isSPACE_LC(*s)) {
1174 if (tmp && (norun || regtry(prog, s)))
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
a0ed51b3 1179 else
ffc61ed2
JH
1180 tmp = 1;
1181 s++;
a0ed51b3 1182 }
a0ed51b3
LW
1183 }
1184 break;
a0d0e21e 1185 case NSPACE:
ffc61ed2
JH
1186 if (do_utf8) {
1187 while (s < strend) {
1188 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1189 if (tmp && (norun || regtry(prog, s)))
1190 goto got_it;
1191 else
1192 tmp = doevery;
1193 }
a0d0e21e 1194 else
ffc61ed2
JH
1195 tmp = 1;
1196 s += UTF8SKIP(s);
a687059c 1197 }
a0d0e21e 1198 }
ffc61ed2
JH
1199 else {
1200 while (s < strend) {
1201 if (!isSPACE(*s)) {
1202 if (tmp && (norun || regtry(prog, s)))
1203 goto got_it;
1204 else
1205 tmp = doevery;
1206 }
a0ed51b3 1207 else
ffc61ed2
JH
1208 tmp = 1;
1209 s++;
a0ed51b3 1210 }
a0ed51b3
LW
1211 }
1212 break;
bbce6d69 1213 case NSPACEL:
3280af22 1214 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1215 if (do_utf8) {
1216 while (s < strend) {
1217 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1218 if (tmp && (norun || regtry(prog, s)))
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
bbce6d69 1223 else
ffc61ed2
JH
1224 tmp = 1;
1225 s += UTF8SKIP(s);
bbce6d69 1226 }
bbce6d69 1227 }
ffc61ed2
JH
1228 else {
1229 while (s < strend) {
1230 if (!isSPACE_LC(*s)) {
1231 if (tmp && (norun || regtry(prog, s)))
1232 goto got_it;
1233 else
1234 tmp = doevery;
1235 }
a0ed51b3 1236 else
ffc61ed2
JH
1237 tmp = 1;
1238 s++;
a0ed51b3 1239 }
a0ed51b3
LW
1240 }
1241 break;
a0d0e21e 1242 case DIGIT:
ffc61ed2
JH
1243 if (do_utf8) {
1244 while (s < strend) {
1245 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1247 goto got_it;
1248 else
1249 tmp = doevery;
1250 }
a0d0e21e 1251 else
ffc61ed2
JH
1252 tmp = 1;
1253 s += UTF8SKIP(s);
2b69d0c2 1254 }
a0d0e21e 1255 }
ffc61ed2
JH
1256 else {
1257 while (s < strend) {
1258 if (isDIGIT(*s)) {
1259 if (tmp && (norun || regtry(prog, s)))
1260 goto got_it;
1261 else
1262 tmp = doevery;
1263 }
a0ed51b3 1264 else
ffc61ed2
JH
1265 tmp = 1;
1266 s++;
a0ed51b3 1267 }
a0ed51b3
LW
1268 }
1269 break;
b8c5462f
JH
1270 case DIGITL:
1271 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1272 if (do_utf8) {
1273 while (s < strend) {
1274 if (isDIGIT_LC_utf8((U8*)s)) {
1275 if (tmp && (norun || regtry(prog, s)))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
b8c5462f 1280 else
ffc61ed2
JH
1281 tmp = 1;
1282 s += UTF8SKIP(s);
b8c5462f 1283 }
b8c5462f 1284 }
ffc61ed2
JH
1285 else {
1286 while (s < strend) {
1287 if (isDIGIT_LC(*s)) {
1288 if (tmp && (norun || regtry(prog, s)))
1289 goto got_it;
1290 else
1291 tmp = doevery;
1292 }
b8c5462f 1293 else
ffc61ed2
JH
1294 tmp = 1;
1295 s++;
b8c5462f 1296 }
b8c5462f
JH
1297 }
1298 break;
a0d0e21e 1299 case NDIGIT:
ffc61ed2
JH
1300 if (do_utf8) {
1301 while (s < strend) {
1302 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1303 if (tmp && (norun || regtry(prog, s)))
1304 goto got_it;
1305 else
1306 tmp = doevery;
1307 }
a0d0e21e 1308 else
ffc61ed2
JH
1309 tmp = 1;
1310 s += UTF8SKIP(s);
a687059c 1311 }
a0d0e21e 1312 }
ffc61ed2
JH
1313 else {
1314 while (s < strend) {
1315 if (!isDIGIT(*s)) {
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
a0ed51b3 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s++;
a0ed51b3 1324 }
a0ed51b3
LW
1325 }
1326 break;
b8c5462f
JH
1327 case NDIGITL:
1328 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1329 if (do_utf8) {
1330 while (s < strend) {
1331 if (!isDIGIT_LC_utf8((U8*)s)) {
1332 if (tmp && (norun || regtry(prog, s)))
1333 goto got_it;
1334 else
1335 tmp = doevery;
1336 }
b8c5462f 1337 else
ffc61ed2
JH
1338 tmp = 1;
1339 s += UTF8SKIP(s);
b8c5462f 1340 }
a0ed51b3 1341 }
ffc61ed2
JH
1342 else {
1343 while (s < strend) {
1344 if (!isDIGIT_LC(*s)) {
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
cf93c79d 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s++;
b8c5462f 1353 }
b8c5462f
JH
1354 }
1355 break;
b3c9acc1 1356 default:
3c3eec57
GS
1357 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1358 break;
d6a28714 1359 }
6eb5f6b9
JH
1360 return 0;
1361 got_it:
1362 return s;
1363}
1364
1365/*
1366 - regexec_flags - match a regexp against a string
1367 */
1368I32
1369Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1370 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1371/* strend: pointer to null at end of string */
1372/* strbeg: real beginning of string */
1373/* minend: end of match must be >=minend after stringarg. */
1374/* data: May be used for some additional optimizations. */
1375/* nosave: For optimizations. */
1376{
6eb5f6b9
JH
1377 register char *s;
1378 register regnode *c;
1379 register char *startpos = stringarg;
6eb5f6b9
JH
1380 I32 minlen; /* must match at least this many chars */
1381 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1382 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1383 constant substr. */ /* CC */
1384 I32 end_shift = 0; /* Same for the end. */ /* CC */
1385 I32 scream_pos = -1; /* Internal iterator of scream. */
1386 char *scream_olds;
1387 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1388 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1389
1390 PL_regcc = 0;
1391
1392 cache_re(prog);
1393#ifdef DEBUGGING
1394 PL_regnarrate = PL_debug & 512;
1395#endif
1396
1397 /* Be paranoid... */
1398 if (prog == NULL || startpos == NULL) {
1399 Perl_croak(aTHX_ "NULL regexp parameter");
1400 return 0;
1401 }
1402
1403 minlen = prog->minlen;
1aa99e6b
IH
1404 if (do_utf8) {
1405 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1406 }
1407 else {
1408 if (strend - startpos < minlen) goto phooey;
1409 }
6eb5f6b9
JH
1410
1411 if (startpos == strbeg) /* is ^ valid at stringarg? */
1412 PL_regprev = '\n';
1413 else {
1aa99e6b
IH
1414 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1415 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1416 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1417 }
1418 else
1419 PL_regprev = (U32)stringarg[-1];
6eb5f6b9
JH
1420 if (!PL_multiline && PL_regprev == '\n')
1421 PL_regprev = '\0'; /* force ^ to NOT match */
1422 }
1423
1424 /* Check validity of program. */
1425 if (UCHARAT(prog->program) != REG_MAGIC) {
1426 Perl_croak(aTHX_ "corrupted regexp program");
1427 }
1428
1429 PL_reg_flags = 0;
1430 PL_reg_eval_set = 0;
1431 PL_reg_maxiter = 0;
1432
1433 if (prog->reganch & ROPT_UTF8)
1434 PL_reg_flags |= RF_utf8;
1435
1436 /* Mark beginning of line for ^ and lookbehind. */
1437 PL_regbol = startpos;
1438 PL_bostr = strbeg;
1439 PL_reg_sv = sv;
1440
1441 /* Mark end of line for $ (and such) */
1442 PL_regeol = strend;
1443
1444 /* see how far we have to get to not match where we matched before */
1445 PL_regtill = startpos+minend;
1446
1447 /* We start without call_cc context. */
1448 PL_reg_call_cc = 0;
1449
1450 /* If there is a "must appear" string, look for it. */
1451 s = startpos;
1452
1453 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1454 MAGIC *mg;
1455
1456 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1457 PL_reg_ganch = startpos;
1458 else if (sv && SvTYPE(sv) >= SVt_PVMG
1459 && SvMAGIC(sv)
1460 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1461 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1462 if (prog->reganch & ROPT_ANCH_GPOS) {
1463 if (s > PL_reg_ganch)
1464 goto phooey;
1465 s = PL_reg_ganch;
1466 }
1467 }
1468 else /* pos() not defined */
1469 PL_reg_ganch = strbeg;
1470 }
1471
699c3c34
JH
1472 if (do_utf8 == (UTF!=0) &&
1473 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1474 re_scream_pos_data d;
1475
1476 d.scream_olds = &scream_olds;
1477 d.scream_pos = &scream_pos;
1478 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1479 if (!s)
1480 goto phooey; /* not present */
1481 }
1482
1483 DEBUG_r( if (!PL_colorset) reginitcolors() );
1484 DEBUG_r(PerlIO_printf(Perl_debug_log,
1485 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1486 PL_colors[4],PL_colors[5],PL_colors[0],
1487 prog->precomp,
1488 PL_colors[1],
1489 (strlen(prog->precomp) > 60 ? "..." : ""),
1490 PL_colors[0],
1491 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1492 startpos, PL_colors[1],
1493 (strend - startpos > 60 ? "..." : ""))
1494 );
1495
1496 /* Simplest case: anchored match need be tried only once. */
1497 /* [unless only anchor is BOL and multiline is set] */
1498 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1499 if (s == startpos && regtry(prog, startpos))
1500 goto got_it;
1501 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1502 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1503 {
1504 char *end;
1505
1506 if (minlen)
1507 dontbother = minlen - 1;
1aa99e6b 1508 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1509 /* for multiline we only have to try after newlines */
1510 if (prog->check_substr) {
1511 if (s == startpos)
1512 goto after_try;
1513 while (1) {
1514 if (regtry(prog, s))
1515 goto got_it;
1516 after_try:
1517 if (s >= end)
1518 goto phooey;
1519 if (prog->reganch & RE_USE_INTUIT) {
1520 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1521 if (!s)
1522 goto phooey;
1523 }
1524 else
1525 s++;
1526 }
1527 } else {
1528 if (s > startpos)
1529 s--;
1530 while (s < end) {
1531 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1532 if (regtry(prog, s))
1533 goto got_it;
1534 }
1535 }
1536 }
1537 }
1538 goto phooey;
1539 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1540 if (regtry(prog, PL_reg_ganch))
1541 goto got_it;
1542 goto phooey;
1543 }
1544
1545 /* Messy cases: unanchored match. */
1546 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1547 /* we have /x+whatever/ */
1548 /* it must be a one character string (XXXX Except UTF?) */
1549 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1550#ifdef DEBUGGING
1551 int did_match = 0;
1552#endif
1553
1aa99e6b 1554 if (do_utf8) {
6eb5f6b9
JH
1555 while (s < strend) {
1556 if (*s == ch) {
bf93d4cc 1557 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1558 if (regtry(prog, s)) goto got_it;
1559 s += UTF8SKIP(s);
1560 while (s < strend && *s == ch)
1561 s += UTF8SKIP(s);
1562 }
1563 s += UTF8SKIP(s);
1564 }
1565 }
1566 else {
1567 while (s < strend) {
1568 if (*s == ch) {
bf93d4cc 1569 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1570 if (regtry(prog, s)) goto got_it;
1571 s++;
1572 while (s < strend && *s == ch)
1573 s++;
1574 }
1575 s++;
1576 }
1577 }
bf93d4cc
GS
1578 DEBUG_r(did_match ||
1579 PerlIO_printf(Perl_debug_log,
1580 "Did not find anchored character...\n"));
6eb5f6b9
JH
1581 }
1582 /*SUPPRESS 560*/
1aa99e6b
IH
1583 else if (do_utf8 == (UTF!=0) &&
1584 (prog->anchored_substr != Nullsv
1585 || (prog->float_substr != Nullsv
1586 && prog->float_max_offset < strend - s))) {
6eb5f6b9
JH
1587 SV *must = prog->anchored_substr
1588 ? prog->anchored_substr : prog->float_substr;
1589 I32 back_max =
1590 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1591 I32 back_min =
1592 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1593 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1594 -(I32)(CHR_SVLEN(must)
1aa99e6b 1595 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1596 char *last1; /* Last position checked before */
bf93d4cc
GS
1597#ifdef DEBUGGING
1598 int did_match = 0;
1599#endif
6eb5f6b9
JH
1600
1601 if (s > PL_bostr)
1602 last1 = HOPc(s, -1);
1603 else
1604 last1 = s - 1; /* bogus */
1605
1606 /* XXXX check_substr already used to find `s', can optimize if
1607 check_substr==must. */
1608 scream_pos = -1;
1609 dontbother = end_shift;
1610 strend = HOPc(strend, -dontbother);
1611 while ( (s <= last) &&
1612 ((flags & REXEC_SCREAM)
1aa99e6b 1613 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1614 end_shift, &scream_pos, 0))
1aa99e6b 1615 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
6eb5f6b9
JH
1616 (unsigned char*)strend, must,
1617 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1618 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1619 if (HOPc(s, -back_max) > last1) {
1620 last1 = HOPc(s, -back_min);
1621 s = HOPc(s, -back_max);
1622 }
1623 else {
1624 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1625
1626 last1 = HOPc(s, -back_min);
1627 s = t;
1628 }
1aa99e6b 1629 if (do_utf8) {
6eb5f6b9
JH
1630 while (s <= last1) {
1631 if (regtry(prog, s))
1632 goto got_it;
1633 s += UTF8SKIP(s);
1634 }
1635 }
1636 else {
1637 while (s <= last1) {
1638 if (regtry(prog, s))
1639 goto got_it;
1640 s++;
1641 }
1642 }
1643 }
bf93d4cc
GS
1644 DEBUG_r(did_match ||
1645 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1646 ((must == prog->anchored_substr)
1647 ? "anchored" : "floating"),
1648 PL_colors[0],
1649 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1650 SvPVX(must),
1651 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1652 goto phooey;
1653 }
155aba94 1654 else if ((c = prog->regstclass)) {
66e933ab
GS
1655 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1656 /* don't bother with what can't match */
6eb5f6b9 1657 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1658 DEBUG_r({
1659 SV *prop = sv_newmortal();
1660 regprop(prop, c);
1661 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1662 });
6eb5f6b9
JH
1663 if (find_byclass(prog, c, s, strend, startpos, 0))
1664 goto got_it;
bf93d4cc 1665 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1666 }
1667 else {
1668 dontbother = 0;
1669 if (prog->float_substr != Nullsv) { /* Trim the end. */
1670 char *last;
d6a28714
JH
1671
1672 if (flags & REXEC_SCREAM) {
1673 last = screaminstr(sv, prog->float_substr, s - strbeg,
1674 end_shift, &scream_pos, 1); /* last one */
1675 if (!last)
ffc61ed2 1676 last = scream_olds; /* Only one occurrence. */
b8c5462f 1677 }
d6a28714
JH
1678 else {
1679 STRLEN len;
1680 char *little = SvPV(prog->float_substr, len);
1681
1682 if (SvTAIL(prog->float_substr)) {
1683 if (memEQ(strend - len + 1, little, len - 1))
1684 last = strend - len + 1;
1685 else if (!PL_multiline)
1686 last = memEQ(strend - len, little, len)
1687 ? strend - len : Nullch;
b8c5462f 1688 else
d6a28714
JH
1689 goto find_last;
1690 } else {
1691 find_last:
1692 if (len)
1693 last = rninstr(s, strend, little, little + len);
b8c5462f 1694 else
d6a28714 1695 last = strend; /* matching `$' */
b8c5462f 1696 }
b8c5462f 1697 }
bf93d4cc
GS
1698 if (last == NULL) {
1699 DEBUG_r(PerlIO_printf(Perl_debug_log,
1700 "%sCan't trim the tail, match fails (should not happen)%s\n",
1701 PL_colors[4],PL_colors[5]));
1702 goto phooey; /* Should not happen! */
1703 }
d6a28714
JH
1704 dontbother = strend - last + prog->float_min_offset;
1705 }
1706 if (minlen && (dontbother < minlen))
1707 dontbother = minlen - 1;
1708 strend -= dontbother; /* this one's always in bytes! */
1709 /* We don't know much -- general case. */
1aa99e6b 1710 if (do_utf8) {
d6a28714
JH
1711 for (;;) {
1712 if (regtry(prog, s))
1713 goto got_it;
1714 if (s >= strend)
1715 break;
b8c5462f 1716 s += UTF8SKIP(s);
d6a28714
JH
1717 };
1718 }
1719 else {
1720 do {
1721 if (regtry(prog, s))
1722 goto got_it;
1723 } while (s++ < strend);
1724 }
1725 }
1726
1727 /* Failure. */
1728 goto phooey;
1729
1730got_it:
1731 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1732
1733 if (PL_reg_eval_set) {
1734 /* Preserve the current value of $^R */
1735 if (oreplsv != GvSV(PL_replgv))
1736 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1737 restored, the value remains
1738 the same. */
1739 restore_pos(aTHXo_ 0);
1740 }
1741
1742 /* make sure $`, $&, $', and $digit will work later */
1743 if ( !(flags & REXEC_NOT_FIRST) ) {
1744 if (RX_MATCH_COPIED(prog)) {
1745 Safefree(prog->subbeg);
1746 RX_MATCH_COPIED_off(prog);
1747 }
1748 if (flags & REXEC_COPY_STR) {
1749 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1750
1751 s = savepvn(strbeg, i);
1752 prog->subbeg = s;
1753 prog->sublen = i;
1754 RX_MATCH_COPIED_on(prog);
1755 }
1756 else {
1757 prog->subbeg = strbeg;
1758 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1759 }
1760 }
1761
1762 return 1;
1763
1764phooey:
bf93d4cc
GS
1765 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1766 PL_colors[4],PL_colors[5]));
d6a28714
JH
1767 if (PL_reg_eval_set)
1768 restore_pos(aTHXo_ 0);
1769 return 0;
1770}
1771
1772/*
1773 - regtry - try match at specific point
1774 */
1775STATIC I32 /* 0 failure, 1 success */
1776S_regtry(pTHX_ regexp *prog, char *startpos)
1777{
d6a28714
JH
1778 register I32 i;
1779 register I32 *sp;
1780 register I32 *ep;
1781 CHECKPOINT lastcp;
1782
02db2b7b
IZ
1783#ifdef DEBUGGING
1784 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1785#endif
d6a28714
JH
1786 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1787 MAGIC *mg;
1788
1789 PL_reg_eval_set = RS_init;
1790 DEBUG_r(DEBUG_s(
b900a521
JH
1791 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1792 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1793 ));
e8347627 1794 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1795 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1796 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1797 SAVETMPS;
1798 /* Apparently this is not needed, judging by wantarray. */
e8347627 1799 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1800 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1801
1802 if (PL_reg_sv) {
1803 /* Make $_ available to executed code. */
1804 if (PL_reg_sv != DEFSV) {
1805 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1806 SAVESPTR(DEFSV);
1807 DEFSV = PL_reg_sv;
b8c5462f 1808 }
d6a28714
JH
1809
1810 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1811 && (mg = mg_find(PL_reg_sv, 'g')))) {
1812 /* prepare for quick setting of pos */
1813 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1814 mg = mg_find(PL_reg_sv, 'g');
1815 mg->mg_len = -1;
b8c5462f 1816 }
d6a28714
JH
1817 PL_reg_magic = mg;
1818 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1819 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1820 }
1821 if (!PL_reg_curpm)
0f79a09d 1822 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1823 PL_reg_curpm->op_pmregexp = prog;
1824 PL_reg_oldcurpm = PL_curpm;
1825 PL_curpm = PL_reg_curpm;
1826 if (RX_MATCH_COPIED(prog)) {
1827 /* Here is a serious problem: we cannot rewrite subbeg,
1828 since it may be needed if this match fails. Thus
1829 $` inside (?{}) could fail... */
1830 PL_reg_oldsaved = prog->subbeg;
1831 PL_reg_oldsavedlen = prog->sublen;
1832 RX_MATCH_COPIED_off(prog);
1833 }
1834 else
1835 PL_reg_oldsaved = Nullch;
1836 prog->subbeg = PL_bostr;
1837 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1838 }
1839 prog->startp[0] = startpos - PL_bostr;
1840 PL_reginput = startpos;
1841 PL_regstartp = prog->startp;
1842 PL_regendp = prog->endp;
1843 PL_reglastparen = &prog->lastparen;
1844 prog->lastparen = 0;
1845 PL_regsize = 0;
1846 DEBUG_r(PL_reg_starttry = startpos);
1847 if (PL_reg_start_tmpl <= prog->nparens) {
1848 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1849 if(PL_reg_start_tmp)
1850 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1851 else
1852 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1853 }
1854
1855 /* XXXX What this code is doing here?!!! There should be no need
1856 to do this again and again, PL_reglastparen should take care of
3dd2943c 1857 this! --ilya*/
dafc8851
JH
1858
1859 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1860 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1861 * PL_reglastparen), is not needed at all by the test suite
1862 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1863 * enough, for building DynaLoader, or otherwise this
1864 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1865 * will happen. Meanwhile, this code *is* needed for the
1866 * above-mentioned test suite tests to succeed. The common theme
1867 * on those tests seems to be returning null fields from matches.
1868 * --jhi */
dafc8851 1869#if 1
d6a28714
JH
1870 sp = prog->startp;
1871 ep = prog->endp;
1872 if (prog->nparens) {
09e8ae3b 1873 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1874 *++sp = -1;
1875 *++ep = -1;
1876 }
1877 }
dafc8851 1878#endif
02db2b7b 1879 REGCP_SET(lastcp);
d6a28714
JH
1880 if (regmatch(prog->program + 1)) {
1881 prog->endp[0] = PL_reginput - PL_bostr;
1882 return 1;
1883 }
02db2b7b 1884 REGCP_UNWIND(lastcp);
d6a28714
JH
1885 return 0;
1886}
1887
02db2b7b
IZ
1888#define RE_UNWIND_BRANCH 1
1889#define RE_UNWIND_BRANCHJ 2
1890
1891union re_unwind_t;
1892
1893typedef struct { /* XX: makes sense to enlarge it... */
1894 I32 type;
1895 I32 prev;
1896 CHECKPOINT lastcp;
1897} re_unwind_generic_t;
1898
1899typedef struct {
1900 I32 type;
1901 I32 prev;
1902 CHECKPOINT lastcp;
1903 I32 lastparen;
1904 regnode *next;
1905 char *locinput;
1906 I32 nextchr;
1907#ifdef DEBUGGING
1908 int regindent;
1909#endif
1910} re_unwind_branch_t;
1911
1912typedef union re_unwind_t {
1913 I32 type;
1914 re_unwind_generic_t generic;
1915 re_unwind_branch_t branch;
1916} re_unwind_t;
1917
d6a28714
JH
1918/*
1919 - regmatch - main matching routine
1920 *
1921 * Conceptually the strategy is simple: check to see whether the current
1922 * node matches, call self recursively to see whether the rest matches,
1923 * and then act accordingly. In practice we make some effort to avoid
1924 * recursion, in particular by going through "ordinary" nodes (that don't
1925 * need to know whether the rest of the match failed) by a loop instead of
1926 * by recursion.
1927 */
1928/* [lwall] I've hoisted the register declarations to the outer block in order to
1929 * maybe save a little bit of pushing and popping on the stack. It also takes
1930 * advantage of machines that use a register save mask on subroutine entry.
1931 */
1932STATIC I32 /* 0 failure, 1 success */
1933S_regmatch(pTHX_ regnode *prog)
1934{
d6a28714
JH
1935 register regnode *scan; /* Current node. */
1936 regnode *next; /* Next node. */
1937 regnode *inner; /* Next node in internal branch. */
1938 register I32 nextchr; /* renamed nextchr - nextchar colides with
1939 function of same name */
1940 register I32 n; /* no or next */
1941 register I32 ln; /* len or last */
1942 register char *s; /* operand or save */
1943 register char *locinput = PL_reginput;
1944 register I32 c1, c2, paren; /* case fold search, parenth */
1945 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1946 I32 unwind = 0;
1947 I32 firstcp = PL_savestack_ix;
ffc61ed2 1948 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1949
d6a28714
JH
1950#ifdef DEBUGGING
1951 PL_regindent++;
1952#endif
1953
1954 /* Note that nextchr is a byte even in UTF */
1955 nextchr = UCHARAT(locinput);
1956 scan = prog;
1957 while (scan != NULL) {
1958#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1959#if 1
d6a28714
JH
1960# define sayYES goto yes
1961# define sayNO goto no
7821416a
IZ
1962# define sayYES_FINAL goto yes_final
1963# define sayYES_LOUD goto yes_loud
1964# define sayNO_FINAL goto no_final
1965# define sayNO_SILENT goto do_no
d6a28714
JH
1966# define saySAME(x) if (x) goto yes; else goto no
1967# define REPORT_CODE_OFF 24
1968#else
1969# define sayYES return 1
1970# define sayNO return 0
7821416a
IZ
1971# define sayYES_FINAL return 1
1972# define sayYES_LOUD return 1
1973# define sayNO_FINAL return 0
1974# define sayNO_SILENT return 0
d6a28714
JH
1975# define saySAME(x) return x
1976#endif
1977 DEBUG_r( {
1978 SV *prop = sv_newmortal();
1979 int docolor = *PL_colors[0];
1980 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1981 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
1982 /* The part of the string before starttry has one color
1983 (pref0_len chars), between starttry and current
1984 position another one (pref_len - pref0_len chars),
1985 after the current position the third one.
1986 We assume that pref0_len <= pref_len, otherwise we
1987 decrease pref0_len. */
1aa99e6b
IH
1988 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1989 ? (5 + taill) - l : locinput - PL_bostr;
1990 int pref0_len;
d6a28714 1991
1aa99e6b
IH
1992 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1993 pref_len++;
1994 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714
JH
1995 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1996 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1997 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
1998 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1999 l--;
d6a28714
JH
2000 if (pref0_len < 0)
2001 pref0_len = 0;
2002 if (pref0_len > pref_len)
2003 pref0_len = pref_len;
2004 regprop(prop, scan);
2005 PerlIO_printf(Perl_debug_log,
b900a521
JH
2006 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2007 (IV)(locinput - PL_bostr),
d6a28714
JH
2008 PL_colors[4], pref0_len,
2009 locinput - pref_len, PL_colors[5],
2010 PL_colors[2], pref_len - pref0_len,
2011 locinput - pref_len + pref0_len, PL_colors[3],
2012 (docolor ? "" : "> <"),
2013 PL_colors[0], l, locinput, PL_colors[1],
2014 15 - l - pref_len + 1,
2015 "",
b900a521 2016 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2017 SvPVX(prop));
2018 } );
2019
2020 next = scan + NEXT_OFF(scan);
2021 if (next == scan)
2022 next = NULL;
2023
2024 switch (OP(scan)) {
2025 case BOL:
2026 if (locinput == PL_bostr
2027 ? PL_regprev == '\n'
2028 : (PL_multiline &&
2029 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2030 {
2031 /* regtill = regbol; */
b8c5462f
JH
2032 break;
2033 }
d6a28714
JH
2034 sayNO;
2035 case MBOL:
2036 if (locinput == PL_bostr
2037 ? PL_regprev == '\n'
2038 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2039 {
b8c5462f
JH
2040 break;
2041 }
d6a28714
JH
2042 sayNO;
2043 case SBOL:
c2a73568 2044 if (locinput == PL_bostr)
b8c5462f 2045 break;
d6a28714
JH
2046 sayNO;
2047 case GPOS:
2048 if (locinput == PL_reg_ganch)
2049 break;
2050 sayNO;
2051 case EOL:
2052 if (PL_multiline)
2053 goto meol;
2054 else
2055 goto seol;
2056 case MEOL:
2057 meol:
2058 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2059 sayNO;
b8c5462f 2060 break;
d6a28714
JH
2061 case SEOL:
2062 seol:
2063 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2064 sayNO;
d6a28714 2065 if (PL_regeol - locinput > 1)
b8c5462f 2066 sayNO;
b8c5462f 2067 break;
d6a28714
JH
2068 case EOS:
2069 if (PL_regeol != locinput)
b8c5462f 2070 sayNO;
d6a28714 2071 break;
ffc61ed2 2072 case SANY:
1aa99e6b 2073 if (do_utf8) {
b8c5462f 2074 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2075 if (locinput > PL_regeol)
2076 sayNO;
b8c5462f
JH
2077 nextchr = UCHARAT(locinput);
2078 break;
2079 }
d6a28714 2080 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2081 sayNO;
b8c5462f 2082 nextchr = UCHARAT(++locinput);
a0d0e21e 2083 break;
ffc61ed2 2084 case REG_ANY:
1aa99e6b
IH
2085 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2086 sayNO;
2087 if (do_utf8) {
b8c5462f 2088 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2089 if (locinput > PL_regeol)
2090 sayNO;
a0ed51b3 2091 nextchr = UCHARAT(locinput);
a0ed51b3 2092 }
1aa99e6b
IH
2093 else
2094 nextchr = UCHARAT(++locinput);
a0ed51b3 2095 break;
d6a28714 2096 case EXACT:
cd439c50
IZ
2097 s = STRING(scan);
2098 ln = STR_LEN(scan);
1aa99e6b
IH
2099 if (do_utf8 != (UTF!=0)) {
2100 char *l = locinput;
2101 char *e = s + ln;
2102 STRLEN len;
2103 if (do_utf8)
2104 while (s < e) {
2105 if (l >= PL_regeol)
2106 sayNO;
2107 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2108 sayNO;
2109 s++;
2110 l += len;
2111 }
2112 else
2113 while (s < e) {
2114 if (l >= PL_regeol)
2115 sayNO;
2116 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2117 sayNO;
2118 s += len;
2119 l++;
2120 }
2121 locinput = l;
2122 nextchr = UCHARAT(locinput);
2123 break;
2124 }
d6a28714
JH
2125 /* Inline the first character, for speed. */
2126 if (UCHARAT(s) != nextchr)
2127 sayNO;
2128 if (PL_regeol - locinput < ln)
2129 sayNO;
2130 if (ln > 1 && memNE(s, locinput, ln))
2131 sayNO;
2132 locinput += ln;
2133 nextchr = UCHARAT(locinput);
2134 break;
2135 case EXACTFL:
b8c5462f
JH
2136 PL_reg_flags |= RF_tainted;
2137 /* FALL THROUGH */
d6a28714 2138 case EXACTF:
cd439c50
IZ
2139 s = STRING(scan);
2140 ln = STR_LEN(scan);
d6a28714 2141
1aa99e6b 2142 if (do_utf8) {
d6a28714 2143 char *l = locinput;
1aa99e6b
IH
2144 char *e;
2145 e = s + ln;
d6a28714
JH
2146 c1 = OP(scan) == EXACTF;
2147 while (s < e) {
1aa99e6b 2148 if (l >= PL_regeol) {
d6a28714
JH
2149 sayNO;
2150 }
1aa99e6b
IH
2151 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2152 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2153 sayNO;
2154 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2155 l += UTF8SKIP(l);
b8c5462f 2156 }
d6a28714 2157 locinput = l;
a0ed51b3
LW
2158 nextchr = UCHARAT(locinput);
2159 break;
2160 }
d6a28714
JH
2161
2162 /* Inline the first character, for speed. */
2163 if (UCHARAT(s) != nextchr &&
2164 UCHARAT(s) != ((OP(scan) == EXACTF)
2165 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2166 sayNO;
d6a28714 2167 if (PL_regeol - locinput < ln)
b8c5462f 2168 sayNO;
d6a28714
JH
2169 if (ln > 1 && (OP(scan) == EXACTF
2170 ? ibcmp(s, locinput, ln)
2171 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2172 sayNO;
d6a28714
JH
2173 locinput += ln;
2174 nextchr = UCHARAT(locinput);
a0d0e21e 2175 break;
d6a28714 2176 case ANYOF:
ffc61ed2
JH
2177 if (do_utf8) {
2178 if (!reginclass(scan, (U8*)locinput, do_utf8))
2179 sayNO;
2180 if (locinput >= PL_regeol)
2181 sayNO;
2182 locinput += PL_utf8skip[nextchr];
b8c5462f 2183 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2184 }
2185 else {
2186 if (nextchr < 0)
2187 nextchr = UCHARAT(locinput);
2188 if (!reginclass(scan, (U8*)locinput, do_utf8))
2189 sayNO;
2190 if (!nextchr && locinput >= PL_regeol)
2191 sayNO;
2192 nextchr = UCHARAT(++locinput);
2193 }
b8c5462f 2194 break;
d6a28714 2195 case ALNUML:
b8c5462f
JH
2196 PL_reg_flags |= RF_tainted;
2197 /* FALL THROUGH */
d6a28714 2198 case ALNUM:
b8c5462f 2199 if (!nextchr)
4633a7c4 2200 sayNO;
ffc61ed2
JH
2201 if (do_utf8) {
2202 if (!(OP(scan) == ALNUM
d6a28714
JH
2203 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2204 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2205 {
2206 sayNO;
a0ed51b3 2207 }
b8c5462f 2208 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2209 nextchr = UCHARAT(locinput);
2210 break;
2211 }
ffc61ed2 2212 if (!(OP(scan) == ALNUM
d6a28714 2213 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2214 sayNO;
b8c5462f 2215 nextchr = UCHARAT(++locinput);
a0d0e21e 2216 break;
d6a28714 2217 case NALNUML:
b8c5462f
JH
2218 PL_reg_flags |= RF_tainted;
2219 /* FALL THROUGH */
d6a28714
JH
2220 case NALNUM:
2221 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2222 sayNO;
ffc61ed2
JH
2223 if (do_utf8) {
2224 if (OP(scan) == NALNUM
d6a28714
JH
2225 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2226 : isALNUM_LC_utf8((U8*)locinput))
2227 {
b8c5462f 2228 sayNO;
d6a28714 2229 }
b8c5462f
JH
2230 locinput += PL_utf8skip[nextchr];
2231 nextchr = UCHARAT(locinput);
2232 break;
2233 }
ffc61ed2 2234 if (OP(scan) == NALNUM
d6a28714 2235 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2236 sayNO;
76e3520e 2237 nextchr = UCHARAT(++locinput);
a0d0e21e 2238 break;
d6a28714
JH
2239 case BOUNDL:
2240 case NBOUNDL:
3280af22 2241 PL_reg_flags |= RF_tainted;
bbce6d69 2242 /* FALL THROUGH */
d6a28714
JH
2243 case BOUND:
2244 case NBOUND:
2245 /* was last char in word? */
ffc61ed2
JH
2246 if (do_utf8) {
2247 if (locinput == PL_regbol)
2248 ln = PL_regprev;
2249 else {
2250 U8 *r = reghop((U8*)locinput, -1);
2251
2252 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2253 }
2254 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2255 ln = isALNUM_uni(ln);
2256 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2257 }
2258 else {
2259 ln = isALNUM_LC_uni(ln);
2260 n = isALNUM_LC_utf8((U8*)locinput);
2261 }
a0ed51b3 2262 }
d6a28714 2263 else {
ffc61ed2
JH
2264 ln = (locinput != PL_regbol) ?
2265 UCHARAT(locinput - 1) : PL_regprev;
2266 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2267 ln = isALNUM(ln);
2268 n = isALNUM(nextchr);
2269 }
2270 else {
2271 ln = isALNUM_LC(ln);
2272 n = isALNUM_LC(nextchr);
2273 }
d6a28714 2274 }
ffc61ed2
JH
2275 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2276 OP(scan) == BOUNDL))
2277 sayNO;
a0ed51b3 2278 break;
d6a28714 2279 case SPACEL:
3280af22 2280 PL_reg_flags |= RF_tainted;
bbce6d69 2281 /* FALL THROUGH */
d6a28714 2282 case SPACE:
9442cb0e 2283 if (!nextchr)
4633a7c4 2284 sayNO;
1aa99e6b 2285 if (do_utf8) {
fd400ab9 2286 if (UTF8_IS_CONTINUED(nextchr)) {
ffc61ed2
JH
2287 if (!(OP(scan) == SPACE
2288 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2289 : isSPACE_LC_utf8((U8*)locinput)))
2290 {
2291 sayNO;
2292 }
2293 locinput += PL_utf8skip[nextchr];
2294 nextchr = UCHARAT(locinput);
2295 break;
d6a28714 2296 }
ffc61ed2
JH
2297 if (!(OP(scan) == SPACE
2298 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2299 sayNO;
2300 nextchr = UCHARAT(++locinput);
2301 }
2302 else {
2303 if (!(OP(scan) == SPACE
2304 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2305 sayNO;
2306 nextchr = UCHARAT(++locinput);
a0ed51b3 2307 }
a0ed51b3 2308 break;
d6a28714 2309 case NSPACEL:
3280af22 2310 PL_reg_flags |= RF_tainted;
bbce6d69 2311 /* FALL THROUGH */
d6a28714 2312 case NSPACE:
9442cb0e 2313 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2314 sayNO;
1aa99e6b 2315 if (do_utf8) {
ffc61ed2 2316 if (OP(scan) == NSPACE
9442cb0e 2317 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2318 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2319 {
2320 sayNO;
2321 }
2322 locinput += PL_utf8skip[nextchr];
2323 nextchr = UCHARAT(locinput);
2324 break;
a0ed51b3 2325 }
ffc61ed2 2326 if (OP(scan) == NSPACE
d6a28714 2327 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2328 sayNO;
76e3520e 2329 nextchr = UCHARAT(++locinput);
a0d0e21e 2330 break;
d6a28714 2331 case DIGITL:
a0ed51b3
LW
2332 PL_reg_flags |= RF_tainted;
2333 /* FALL THROUGH */
d6a28714 2334 case DIGIT:
9442cb0e 2335 if (!nextchr)
a0ed51b3 2336 sayNO;
1aa99e6b 2337 if (do_utf8) {
ffc61ed2 2338 if (!(OP(scan) == DIGIT
9442cb0e
GS
2339 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2340 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2341 {
a0ed51b3 2342 sayNO;
dfe13c55 2343 }
6f06b55f 2344 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2345 nextchr = UCHARAT(locinput);
2346 break;
2347 }
ffc61ed2 2348 if (!(OP(scan) == DIGIT
9442cb0e 2349 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2350 sayNO;
2351 nextchr = UCHARAT(++locinput);
2352 break;
d6a28714 2353 case NDIGITL:
b8c5462f
JH
2354 PL_reg_flags |= RF_tainted;
2355 /* FALL THROUGH */
d6a28714 2356 case NDIGIT:
9442cb0e 2357 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2358 sayNO;
1aa99e6b 2359 if (do_utf8) {
ffc61ed2 2360 if (OP(scan) == NDIGIT
9442cb0e
GS
2361 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2362 : isDIGIT_LC_utf8((U8*)locinput))
2363 {
a0ed51b3 2364 sayNO;
9442cb0e 2365 }
6f06b55f 2366 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2367 nextchr = UCHARAT(locinput);
2368 break;
2369 }
ffc61ed2 2370 if (OP(scan) == NDIGIT
9442cb0e 2371 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2372 sayNO;
2373 nextchr = UCHARAT(++locinput);
2374 break;
2375 case CLUMP:
dfe13c55 2376 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2377 sayNO;
6f06b55f 2378 locinput += PL_utf8skip[nextchr];
dfe13c55 2379 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2380 locinput += UTF8SKIP(locinput);
2381 if (locinput > PL_regeol)
2382 sayNO;
2383 nextchr = UCHARAT(locinput);
2384 break;
c8756f30 2385 case REFFL:
3280af22 2386 PL_reg_flags |= RF_tainted;
c8756f30 2387 /* FALL THROUGH */
c277df42 2388 case REF:
c8756f30 2389 case REFF:
c277df42 2390 n = ARG(scan); /* which paren pair */
cf93c79d 2391 ln = PL_regstartp[n];
2c2d71f5 2392 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2393 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2394 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2395 if (ln == PL_regendp[n])
a0d0e21e 2396 break;
a0ed51b3 2397
cf93c79d 2398 s = PL_bostr + ln;
1aa99e6b 2399 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2400 char *l = locinput;
cf93c79d 2401 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2402 /*
2403 * Note that we can't do the "other character" lookup trick as
2404 * in the 8-bit case (no pun intended) because in Unicode we
2405 * have to map both upper and title case to lower case.
2406 */
2407 if (OP(scan) == REFF) {
2408 while (s < e) {
2409 if (l >= PL_regeol)
2410 sayNO;
dfe13c55 2411 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2412 sayNO;
2413 s += UTF8SKIP(s);
2414 l += UTF8SKIP(l);
2415 }
2416 }
2417 else {
2418 while (s < e) {
2419 if (l >= PL_regeol)
2420 sayNO;
dfe13c55 2421 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2422 sayNO;
2423 s += UTF8SKIP(s);
2424 l += UTF8SKIP(l);
2425 }
2426 }
2427 locinput = l;
2428 nextchr = UCHARAT(locinput);
2429 break;
2430 }
2431
a0d0e21e 2432 /* Inline the first character, for speed. */
76e3520e 2433 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2434 (OP(scan) == REF ||
2435 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2436 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2437 sayNO;
cf93c79d 2438 ln = PL_regendp[n] - ln;
3280af22 2439 if (locinput + ln > PL_regeol)
4633a7c4 2440 sayNO;
c8756f30
AK
2441 if (ln > 1 && (OP(scan) == REF
2442 ? memNE(s, locinput, ln)
2443 : (OP(scan) == REFF
2444 ? ibcmp(s, locinput, ln)
2445 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2446 sayNO;
a0d0e21e 2447 locinput += ln;
76e3520e 2448 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2449 break;
2450
2451 case NOTHING:
c277df42 2452 case TAIL:
a0d0e21e
LW
2453 break;
2454 case BACK:
2455 break;
c277df42
IZ
2456 case EVAL:
2457 {
2458 dSP;
533c011a 2459 OP_4tree *oop = PL_op;
3280af22
NIS
2460 COP *ocurcop = PL_curcop;
2461 SV **ocurpad = PL_curpad;
c277df42
IZ
2462 SV *ret;
2463
2464 n = ARG(scan);
533c011a 2465 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2466 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2467 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2468 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2469
cea2e8a9 2470 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2471 SPAGAIN;
2472 ret = POPs;
2473 PUTBACK;
2474
0f5d15d6
IZ
2475 PL_op = oop;
2476 PL_curpad = ocurpad;
2477 PL_curcop = ocurcop;
c277df42 2478 if (logical) {
0f5d15d6
IZ
2479 if (logical == 2) { /* Postponed subexpression. */
2480 regexp *re;
22c35a8c 2481 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2482 re_cc_state state;
0f5d15d6
IZ
2483 CHECKPOINT cp, lastcp;
2484
2485 if(SvROK(ret) || SvRMAGICAL(ret)) {
2486 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2487
2488 if(SvMAGICAL(sv))
2489 mg = mg_find(sv, 'r');
2490 }
2491 if (mg) {
2492 re = (regexp *)mg->mg_obj;
df0003d4 2493 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2494 }
2495 else {
2496 STRLEN len;
2497 char *t = SvPV(ret, len);
2498 PMOP pm;
2499 char *oprecomp = PL_regprecomp;
2500 I32 osize = PL_regsize;
2501 I32 onpar = PL_regnpar;
2502
2503 pm.op_pmflags = 0;
cea2e8a9 2504 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2505 if (!(SvFLAGS(ret)
2506 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2507 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2508 PL_regprecomp = oprecomp;
2509 PL_regsize = osize;
2510 PL_regnpar = onpar;
2511 }
2512 DEBUG_r(
2513 PerlIO_printf(Perl_debug_log,
2514 "Entering embedded `%s%.60s%s%s'\n",
2515 PL_colors[0],
2516 re->precomp,
2517 PL_colors[1],
2518 (strlen(re->precomp) > 60 ? "..." : ""))
2519 );
2520 state.node = next;
2521 state.prev = PL_reg_call_cc;
2522 state.cc = PL_regcc;
2523 state.re = PL_reg_re;
2524
2ab05381 2525 PL_regcc = 0;
0f5d15d6
IZ
2526
2527 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2528 REGCP_SET(lastcp);
0f5d15d6
IZ
2529 cache_re(re);
2530 state.ss = PL_savestack_ix;
2531 *PL_reglastparen = 0;
2532 PL_reg_call_cc = &state;
2533 PL_reginput = locinput;
2c2d71f5
JH
2534
2535 /* XXXX This is too dramatic a measure... */
2536 PL_reg_maxiter = 0;
2537
0f5d15d6 2538 if (regmatch(re->program + 1)) {
2c914db6
IZ
2539 /* Even though we succeeded, we need to restore
2540 global variables, since we may be wrapped inside
2541 SUSPEND, thus the match may be not finished yet. */
2542
2543 /* XXXX Do this only if SUSPENDed? */
2544 PL_reg_call_cc = state.prev;
2545 PL_regcc = state.cc;
2546 PL_reg_re = state.re;
2547 cache_re(PL_reg_re);
2548
2549 /* XXXX This is too dramatic a measure... */
2550 PL_reg_maxiter = 0;
2551
2552 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2553 ReREFCNT_dec(re);
2554 regcpblow(cp);
2555 sayYES;
2556 }
0f5d15d6 2557 ReREFCNT_dec(re);
02db2b7b 2558 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2559 regcppop();
2560 PL_reg_call_cc = state.prev;
2561 PL_regcc = state.cc;
2562 PL_reg_re = state.re;
d3790889 2563 cache_re(PL_reg_re);
2c2d71f5
JH
2564
2565 /* XXXX This is too dramatic a measure... */
2566 PL_reg_maxiter = 0;
2567
0f5d15d6
IZ
2568 sayNO;
2569 }
c277df42 2570 sw = SvTRUE(ret);
0f5d15d6 2571 logical = 0;
a0ed51b3
LW
2572 }
2573 else
3280af22 2574 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2575 break;
2576 }
a0d0e21e 2577 case OPEN:
c277df42 2578 n = ARG(scan); /* which paren pair */
3280af22
NIS
2579 PL_reg_start_tmp[n] = locinput;
2580 if (n > PL_regsize)
2581 PL_regsize = n;
a0d0e21e
LW
2582 break;
2583 case CLOSE:
c277df42 2584 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2585 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2586 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2587 if (n > *PL_reglastparen)
2588 *PL_reglastparen = n;
a0d0e21e 2589 break;
c277df42
IZ
2590 case GROUPP:
2591 n = ARG(scan); /* which paren pair */
cf93c79d 2592 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2593 break;
2594 case IFTHEN:
2c2d71f5 2595 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2596 if (sw)
2597 next = NEXTOPER(NEXTOPER(scan));
2598 else {
2599 next = scan + ARG(scan);
2600 if (OP(next) == IFTHEN) /* Fake one. */
2601 next = NEXTOPER(NEXTOPER(next));
2602 }
2603 break;
2604 case LOGICAL:
0f5d15d6 2605 logical = scan->flags;
c277df42 2606 break;
2ab05381
IZ
2607/*******************************************************************
2608 PL_regcc contains infoblock about the innermost (...)* loop, and
2609 a pointer to the next outer infoblock.
2610
2611 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2612
2613 1) After matching X, regnode for CURLYX is processed;
2614
2615 2) This regnode creates infoblock on the stack, and calls
2616 regmatch() recursively with the starting point at WHILEM node;
2617
2618 3) Each hit of WHILEM node tries to match A and Z (in the order
2619 depending on the current iteration, min/max of {min,max} and
2620 greediness). The information about where are nodes for "A"
2621 and "Z" is read from the infoblock, as is info on how many times "A"
2622 was already matched, and greediness.
2623
2624 4) After A matches, the same WHILEM node is hit again.
2625
2626 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2627 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2628 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2629 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2630 of the external loop.
2631
2632 Currently present infoblocks form a tree with a stem formed by PL_curcc
2633 and whatever it mentions via ->next, and additional attached trees
2634 corresponding to temporarily unset infoblocks as in "5" above.
2635
2636 In the following picture infoblocks for outer loop of
2637 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2638 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2639 infoblocks are drawn below the "reset" infoblock.
2640
2641 In fact in the picture below we do not show failed matches for Z and T
2642 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2643 more obvious *why* one needs to *temporary* unset infoblocks.]
2644
2645 Matched REx position InfoBlocks Comment
2646 (Y(A)*?Z)*?T x
2647 Y(A)*?Z)*?T x <- O
2648 Y (A)*?Z)*?T x <- O
2649 Y A)*?Z)*?T x <- O <- I
2650 YA )*?Z)*?T x <- O <- I
2651 YA A)*?Z)*?T x <- O <- I
2652 YAA )*?Z)*?T x <- O <- I
2653 YAA Z)*?T x <- O # Temporary unset I
2654 I
2655
2656 YAAZ Y(A)*?Z)*?T x <- O
2657 I
2658
2659 YAAZY (A)*?Z)*?T x <- O
2660 I
2661
2662 YAAZY A)*?Z)*?T x <- O <- I
2663 I
2664
2665 YAAZYA )*?Z)*?T x <- O <- I
2666 I
2667
2668 YAAZYA Z)*?T x <- O # Temporary unset I
2669 I,I
2670
2671 YAAZYAZ )*?T x <- O
2672 I,I
2673
2674 YAAZYAZ T x # Temporary unset O
2675 O
2676 I,I
2677
2678 YAAZYAZT x
2679 O
2680 I,I
2681 *******************************************************************/
a0d0e21e
LW
2682 case CURLYX: {
2683 CURCUR cc;
3280af22 2684 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2685 /* No need to save/restore up to this paren */
2686 I32 parenfloor = scan->flags;
c277df42
IZ
2687
2688 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2689 next += ARG(next);
3280af22
NIS
2690 cc.oldcc = PL_regcc;
2691 PL_regcc = &cc;
cb434fcc
IZ
2692 /* XXXX Probably it is better to teach regpush to support
2693 parenfloor > PL_regsize... */
2694 if (parenfloor > *PL_reglastparen)
2695 parenfloor = *PL_reglastparen; /* Pessimization... */
2696 cc.parenfloor = parenfloor;
a0d0e21e
LW
2697 cc.cur = -1;
2698 cc.min = ARG1(scan);
2699 cc.max = ARG2(scan);
c277df42 2700 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2701 cc.next = next;
2702 cc.minmod = minmod;
2703 cc.lastloc = 0;
3280af22 2704 PL_reginput = locinput;
a0d0e21e
LW
2705 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2706 regcpblow(cp);
3280af22 2707 PL_regcc = cc.oldcc;
4633a7c4 2708 saySAME(n);
a0d0e21e
LW
2709 }
2710 /* NOT REACHED */
2711 case WHILEM: {
2712 /*
2713 * This is really hard to understand, because after we match
2714 * what we're trying to match, we must make sure the rest of
2c2d71f5 2715 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2716 * to go back UP the parse tree by recursing ever deeper. And
2717 * if it fails, we have to reset our parent's current state
2718 * that we can try again after backing off.
2719 */
2720
c277df42 2721 CHECKPOINT cp, lastcp;
3280af22 2722 CURCUR* cc = PL_regcc;
c277df42
IZ
2723 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2724
4633a7c4 2725 n = cc->cur + 1; /* how many we know we matched */
3280af22 2726 PL_reginput = locinput;
a0d0e21e 2727
c277df42
IZ
2728 DEBUG_r(
2729 PerlIO_printf(Perl_debug_log,
2730 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2731 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2732 (long)n, (long)cc->min,
2733 (long)cc->max, (long)cc)
2734 );
4633a7c4 2735
a0d0e21e
LW
2736 /* If degenerate scan matches "", assume scan done. */
2737
579cf2c3 2738 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2739 PL_regcc = cc->oldcc;
2ab05381
IZ
2740 if (PL_regcc)
2741 ln = PL_regcc->cur;
c277df42 2742 DEBUG_r(
c3464db5
DD
2743 PerlIO_printf(Perl_debug_log,
2744 "%*s empty match detected, try continuation...\n",
3280af22 2745 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2746 );
a0d0e21e 2747 if (regmatch(cc->next))
4633a7c4 2748 sayYES;
2ab05381
IZ
2749 if (PL_regcc)
2750 PL_regcc->cur = ln;
3280af22 2751 PL_regcc = cc;
4633a7c4 2752 sayNO;
a0d0e21e
LW
2753 }
2754
2755 /* First just match a string of min scans. */
2756
2757 if (n < cc->min) {
2758 cc->cur = n;
2759 cc->lastloc = locinput;
4633a7c4
LW
2760 if (regmatch(cc->scan))
2761 sayYES;
2762 cc->cur = n - 1;
c277df42 2763 cc->lastloc = lastloc;
4633a7c4 2764 sayNO;
a0d0e21e
LW
2765 }
2766
2c2d71f5
JH
2767 if (scan->flags) {
2768 /* Check whether we already were at this position.
2769 Postpone detection until we know the match is not
2770 *that* much linear. */
2771 if (!PL_reg_maxiter) {
2772 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2773 PL_reg_leftiter = PL_reg_maxiter;
2774 }
2775 if (PL_reg_leftiter-- == 0) {
2776 I32 size = (PL_reg_maxiter + 7)/8;
2777 if (PL_reg_poscache) {
2778 if (PL_reg_poscache_size < size) {
2779 Renew(PL_reg_poscache, size, char);
2780 PL_reg_poscache_size = size;
2781 }
2782 Zero(PL_reg_poscache, size, char);
2783 }
2784 else {
2785 PL_reg_poscache_size = size;
2786 Newz(29, PL_reg_poscache, size, char);
2787 }
2788 DEBUG_r(
2789 PerlIO_printf(Perl_debug_log,
2790 "%sDetected a super-linear match, switching on caching%s...\n",
2791 PL_colors[4], PL_colors[5])
2792 );
2793 }
2794 if (PL_reg_leftiter < 0) {
2795 I32 o = locinput - PL_bostr, b;
2796
2797 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2798 b = o % 8;
2799 o /= 8;
2800 if (PL_reg_poscache[o] & (1<<b)) {
2801 DEBUG_r(
2802 PerlIO_printf(Perl_debug_log,
2803 "%*s already tried at this position...\n",
2804 REPORT_CODE_OFF+PL_regindent*2, "")
2805 );
7821416a 2806 sayNO_SILENT;
2c2d71f5
JH
2807 }
2808 PL_reg_poscache[o] |= (1<<b);
2809 }
2810 }
2811
a0d0e21e
LW
2812 /* Prefer next over scan for minimal matching. */
2813
2814 if (cc->minmod) {
3280af22 2815 PL_regcc = cc->oldcc;
2ab05381
IZ
2816 if (PL_regcc)
2817 ln = PL_regcc->cur;
5f05dabc 2818 cp = regcppush(cc->parenfloor);
02db2b7b 2819 REGCP_SET(lastcp);
5f05dabc 2820 if (regmatch(cc->next)) {
c277df42 2821 regcpblow(cp);
4633a7c4 2822 sayYES; /* All done. */
5f05dabc 2823 }
02db2b7b 2824 REGCP_UNWIND(lastcp);
5f05dabc 2825 regcppop();
2ab05381
IZ
2826 if (PL_regcc)
2827 PL_regcc->cur = ln;
3280af22 2828 PL_regcc = cc;
a0d0e21e 2829
c277df42 2830 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2831 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2832 && !(PL_reg_flags & RF_warned)) {
2833 PL_reg_flags |= RF_warned;
e476b1b5 2834 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2835 "Complex regular subexpression recursion",
2836 REG_INFTY - 1);
c277df42 2837 }
4633a7c4 2838 sayNO;
c277df42 2839 }
a687059c 2840
c277df42 2841 DEBUG_r(
c3464db5
DD
2842 PerlIO_printf(Perl_debug_log,
2843 "%*s trying longer...\n",
3280af22 2844 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2845 );
a0d0e21e 2846 /* Try scanning more and see if it helps. */
3280af22 2847 PL_reginput = locinput;
a0d0e21e
LW
2848 cc->cur = n;
2849 cc->lastloc = locinput;
5f05dabc 2850 cp = regcppush(cc->parenfloor);
02db2b7b 2851 REGCP_SET(lastcp);
5f05dabc 2852 if (regmatch(cc->scan)) {
c277df42 2853 regcpblow(cp);
4633a7c4 2854 sayYES;
5f05dabc 2855 }
02db2b7b 2856 REGCP_UNWIND(lastcp);
5f05dabc 2857 regcppop();
4633a7c4 2858 cc->cur = n - 1;
c277df42 2859 cc->lastloc = lastloc;
4633a7c4 2860 sayNO;
a0d0e21e
LW
2861 }
2862
2863 /* Prefer scan over next for maximal matching. */
2864
2865 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2866 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2867 cc->cur = n;
2868 cc->lastloc = locinput;
02db2b7b 2869 REGCP_SET(lastcp);
5f05dabc 2870 if (regmatch(cc->scan)) {
c277df42 2871 regcpblow(cp);
4633a7c4 2872 sayYES;
5f05dabc 2873 }
02db2b7b 2874 REGCP_UNWIND(lastcp);
a0d0e21e 2875 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2876 PL_reginput = locinput;
c277df42 2877 DEBUG_r(
c3464db5
DD
2878 PerlIO_printf(Perl_debug_log,
2879 "%*s failed, try continuation...\n",
3280af22 2880 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2881 );
2882 }
e476b1b5 2883 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2884 && !(PL_reg_flags & RF_warned)) {
3280af22 2885 PL_reg_flags |= RF_warned;
e476b1b5 2886 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2887 "Complex regular subexpression recursion",
2888 REG_INFTY - 1);
a0d0e21e
LW
2889 }
2890
2891 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2892 PL_regcc = cc->oldcc;
2ab05381
IZ
2893 if (PL_regcc)
2894 ln = PL_regcc->cur;
a0d0e21e 2895 if (regmatch(cc->next))
4633a7c4 2896 sayYES;
2ab05381
IZ
2897 if (PL_regcc)
2898 PL_regcc->cur = ln;
3280af22 2899 PL_regcc = cc;
4633a7c4 2900 cc->cur = n - 1;
c277df42 2901 cc->lastloc = lastloc;
4633a7c4 2902 sayNO;
a0d0e21e
LW
2903 }
2904 /* NOT REACHED */
c277df42
IZ
2905 case BRANCHJ:
2906 next = scan + ARG(scan);
2907 if (next == scan)
2908 next = NULL;
2909 inner = NEXTOPER(NEXTOPER(scan));
2910 goto do_branch;
2911 case BRANCH:
2912 inner = NEXTOPER(scan);
2913 do_branch:
2914 {
2915 CHECKPOINT lastcp;
2916 c1 = OP(scan);
2917 if (OP(next) != c1) /* No choice. */
2918 next = inner; /* Avoid recursion. */
a0d0e21e 2919 else {
02db2b7b
IZ
2920 I32 lastparen = *PL_reglastparen;
2921 I32 unwind1;
2922 re_unwind_branch_t *uw;
2923
2924 /* Put unwinding data on stack */
2925 unwind1 = SSNEWt(1,re_unwind_branch_t);
2926 uw = SSPTRt(unwind1,re_unwind_branch_t);
2927 uw->prev = unwind;
2928 unwind = unwind1;
2929 uw->type = ((c1 == BRANCH)
2930 ? RE_UNWIND_BRANCH
2931 : RE_UNWIND_BRANCHJ);
2932 uw->lastparen = lastparen;
2933 uw->next = next;
2934 uw->locinput = locinput;
2935 uw->nextchr = nextchr;
2936#ifdef DEBUGGING
2937 uw->regindent = ++PL_regindent;
2938#endif
c277df42 2939
02db2b7b
IZ
2940 REGCP_SET(uw->lastcp);
2941
2942 /* Now go into the first branch */
2943 next = inner;
a687059c 2944 }
a0d0e21e
LW
2945 }
2946 break;
2947 case MINMOD:
2948 minmod = 1;
2949 break;
c277df42
IZ
2950 case CURLYM:
2951 {
00db4c45 2952 I32 l = 0;
c277df42
IZ
2953 CHECKPOINT lastcp;
2954
2955 /* We suppose that the next guy does not need
2956 backtracking: in particular, it is of constant length,
2957 and has no parenths to influence future backrefs. */
2958 ln = ARG1(scan); /* min to match */
2959 n = ARG2(scan); /* max to match */
c277df42
IZ
2960 paren = scan->flags;
2961 if (paren) {
3280af22
NIS
2962 if (paren > PL_regsize)
2963 PL_regsize = paren;
2964 if (paren > *PL_reglastparen)
2965 *PL_reglastparen = paren;
c277df42 2966 }
dc45a647 2967 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2968 if (paren)
2969 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2970 PL_reginput = locinput;
c277df42
IZ
2971 if (minmod) {
2972 minmod = 0;
2973 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2974 sayNO;
5f4b28b2 2975 if (ln && l == 0 && n >= ln
c277df42
IZ
2976 /* In fact, this is tricky. If paren, then the
2977 fact that we did/didnot match may influence
2978 future execution. */
2979 && !(paren && ln == 0))
2980 ln = n;
3280af22 2981 locinput = PL_reginput;
22c35a8c 2982 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2983 c1 = (U8)*STRING(next);
c277df42 2984 if (OP(next) == EXACTF)
22c35a8c 2985 c2 = PL_fold[c1];
c277df42 2986 else if (OP(next) == EXACTFL)
22c35a8c 2987 c2 = PL_fold_locale[c1];
c277df42
IZ
2988 else
2989 c2 = c1;
a0ed51b3
LW
2990 }
2991 else
c277df42 2992 c1 = c2 = -1000;
02db2b7b 2993 REGCP_SET(lastcp);
5f4b28b2 2994 /* This may be improved if l == 0. */
c277df42
IZ
2995 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2996 /* If it could work, try it. */
2997 if (c1 == -1000 ||
3280af22
NIS
2998 UCHARAT(PL_reginput) == c1 ||
2999 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3000 {
3001 if (paren) {
3002 if (n) {
cf93c79d
IZ
3003 PL_regstartp[paren] =
3004 HOPc(PL_reginput, -l) - PL_bostr;
3005 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3006 }
3007 else
cf93c79d 3008 PL_regendp[paren] = -1;
c277df42
IZ
3009 }
3010 if (regmatch(next))
3011 sayYES;
02db2b7b 3012 REGCP_UNWIND(lastcp);
c277df42
IZ
3013 }
3014 /* Couldn't or didn't -- move forward. */
3280af22 3015 PL_reginput = locinput;
c277df42
IZ
3016 if (regrepeat_hard(scan, 1, &l)) {
3017 ln++;
3280af22 3018 locinput = PL_reginput;
c277df42
IZ
3019 }
3020 else
3021 sayNO;
3022 }
a0ed51b3
LW
3023 }
3024 else {
c277df42
IZ
3025 n = regrepeat_hard(scan, n, &l);
3026 if (n != 0 && l == 0
3027 /* In fact, this is tricky. If paren, then the
3028 fact that we did/didnot match may influence
3029 future execution. */
3030 && !(paren && ln == 0))
3031 ln = n;
3280af22 3032 locinput = PL_reginput;
c277df42 3033 DEBUG_r(
5c0ca799 3034 PerlIO_printf(Perl_debug_log,
faccc32b 3035 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3036 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3037 (IV) n, (IV)l)
c277df42
IZ
3038 );
3039 if (n >= ln) {
22c35a8c 3040 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3041 c1 = (U8)*STRING(next);
c277df42 3042 if (OP(next) == EXACTF)
22c35a8c 3043 c2 = PL_fold[c1];
c277df42 3044 else if (OP(next) == EXACTFL)
22c35a8c 3045 c2 = PL_fold_locale[c1];
c277df42
IZ
3046 else
3047 c2 = c1;
a0ed51b3
LW
3048 }
3049 else
c277df42
IZ
3050 c1 = c2 = -1000;
3051 }
02db2b7b 3052 REGCP_SET(lastcp);
c277df42
IZ
3053 while (n >= ln) {
3054 /* If it could work, try it. */
3055 if (c1 == -1000 ||
3280af22
NIS
3056 UCHARAT(PL_reginput) == c1 ||
3057 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3058 {
3059 DEBUG_r(
c3464db5 3060 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3061 "%*s trying tail with n=%"IVdf"...\n",
3062 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3063 );
3064 if (paren) {
3065 if (n) {
cf93c79d
IZ
3066 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3067 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3068 }
a0ed51b3 3069 else
cf93c79d 3070 PL_regendp[paren] = -1;
c277df42 3071 }
a0ed51b3
LW
3072 if (regmatch(next))
3073 sayYES;
02db2b7b 3074 REGCP_UNWIND(lastcp);
a0ed51b3 3075 }
c277df42
IZ
3076 /* Couldn't or didn't -- back up. */
3077 n--;
dfe13c55 3078 locinput = HOPc(locinput, -l);
3280af22 3079 PL_reginput = locinput;
c277df42
IZ
3080 }
3081 }
3082 sayNO;
3083 break;
3084 }
3085 case CURLYN:
3086 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3087 if (paren > PL_regsize)
3088 PL_regsize = paren;
3089 if (paren > *PL_reglastparen)
3090 *PL_reglastparen = paren;
c277df42
IZ
3091 ln = ARG1(scan); /* min to match */
3092 n = ARG2(scan); /* max to match */
dc45a647 3093 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3094 goto repeat;
a0d0e21e 3095 case CURLY:
c277df42 3096 paren = 0;
a0d0e21e
LW
3097 ln = ARG1(scan); /* min to match */
3098 n = ARG2(scan); /* max to match */
dc45a647 3099 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3100 goto repeat;
3101 case STAR:
3102 ln = 0;
c277df42 3103 n = REG_INFTY;
a0d0e21e 3104 scan = NEXTOPER(scan);
c277df42 3105 paren = 0;
a0d0e21e
LW
3106 goto repeat;
3107 case PLUS:
c277df42
IZ
3108 ln = 1;
3109 n = REG_INFTY;
3110 scan = NEXTOPER(scan);
3111 paren = 0;
3112 repeat:
a0d0e21e
LW
3113 /*
3114 * Lookahead to avoid useless match attempts
3115 * when we know what character comes next.
3116 */
22c35a8c 3117 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3118 U8 *s = (U8*)STRING(next);
3119 if (!UTF) {
3120 c2 = c1 = *s;
3121 if (OP(next) == EXACTF)
3122 c2 = PL_fold[c1];
3123 else if (OP(next) == EXACTFL)
3124 c2 = PL_fold_locale[c1];
3125 }
3126 else { /* UTF */
3127 if (OP(next) == EXACTF) {
3128 c1 = to_utf8_lower(s);
3129 c2 = to_utf8_upper(s);
3130 }
3131 else {
3132 c2 = c1 = utf8_to_uv_simple(s, NULL);
3133 }
3134 }
bbce6d69 3135 }
a0d0e21e 3136 else
bbce6d69 3137 c1 = c2 = -1000;
3280af22 3138 PL_reginput = locinput;
a0d0e21e 3139 if (minmod) {
c277df42 3140 CHECKPOINT lastcp;
a0d0e21e
LW
3141 minmod = 0;
3142 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3143 sayNO;
a0ed51b3 3144 locinput = PL_reginput;
02db2b7b 3145 REGCP_SET(lastcp);
0fe9bf95 3146 if (c1 != -1000) {
1aa99e6b 3147 char *e; /* Should not check after this */
0fe9bf95
IZ
3148 char *old = locinput;
3149
1aa99e6b 3150 if (n == REG_INFTY) {
0fe9bf95 3151 e = PL_regeol - 1;
1aa99e6b
IH
3152 if (do_utf8)
3153 while (UTF8_IS_CONTINUATION(*(U8*)e))
3154 e--;
3155 }
3156 else if (do_utf8) {
3157 int m = n - ln;
3158 for (e = locinput;
3159 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3160 e += UTF8SKIP(e);
3161 }
3162 else {
3163 e = locinput + n - ln;
3164 if (e >= PL_regeol)
3165 e = PL_regeol - 1;
3166 }
0fe9bf95 3167 while (1) {
1aa99e6b 3168 int count;
0fe9bf95 3169 /* Find place 'next' could work */
1aa99e6b
IH
3170 if (!do_utf8) {
3171 if (c1 == c2) {
3172 while (locinput <= e && *locinput != c1)
3173 locinput++;
3174 } else {
3175 while (locinput <= e
3176 && *locinput != c1
3177 && *locinput != c2)
3178 locinput++;
3179 }
3180 count = locinput - old;
3181 }
3182 else {
3183 STRLEN len;
3184 if (c1 == c2) {
3185 for (count = 0;
3186 locinput <= e &&
3187 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3188 count++)
3189 locinput += len;
3190
3191 } else {
3192 for (count = 0; locinput <= e; count++) {
3193 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3194 if (c == c1 || c == c2)
3195 break;
3196 locinput += len;
3197 }
3198 }
0fe9bf95
IZ
3199 }
3200 if (locinput > e)
3201 sayNO;
3202 /* PL_reginput == old now */
3203 if (locinput != old) {
3204 ln = 1; /* Did some */
1aa99e6b 3205 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3206 sayNO;
3207 }
3208 /* PL_reginput == locinput now */
29d1e993 3209 TRYPAREN(paren, ln, locinput);
0fe9bf95 3210 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3211 REGCP_UNWIND(lastcp);
0fe9bf95 3212 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3213 old = locinput;
3214 if (do_utf8)
3215 locinput += UTF8SKIP(locinput);
3216 else
3217 locinput++;
0fe9bf95
IZ
3218 }
3219 }
3220 else
c277df42 3221 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3222 UV c;
3223 if (c1 != -1000) {
3224 if (do_utf8)
3225 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3226 else
3227 c = UCHARAT(PL_reginput);
3228 }
a0d0e21e 3229 /* If it could work, try it. */
1aa99e6b 3230 if (c1 == -1000 || c == c1 || c == c2)
bbce6d69 3231 {
29d1e993 3232 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3233 REGCP_UNWIND(lastcp);
bbce6d69 3234 }
c277df42 3235 /* Couldn't or didn't -- move forward. */
a0ed51b3 3236 PL_reginput = locinput;
a0d0e21e
LW
3237 if (regrepeat(scan, 1)) {
3238 ln++;
a0ed51b3
LW
3239 locinput = PL_reginput;
3240 }
3241 else
4633a7c4 3242 sayNO;
a0d0e21e
LW
3243 }
3244 }
3245 else {
c277df42 3246 CHECKPOINT lastcp;
a0d0e21e 3247 n = regrepeat(scan, n);
a0ed51b3 3248 locinput = PL_reginput;
22c35a8c 3249 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3250 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3251 ln = n; /* why back off? */
1aeab75a
GS
3252 /* ...because $ and \Z can match before *and* after
3253 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3254 We should back off by one in this case. */
3255 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3256 ln--;
3257 }
02db2b7b 3258 REGCP_SET(lastcp);
c277df42 3259 if (paren) {
1aa99e6b 3260 UV c;
c277df42 3261 while (n >= ln) {
1aa99e6b
IH
3262 if (c1 != -1000) {
3263 if (do_utf8)
3264 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3265 else
3266 c = UCHARAT(PL_reginput);
3267 }
c277df42 3268 /* If it could work, try it. */
1aa99e6b 3269 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3270 {
29d1e993 3271 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3272 REGCP_UNWIND(lastcp);
c277df42
IZ
3273 }
3274 /* Couldn't or didn't -- back up. */
3275 n--;
dfe13c55 3276 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3277 }
a0ed51b3
LW
3278 }
3279 else {
1aa99e6b 3280 UV c;
c277df42 3281 while (n >= ln) {
1aa99e6b
IH
3282 if (c1 != -1000) {
3283 if (do_utf8)
3284 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3285 else
3286 c = UCHARAT(PL_reginput);
3287 }
c277df42 3288 /* If it could work, try it. */
1aa99e6b 3289 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3290 {
29d1e993 3291 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3292 REGCP_UNWIND(lastcp);
c277df42
IZ
3293 }
3294 /* Couldn't or didn't -- back up. */
3295 n--;
dfe13c55 3296 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3297 }
a0d0e21e
LW
3298 }
3299 }
4633a7c4 3300 sayNO;
c277df42 3301 break;
a0d0e21e 3302 case END:
0f5d15d6
IZ
3303 if (PL_reg_call_cc) {
3304 re_cc_state *cur_call_cc = PL_reg_call_cc;
3305 CURCUR *cctmp = PL_regcc;
3306 regexp *re = PL_reg_re;
3307 CHECKPOINT cp, lastcp;
3308
3309 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3310 REGCP_SET(lastcp);
0f5d15d6
IZ
3311 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3312 the caller. */
3313 PL_reginput = locinput; /* Make position available to
3314 the callcc. */
3315 cache_re(PL_reg_call_cc->re);
3316 PL_regcc = PL_reg_call_cc->cc;
3317 PL_reg_call_cc = PL_reg_call_cc->prev;
3318 if (regmatch(cur_call_cc->node)) {
3319 PL_reg_call_cc = cur_call_cc;
3320 regcpblow(cp);
3321 sayYES;
3322 }
02db2b7b 3323 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3324 regcppop();
3325 PL_reg_call_cc = cur_call_cc;
3326 PL_regcc = cctmp;
3327 PL_reg_re = re;
3328 cache_re(re);
3329
3330 DEBUG_r(
3331 PerlIO_printf(Perl_debug_log,
3332 "%*s continuation failed...\n",
3333 REPORT_CODE_OFF+PL_regindent*2, "")
3334 );
7821416a 3335 sayNO_SILENT;
0f5d15d6 3336 }
7821416a
IZ
3337 if (locinput < PL_regtill) {
3338 DEBUG_r(PerlIO_printf(Perl_debug_log,
3339 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3340 PL_colors[4],
3341 (long)(locinput - PL_reg_starttry),
3342 (long)(PL_regtill - PL_reg_starttry),
3343 PL_colors[5]));
3344 sayNO_FINAL; /* Cannot match: too short. */
3345 }
3346 PL_reginput = locinput; /* put where regtry can find it */
3347 sayYES_FINAL; /* Success! */
7e5428c5 3348 case SUCCEED:
3280af22 3349 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3350 sayYES_LOUD; /* Success! */
c277df42
IZ
3351 case SUSPEND:
3352 n = 1;
9fe1d20c 3353 PL_reginput = locinput;
c277df42 3354 goto do_ifmatch;
a0d0e21e 3355 case UNLESSM:
c277df42 3356 n = 0;
a0ed51b3 3357 if (scan->flags) {
0fe9bf95
IZ
3358 if (UTF) { /* XXXX This is absolutely
3359 broken, we read before
3360 start of string. */
3361 s = HOPMAYBEc(locinput, -scan->flags);
3362 if (!s)
3363 goto say_yes;
3364 PL_reginput = s;
3365 }
3366 else {
3367 if (locinput < PL_bostr + scan->flags)
3368 goto say_yes;
3369 PL_reginput = locinput - scan->flags;
3370 goto do_ifmatch;
3371 }
a0ed51b3
LW
3372 }
3373 else
3374 PL_reginput = locinput;
c277df42
IZ
3375 goto do_ifmatch;
3376 case IFMATCH:
3377 n = 1;
a0ed51b3 3378 if (scan->flags) {
0fe9bf95
IZ
3379 if (UTF) { /* XXXX This is absolutely
3380 broken, we read before
3381 start of string. */
3382 s = HOPMAYBEc(locinput, -scan->flags);
3383 if (!s || s < PL_bostr)
3384 goto say_no;
3385 PL_reginput = s;
3386 }
3387 else {
3388 if (locinput < PL_bostr + scan->flags)
3389 goto say_no;
3390 PL_reginput = locinput - scan->flags;
3391 goto do_ifmatch;
3392 }
a0ed51b3
LW
3393 }
3394 else
3395 PL_reginput = locinput;
3396
c277df42 3397 do_ifmatch:
c277df42
IZ
3398 inner = NEXTOPER(NEXTOPER(scan));
3399 if (regmatch(inner) != n) {
3400 say_no:
3401 if (logical) {
3402 logical = 0;
3403 sw = 0;
3404 goto do_longjump;
a0ed51b3
LW
3405 }
3406 else
c277df42
IZ
3407 sayNO;
3408 }
3409 say_yes:
3410 if (logical) {
3411 logical = 0;
3412 sw = 1;
3413 }
fe44a5e8 3414 if (OP(scan) == SUSPEND) {
3280af22 3415 locinput = PL_reginput;
565764a8 3416 nextchr = UCHARAT(locinput);
fe44a5e8 3417 }
c277df42
IZ
3418 /* FALL THROUGH. */
3419 case LONGJMP:
3420 do_longjump:
3421 next = scan + ARG(scan);
3422 if (next == scan)
3423 next = NULL;
a0d0e21e
LW
3424 break;
3425 default:
b900a521 3426 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3427 PTR2UV(scan), OP(scan));
cea2e8a9 3428 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3429 }
02db2b7b 3430 reenter:
a0d0e21e
LW
3431 scan = next;
3432 }
a687059c 3433
a0d0e21e
LW
3434 /*
3435 * We get here only if there's trouble -- normally "case END" is
3436 * the terminating point.
3437 */
cea2e8a9 3438 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3439 /*NOTREACHED*/
4633a7c4
LW
3440 sayNO;
3441
7821416a
IZ
3442yes_loud:
3443 DEBUG_r(
3444 PerlIO_printf(Perl_debug_log,
3445 "%*s %scould match...%s\n",
3446 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3447 );
3448 goto yes;
3449yes_final:
3450 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3451 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3452yes:
3453#ifdef DEBUGGING
3280af22 3454 PL_regindent--;
4633a7c4 3455#endif
02db2b7b
IZ
3456
3457#if 0 /* Breaks $^R */
3458 if (unwind)
3459 regcpblow(firstcp);
3460#endif
4633a7c4
LW
3461 return 1;
3462
3463no:
7821416a
IZ
3464 DEBUG_r(
3465 PerlIO_printf(Perl_debug_log,
3466 "%*s %sfailed...%s\n",
3467 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3468 );
3469 goto do_no;
3470no_final:
3471do_no:
02db2b7b
IZ
3472 if (unwind) {
3473 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3474
3475 switch (uw->type) {
3476 case RE_UNWIND_BRANCH:
3477 case RE_UNWIND_BRANCHJ:
3478 {
3479 re_unwind_branch_t *uwb = &(uw->branch);
3480 I32 lastparen = uwb->lastparen;
3481
3482 REGCP_UNWIND(uwb->lastcp);
3483 for (n = *PL_reglastparen; n > lastparen; n--)
3484 PL_regendp[n] = -1;
3485 *PL_reglastparen = n;
3486 scan = next = uwb->next;
3487 if ( !scan ||
3488 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3489 ? BRANCH : BRANCHJ) ) { /* Failure */
3490 unwind = uwb->prev;
3491#ifdef DEBUGGING
3492 PL_regindent--;
3493#endif
3494 goto do_no;
3495 }
3496 /* Have more choice yet. Reuse the same uwb. */
3497 /*SUPPRESS 560*/
3498 if ((n = (uwb->type == RE_UNWIND_BRANCH
3499 ? NEXT_OFF(next) : ARG(next))))
3500 next += n;
3501 else
3502 next = NULL; /* XXXX Needn't unwinding in this case... */
3503 uwb->next = next;
3504 next = NEXTOPER(scan);
3505 if (uwb->type == RE_UNWIND_BRANCHJ)
3506 next = NEXTOPER(next);
3507 locinput = uwb->locinput;
3508 nextchr = uwb->nextchr;
3509#ifdef DEBUGGING
3510 PL_regindent = uwb->regindent;
3511#endif
3512
3513 goto reenter;
3514 }
3515 /* NOT REACHED */
3516 default:
3517 Perl_croak(aTHX_ "regexp unwind memory corruption");
3518 }
3519 /* NOT REACHED */
3520 }
4633a7c4 3521#ifdef DEBUGGING
3280af22 3522 PL_regindent--;
4633a7c4 3523#endif
a0d0e21e 3524 return 0;
a687059c
LW
3525}
3526
3527/*
3528 - regrepeat - repeatedly match something simple, report how many
3529 */
3530/*
3531 * [This routine now assumes that it will only match on things of length 1.
3532 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3533 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3534 */
76e3520e 3535STATIC I32
cea2e8a9 3536S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3537{
a0d0e21e 3538 register char *scan;
a0d0e21e 3539 register I32 c;
3280af22 3540 register char *loceol = PL_regeol;
a0ed51b3 3541 register I32 hardcount = 0;
ffc61ed2 3542 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3543
3280af22 3544 scan = PL_reginput;
c277df42 3545 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3546 loceol = scan + max;
a0d0e21e 3547 switch (OP(p)) {
22c35a8c 3548 case REG_ANY:
1aa99e6b 3549 if (do_utf8) {
ffc61ed2 3550 loceol = PL_regeol;
1aa99e6b 3551 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3552 scan += UTF8SKIP(scan);
3553 hardcount++;
3554 }
3555 } else {
3556 while (scan < loceol && *scan != '\n')
3557 scan++;
a0ed51b3
LW
3558 }
3559 break;
ffc61ed2 3560 case SANY:
1aa99e6b 3561 if (do_utf8) {
ffc61ed2 3562 loceol = PL_regeol;
1aa99e6b 3563 while (hardcount < max && scan < loceol) {
ffc61ed2
JH
3564 scan += UTF8SKIP(scan);
3565 hardcount++;
3566 }
3567 } else {
3568 scan = loceol;
a0ed51b3
LW
3569 }
3570 break;
bbce6d69 3571 case EXACT: /* length of string is 1 */
cd439c50 3572 c = (U8)*STRING(p);
bbce6d69 3573 while (scan < loceol && UCHARAT(scan) == c)
3574 scan++;
3575 break;
3576 case EXACTF: /* length of string is 1 */
cd439c50 3577 c = (U8)*STRING(p);
bbce6d69 3578 while (scan < loceol &&
22c35a8c 3579 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3580 scan++;
3581 break;
3582 case EXACTFL: /* length of string is 1 */
3280af22 3583 PL_reg_flags |= RF_tainted;
cd439c50 3584 c = (U8)*STRING(p);
bbce6d69 3585 while (scan < loceol &&
22c35a8c 3586 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3587 scan++;
3588 break;
3589 case ANYOF:
ffc61ed2
JH
3590 if (do_utf8) {
3591 loceol = PL_regeol;
1aa99e6b
IH
3592 while (hardcount < max && scan < loceol &&
3593 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3594 scan += UTF8SKIP(scan);
3595 hardcount++;
3596 }
3597 } else {
3598 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3599 scan++;
3600 }
a0d0e21e
LW
3601 break;
3602 case ALNUM:
1aa99e6b 3603 if (do_utf8) {
ffc61ed2 3604 loceol = PL_regeol;
1aa99e6b
IH
3605 while (hardcount < max && scan < loceol &&
3606 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3607 scan += UTF8SKIP(scan);
3608 hardcount++;
3609 }
3610 } else {
3611 while (scan < loceol && isALNUM(*scan))
3612 scan++;
a0ed51b3
LW
3613 }
3614 break;
bbce6d69 3615 case ALNUML:
3280af22 3616 PL_reg_flags |= RF_tainted;
1aa99e6b 3617 if (do_utf8) {
ffc61ed2 3618 loceol = PL_regeol;
1aa99e6b
IH
3619 while (hardcount < max && scan < loceol &&
3620 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3621 scan += UTF8SKIP(scan);
3622 hardcount++;
3623 }
3624 } else {
3625 while (scan < loceol && isALNUM_LC(*scan))
3626 scan++;
a0ed51b3
LW
3627 }
3628 break;
a0d0e21e 3629 case NALNUM:
1aa99e6b 3630 if (do_utf8) {
ffc61ed2 3631 loceol = PL_regeol;
1aa99e6b
IH
3632 while (hardcount < max && scan < loceol &&
3633 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3634 scan += UTF8SKIP(scan);
3635 hardcount++;
3636 }
3637 } else {
3638 while (scan < loceol && !isALNUM(*scan))
3639 scan++;
a0ed51b3
LW
3640 }
3641 break;
bbce6d69 3642 case NALNUML:
3280af22 3643 PL_reg_flags |= RF_tainted;
1aa99e6b 3644 if (do_utf8) {
ffc61ed2 3645 loceol = PL_regeol;
1aa99e6b
IH
3646 while (hardcount < max && scan < loceol &&
3647 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3648 scan += UTF8SKIP(scan);
3649 hardcount++;
3650 }
3651 } else {
3652 while (scan < loceol && !isALNUM_LC(*scan))
3653 scan++;
a0ed51b3
LW
3654 }
3655 break;
a0d0e21e 3656 case SPACE:
1aa99e6b 3657 if (do_utf8) {
ffc61ed2 3658 loceol = PL_regeol;
1aa99e6b 3659 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3660 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3661 scan += UTF8SKIP(scan);
3662 hardcount++;
3663 }
3664 } else {
3665 while (scan < loceol && isSPACE(*scan))
3666 scan++;
a0ed51b3
LW
3667 }
3668 break;
bbce6d69 3669 case SPACEL:
3280af22 3670 PL_reg_flags |= RF_tainted;
1aa99e6b 3671 if (do_utf8) {
ffc61ed2 3672 loceol = PL_regeol;
1aa99e6b 3673 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3674 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3675 scan += UTF8SKIP(scan);
3676 hardcount++;
3677 }
3678 } else {
3679 while (scan < loceol && isSPACE_LC(*scan))
3680 scan++;
a0ed51b3
LW
3681 }
3682 break;
a0d0e21e 3683 case NSPACE:
1aa99e6b 3684 if (do_utf8) {
ffc61ed2 3685 loceol = PL_regeol;
1aa99e6b 3686 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3687 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3688 scan += UTF8SKIP(scan);
3689 hardcount++;
3690 }
3691 } else {
3692 while (scan < loceol && !isSPACE(*scan))
3693 scan++;
3694 break;
a0ed51b3 3695 }
bbce6d69 3696 case NSPACEL:
3280af22 3697 PL_reg_flags |= RF_tainted;
1aa99e6b 3698 if (do_utf8) {
ffc61ed2 3699 loceol = PL_regeol;
1aa99e6b 3700 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3701 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3702 scan += UTF8SKIP(scan);
3703 hardcount++;
3704 }
3705 } else {
3706 while (scan < loceol && !isSPACE_LC(*scan))
3707 scan++;
a0ed51b3
LW
3708 }
3709 break;
a0d0e21e 3710 case DIGIT:
1aa99e6b 3711 if (do_utf8) {
ffc61ed2 3712 loceol = PL_regeol;
1aa99e6b
IH
3713 while (hardcount < max && scan < loceol &&
3714 swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3715 scan += UTF8SKIP(scan);
3716 hardcount++;
3717 }
3718 } else {
3719 while (scan < loceol && isDIGIT(*scan))
3720 scan++;
a0ed51b3
LW
3721 }
3722 break;
a0d0e21e 3723 case NDIGIT:
1aa99e6b 3724 if (do_utf8) {
ffc61ed2 3725 loceol = PL_regeol;
1aa99e6b
IH
3726 while (hardcount < max && scan < loceol &&
3727 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3728 scan += UTF8SKIP(scan);
3729 hardcount++;
3730 }
3731 } else {
3732 while (scan < loceol && !isDIGIT(*scan))
3733 scan++;
a0ed51b3
LW
3734 }
3735 break;
a0d0e21e
LW
3736 default: /* Called on something of 0 width. */
3737 break; /* So match right here or not at all. */
3738 }
a687059c 3739
a0ed51b3
LW
3740 if (hardcount)
3741 c = hardcount;
3742 else
3743 c = scan - PL_reginput;
3280af22 3744 PL_reginput = scan;
a687059c 3745
c277df42
IZ
3746 DEBUG_r(
3747 {
3748 SV *prop = sv_newmortal();
3749
3750 regprop(prop, p);
3751 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3752 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3753 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3754 });
3755
a0d0e21e 3756 return(c);
a687059c
LW
3757}
3758
3759/*
c277df42
IZ
3760 - regrepeat_hard - repeatedly match something, report total lenth and length
3761 *
3762 * The repeater is supposed to have constant length.
3763 */
3764
76e3520e 3765STATIC I32
cea2e8a9 3766S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3767{
3768 register char *scan;
3769 register char *start;
3280af22 3770 register char *loceol = PL_regeol;
a0ed51b3 3771 I32 l = 0;
708e3b05 3772 I32 count = 0, res = 1;
a0ed51b3
LW
3773
3774 if (!max)
3775 return 0;
c277df42 3776
3280af22 3777 start = PL_reginput;
1aa99e6b 3778 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3779 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3780 if (!count++) {
3781 l = 0;
3782 while (start < PL_reginput) {
3783 l++;
3784 start += UTF8SKIP(start);
3785 }
3786 *lp = l;
3787 if (l == 0)
3788 return max;
3789 }
3790 if (count == max)
3791 return count;
3792 }
3793 }
3794 else {
708e3b05 3795 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3796 if (!count++) {
3797 *lp = l = PL_reginput - start;
3798 if (max != REG_INFTY && l*max < loceol - scan)
3799 loceol = scan + l*max;
3800 if (l == 0)
3801 return max;
c277df42
IZ
3802 }
3803 }
3804 }
708e3b05 3805 if (!res)
3280af22 3806 PL_reginput = scan;
c277df42 3807
a0ed51b3 3808 return count;
c277df42
IZ
3809}
3810
3811/*
ffc61ed2
JH
3812- regclass_swash - prepare the utf8 swash
3813*/
3814
3815SV *
3816Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3817{
3818 SV *sw = NULL;
3819 SV *si = NULL;
3820
3821 if (PL_regdata && PL_regdata->count) {
3822 U32 n = ARG(node);
3823
3824 if (PL_regdata->what[n] == 's') {
3825 SV *rv = (SV*)PL_regdata->data[n];
3826 AV *av = (AV*)SvRV((SV*)rv);
3827 SV **a;
3828
3829 si = *av_fetch(av, 0, FALSE);
3830 a = av_fetch(av, 1, FALSE);
3831
3832 if (a)
3833 sw = *a;
3834 else if (si && doinit) {
3835 sw = swash_init("utf8", "", si, 1, 0);
3836 (void)av_store(av, 1, sw);
3837 }
3838 }
3839 }
3840
3841 if (initsvp)
3842 *initsvp = si;
3843
3844 return sw;
3845}
3846
3847/*
cb8d8820 3848 - reginclass - determine if a character falls into a character class
bbce6d69 3849 */
3850
76e3520e 3851STATIC bool
ffc61ed2 3852S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3853{
ffc61ed2 3854 char flags = ANYOF_FLAGS(n);
bbce6d69 3855 bool match = FALSE;
1aa99e6b
IH
3856 UV c;
3857 STRLEN len;
3858
3859 if (do_utf8)
3860 c = utf8_to_uv_simple(p, &len);
3861 else
3862 c = *p;
bbce6d69 3863
ffc61ed2
JH
3864 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3865 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3866 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3867 match = TRUE;
bbce6d69 3868 }
1aa99e6b
IH
3869 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3870 match = TRUE;
ffc61ed2
JH
3871 if (!match) {
3872 SV *sw = regclass_swash(n, TRUE, 0);
3873
3874 if (sw) {
3875 if (swash_fetch(sw, p))
3876 match = TRUE;
3877 else if (flags & ANYOF_FOLD) {
3878 U8 tmpbuf[UTF8_MAXLEN+1];
3879
3880 if (flags & ANYOF_LOCALE) {
3881 PL_reg_flags |= RF_tainted;
3882 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3883 }
1aa99e6b
IH
3884 else
3885 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
ffc61ed2
JH
3886 if (swash_fetch(sw, tmpbuf))
3887 match = TRUE;
3888 }
3889 }
bbce6d69 3890 }
3891 }
1aa99e6b 3892 if (!match && c < 256) {
ffc61ed2
JH
3893 if (ANYOF_BITMAP_TEST(n, c))
3894 match = TRUE;
3895 else if (flags & ANYOF_FOLD) {
3896 I32 f;
a0ed51b3 3897
ffc61ed2
JH
3898 if (flags & ANYOF_LOCALE) {
3899 PL_reg_flags |= RF_tainted;
3900 f = PL_fold_locale[c];
3901 }
3902 else
3903 f = PL_fold[c];
3904 if (f != c && ANYOF_BITMAP_TEST(n, f))
3905 match = TRUE;
3906 }
3907
3908 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3909 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3910 if (
3911 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3941 ) /* How's that for a conditional? */
3942 {
3943 match = TRUE;
3944 }
a0ed51b3 3945 }
a0ed51b3
LW
3946 }
3947
a0ed51b3
LW
3948 return (flags & ANYOF_INVERT) ? !match : match;
3949}
161b471a 3950
dfe13c55 3951STATIC U8 *
cea2e8a9 3952S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3953{
1aa99e6b
IH
3954 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3955}
3956
3957STATIC U8 *
3958S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3959{
a0ed51b3 3960 if (off >= 0) {
1aa99e6b 3961 while (off-- && s < lim) {
ffc61ed2 3962 /* XXX could check well-formedness here */
a0ed51b3 3963 s += UTF8SKIP(s);
ffc61ed2 3964 }
a0ed51b3
LW
3965 }
3966 else {
3967 while (off++) {
1aa99e6b 3968 if (s > lim) {
a0ed51b3 3969 s--;
ffc61ed2 3970 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3971 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3972 s--;
ffc61ed2
JH
3973 }
3974 /* XXX could check well-formedness here */
a0ed51b3
LW
3975 }
3976 }
3977 }
3978 return s;
3979}
161b471a 3980
dfe13c55 3981STATIC U8 *
1aa99e6b
IH
3982S_reghopmaybe(pTHX_ U8 *s, I32 off)
3983{
3984 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3985}
3986
3987STATIC U8 *
3988S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3989{
3990 if (off >= 0) {
1aa99e6b 3991 while (off-- && s < lim) {
ffc61ed2 3992 /* XXX could check well-formedness here */
a0ed51b3 3993 s += UTF8SKIP(s);
ffc61ed2 3994 }
a0ed51b3
LW
3995 if (off >= 0)
3996 return 0;
3997 }
3998 else {
3999 while (off++) {
1aa99e6b 4000 if (s > lim) {
a0ed51b3 4001 s--;
ffc61ed2 4002 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4003 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4004 s--;
ffc61ed2
JH
4005 }
4006 /* XXX could check well-formedness here */
a0ed51b3
LW
4007 }
4008 else
4009 break;
4010 }
4011 if (off <= 0)
4012 return 0;
4013 }
4014 return s;
4015}
51371543
GS
4016
4017#ifdef PERL_OBJECT
51371543
GS
4018#include "XSUB.h"
4019#endif
4020
4021static void
4022restore_pos(pTHXo_ void *arg)
4023{
51371543
GS
4024 if (PL_reg_eval_set) {
4025 if (PL_reg_oldsaved) {
4026 PL_reg_re->subbeg = PL_reg_oldsaved;
4027 PL_reg_re->sublen = PL_reg_oldsavedlen;
4028 RX_MATCH_COPIED_on(PL_reg_re);
4029 }
4030 PL_reg_magic->mg_len = PL_reg_oldpos;
4031 PL_reg_eval_set = 0;
4032 PL_curpm = PL_reg_oldcurpm;
4033 }
4034}