This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[patch] xsubpp: make sv_setref_* targetable
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
56953603
IZ
45#endif
46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
51371543
GS
126static void restore_pos(pTHXo_ void *arg);
127
128
76e3520e 129STATIC CHECKPOINT
cea2e8a9 130S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 131{
3280af22 132 int retval = PL_savestack_ix;
b1ce53c5
JH
133#define REGCP_PAREN_ELEMS 4
134 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
135 int p;
136
b1ce53c5
JH
137#define REGCP_OTHER_ELEMS 5
138 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 139 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 140/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
141 SSPUSHINT(PL_regendp[p]);
142 SSPUSHINT(PL_regstartp[p]);
3280af22 143 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
144 SSPUSHINT(p);
145 }
b1ce53c5 146/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
147 SSPUSHINT(PL_regsize);
148 SSPUSHINT(*PL_reglastparen);
149 SSPUSHPTR(PL_reginput);
41123dfd
JH
150#define REGCP_FRAME_ELEMS 2
151/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
152 * are needed for the regexp context stack bookkeeping. */
153 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 154 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 155
a0d0e21e
LW
156 return retval;
157}
158
c277df42 159/* These are needed since we do not localize EVAL nodes: */
02db2b7b 160# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 161 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 162 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 163
02db2b7b 164# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 165 PerlIO_printf(Perl_debug_log, \
faccc32b 166 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 167 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 168
76e3520e 169STATIC char *
cea2e8a9 170S_regcppop(pTHX)
a0d0e21e 171{
b1ce53c5 172 I32 i;
a0d0e21e
LW
173 U32 paren = 0;
174 char *input;
cf93c79d 175 I32 tmps;
b1ce53c5
JH
176
177 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 178 i = SSPOPINT;
b1ce53c5
JH
179 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
180 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 181 input = (char *) SSPOPPTR;
3280af22
NIS
182 *PL_reglastparen = SSPOPINT;
183 PL_regsize = SSPOPINT;
b1ce53c5
JH
184
185 /* Now restore the parentheses context. */
41123dfd
JH
186 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
187 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 188 paren = (U32)SSPOPINT;
3280af22 189 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
190 PL_regstartp[paren] = SSPOPINT;
191 tmps = SSPOPINT;
3280af22
NIS
192 if (paren <= *PL_reglastparen)
193 PL_regendp[paren] = tmps;
c277df42 194 DEBUG_r(
c3464db5 195 PerlIO_printf(Perl_debug_log,
b900a521
JH
196 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
197 (UV)paren, (IV)PL_regstartp[paren],
198 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
199 (IV)PL_regendp[paren],
3280af22 200 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 201 );
a0d0e21e 202 }
c277df42 203 DEBUG_r(
3280af22 204 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 205 PerlIO_printf(Perl_debug_log,
faccc32b
JH
206 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
207 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
208 }
209 );
daf18116 210#if 1
dafc8851
JH
211 /* It would seem that the similar code in regtry()
212 * already takes care of this, and in fact it is in
213 * a better location to since this code can #if 0-ed out
214 * but the code in regtry() is needed or otherwise tests
215 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
216 * (as of patchlevel 7877) will fail. Then again,
217 * this code seems to be necessary or otherwise
218 * building DynaLoader will fail:
219 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
220 * --jhi */
3280af22
NIS
221 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
222 if (paren > PL_regsize)
cf93c79d
IZ
223 PL_regstartp[paren] = -1;
224 PL_regendp[paren] = -1;
a0d0e21e 225 }
dafc8851 226#endif
a0d0e21e
LW
227 return input;
228}
229
0f5d15d6 230STATIC char *
cea2e8a9 231S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
232{
233 I32 tmp = PL_savestack_ix;
234
235 PL_savestack_ix = ss;
236 regcppop();
237 PL_savestack_ix = tmp;
942e002e 238 return Nullch;
0f5d15d6
IZ
239}
240
241typedef struct re_cc_state
242{
243 I32 ss;
244 regnode *node;
245 struct re_cc_state *prev;
246 CURCUR *cc;
247 regexp *re;
248} re_cc_state;
249
02db2b7b 250#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 251
29d1e993
HS
252#define TRYPAREN(paren, n, input) { \
253 if (paren) { \
254 if (n) { \
255 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
256 PL_regendp[paren] = input - PL_bostr; \
257 } \
258 else \
259 PL_regendp[paren] = -1; \
260 } \
261 if (regmatch(next)) \
262 sayYES; \
263 if (paren && n) \
264 PL_regendp[paren] = -1; \
265}
266
267
a687059c 268/*
e50aee73 269 * pregexec and friends
a687059c
LW
270 */
271
272/*
c277df42 273 - pregexec - match a regexp against a string
a687059c 274 */
c277df42 275I32
864dbfa3 276Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 277 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
278/* strend: pointer to null at end of string */
279/* strbeg: real beginning of string */
280/* minend: end of match must be >=minend after stringarg. */
281/* nosave: For optimizations. */
282{
283 return
284 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
285 nosave ? 0 : REXEC_COPY_STR);
286}
0f5d15d6
IZ
287
288STATIC void
cea2e8a9 289S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
290{
291 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
292#ifdef DEBUGGING
293 PL_regprogram = prog->program;
294#endif
295 PL_regnpar = prog->nparens;
296 PL_regdata = prog->data;
297 PL_reg_re = prog;
298}
22e551b9 299
cad2e5aa
JH
300/*
301 * Need to implement the following flags for reg_anch:
302 *
303 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
304 * USE_INTUIT_ML
305 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
306 * INTUIT_AUTORITATIVE_ML
307 * INTUIT_ONCE_NOML - Intuit can match in one location only.
308 * INTUIT_ONCE_ML
309 *
310 * Another flag for this function: SECOND_TIME (so that float substrs
311 * with giant delta may be not rechecked).
312 */
313
314/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
315
2c2d71f5 316/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
317 Otherwise, only SvCUR(sv) is used to get strbeg. */
318
319/* XXXX We assume that strpos is strbeg unless sv. */
320
6eb5f6b9
JH
321/* XXXX Some places assume that there is a fixed substring.
322 An update may be needed if optimizer marks as "INTUITable"
323 RExen without fixed substrings. Similarly, it is assumed that
324 lengths of all the strings are no more than minlen, thus they
325 cannot come from lookahead.
326 (Or minlen should take into account lookahead.) */
327
2c2d71f5
JH
328/* A failure to find a constant substring means that there is no need to make
329 an expensive call to REx engine, thus we celebrate a failure. Similarly,
330 finding a substring too deep into the string means that less calls to
30944b6d
IZ
331 regtry() should be needed.
332
333 REx compiler's optimizer found 4 possible hints:
334 a) Anchored substring;
335 b) Fixed substring;
336 c) Whether we are anchored (beginning-of-line or \G);
337 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 338 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
339 string which does not contradict any of them.
340 */
2c2d71f5 341
6eb5f6b9
JH
342/* Most of decisions we do here should have been done at compile time.
343 The nodes of the REx which we used for the search should have been
344 deleted from the finite automaton. */
345
cad2e5aa
JH
346char *
347Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
348 char *strend, U32 flags, re_scream_pos_data *data)
349{
2c2d71f5 350 register I32 start_shift;
cad2e5aa 351 /* Should be nonnegative! */
2c2d71f5
JH
352 register I32 end_shift;
353 register char *s;
354 register SV *check;
a1933d95 355 char *strbeg;
cad2e5aa
JH
356 char *t;
357 I32 ml_anch;
2c2d71f5 358 char *tmp;
6eb5f6b9
JH
359 register char *other_last = Nullch; /* other substr checked before this */
360 char *check_at; /* check substr found at this pos */
30944b6d
IZ
361#ifdef DEBUGGING
362 char *i_strpos = strpos;
363#endif
cad2e5aa
JH
364
365 DEBUG_r( if (!PL_colorset) reginitcolors() );
366 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 367 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
368 PL_colors[4],PL_colors[5],PL_colors[0],
369 prog->precomp,
370 PL_colors[1],
371 (strlen(prog->precomp) > 60 ? "..." : ""),
372 PL_colors[0],
b900a521 373 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
374 strpos, PL_colors[1],
375 (strend - strpos > 60 ? "..." : ""))
376 );
377
1aa99e6b
IH
378 if (prog->reganch & ROPT_UTF8)
379 PL_reg_flags |= RF_utf8;
380
381 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 382 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 383 goto fail;
2c2d71f5 384 }
a1933d95 385 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 386 PL_regeol = strend;
653099ff 387 check = prog->check_substr;
2c2d71f5 388 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
389 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
390 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 391 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 392
5675c1a6
IZ
393 if (!ml_anch) {
394 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
395 /* SvCUR is not set on references: SvRV and SvPVX overlap */
396 && sv && !SvROK(sv)
a1933d95 397 && (strpos != strbeg)) {
5675c1a6
IZ
398 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
399 goto fail;
400 }
3baa4c62
JH
401 if (prog->check_offset_min == prog->check_offset_max &&
402 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 403 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
404 I32 slen;
405
1aa99e6b 406 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
407 if (SvTAIL(check)) {
408 slen = SvCUR(check); /* >= 1 */
cad2e5aa 409
2c2d71f5
JH
410 if ( strend - s > slen || strend - s < slen - 1
411 || (strend - s == slen && strend[-1] != '\n')) {
412 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
413 goto fail_finish;
cad2e5aa
JH
414 }
415 /* Now should match s[0..slen-2] */
416 slen--;
653099ff 417 if (slen && (*SvPVX(check) != *s
cad2e5aa 418 || (slen > 1
653099ff 419 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
420 report_neq:
421 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
422 goto fail_finish;
423 }
cad2e5aa 424 }
653099ff
GS
425 else if (*SvPVX(check) != *s
426 || ((slen = SvCUR(check)) > 1
427 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
428 goto report_neq;
429 goto success_at_start;
5675c1a6 430 }
cad2e5aa 431 }
2c2d71f5 432 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 433 s = strpos;
2c2d71f5 434 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 435 end_shift = prog->minlen - start_shift -
653099ff 436 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 437 if (!ml_anch) {
653099ff
GS
438 I32 end = prog->check_offset_max + CHR_SVLEN(check)
439 - (SvTAIL(check) != 0);
1aa99e6b 440 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
441
442 if (end_shift < eshift)
443 end_shift = eshift;
444 }
cad2e5aa 445 }
2c2d71f5 446 else { /* Can match at random position */
cad2e5aa
JH
447 ml_anch = 0;
448 s = strpos;
2c2d71f5
JH
449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
450 /* Should be nonnegative! */
451 end_shift = prog->minlen - start_shift -
653099ff 452 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
453 }
454
2c2d71f5 455#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 456 if (end_shift < 0)
6bbae5e6 457 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
458#endif
459
2c2d71f5
JH
460 restart:
461 /* Find a possible match in the region s..strend by looking for
462 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 463 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
464 I32 p = -1; /* Internal iterator of scream. */
465 I32 *pp = data ? data->scream_pos : &p;
466
2c2d71f5
JH
467 if (PL_screamfirst[BmRARE(check)] >= 0
468 || ( BmRARE(check) == '\n'
469 && (BmPREVIOUS(check) == SvCUR(check) - 1)
470 && SvTAIL(check) ))
471 s = screaminstr(sv, check,
472 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 473 else
2c2d71f5 474 goto fail_finish;
cad2e5aa
JH
475 if (data)
476 *data->scream_olds = s;
477 }
3baa4c62
JH
478 else if (prog->reganch & ROPT_SANY_SEEN)
479 s = fbm_instr((U8*)(s + start_shift),
480 (U8*)(strend - end_shift),
481 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 482 else
1aa99e6b
IH
483 s = fbm_instr(HOP3(s, start_shift, strend),
484 HOP3(strend, -end_shift, strbeg),
2c2d71f5 485 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
486
487 /* Update the count-of-usability, remove useless subpatterns,
488 unshift s. */
2c2d71f5
JH
489
490 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
491 (s ? "Found" : "Did not find"),
492 ((check == prog->anchored_substr) ? "anchored" : "floating"),
493 PL_colors[0],
7b0972df
JH
494 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
495 SvPVX(check),
2c2d71f5
JH
496 PL_colors[1], (SvTAIL(check) ? "$" : ""),
497 (s ? " at offset " : "...\n") ) );
498
499 if (!s)
500 goto fail_finish;
501
6eb5f6b9
JH
502 check_at = s;
503
2c2d71f5 504 /* Finish the diagnostic message */
30944b6d 505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
506
507 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
508 Start with the other substr.
509 XXXX no SCREAM optimization yet - and a very coarse implementation
510 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
511 *always* match. Probably should be marked during compile...
512 Probably it is right to do no SCREAM here...
513 */
514
515 if (prog->float_substr && prog->anchored_substr) {
30944b6d 516 /* Take into account the "other" substring. */
2c2d71f5
JH
517 /* XXXX May be hopelessly wrong for UTF... */
518 if (!other_last)
6eb5f6b9 519 other_last = strpos;
2c2d71f5 520 if (check == prog->float_substr) {
30944b6d
IZ
521 do_other_anchored:
522 {
1aa99e6b 523 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
524 char *s1 = s;
525
2c2d71f5
JH
526 t = s - prog->check_offset_max;
527 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
528 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 529 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 530 && t > strpos)))
30944b6d 531 /* EMPTY */;
2c2d71f5
JH
532 else
533 t = strpos;
1aa99e6b 534 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
535 if (t < other_last) /* These positions already checked */
536 t = other_last;
1aa99e6b 537 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
538 if (last < last1)
539 last1 = last;
540 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
541 /* On end-of-str: see comment below. */
542 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
543 HOP3(HOP3(last1, prog->anchored_offset, strend)
544 + SvCUR(prog->anchored_substr),
545 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
546 prog->anchored_substr,
547 PL_multiline ? FBMrf_MULTILINE : 0);
548 DEBUG_r(PerlIO_printf(Perl_debug_log,
549 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
550 (s ? "Found" : "Contradicts"),
551 PL_colors[0],
7b0972df
JH
552 (int)(SvCUR(prog->anchored_substr)
553 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
554 SvPVX(prog->anchored_substr),
555 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
556 if (!s) {
557 if (last1 >= last2) {
558 DEBUG_r(PerlIO_printf(Perl_debug_log,
559 ", giving up...\n"));
560 goto fail_finish;
561 }
562 DEBUG_r(PerlIO_printf(Perl_debug_log,
563 ", trying floating at offset %ld...\n",
1aa99e6b
IH
564 (long)(HOP3c(s1, 1, strend) - i_strpos)));
565 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
566 s = HOP3c(last, 1, strend);
2c2d71f5
JH
567 goto restart;
568 }
569 else {
570 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 571 (long)(s - i_strpos)));
1aa99e6b
IH
572 t = HOP3c(s, -prog->anchored_offset, strbeg);
573 other_last = HOP3c(s, 1, strend);
30944b6d 574 s = s1;
2c2d71f5
JH
575 if (t == strpos)
576 goto try_at_start;
2c2d71f5
JH
577 goto try_at_offset;
578 }
30944b6d 579 }
2c2d71f5
JH
580 }
581 else { /* Take into account the floating substring. */
582 char *last, *last1;
583 char *s1 = s;
584
1aa99e6b
IH
585 t = HOP3c(s, -start_shift, strbeg);
586 last1 = last =
587 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
588 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
589 last = HOP3c(t, prog->float_max_offset, strend);
590 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
591 if (s < other_last)
592 s = other_last;
2c2d71f5
JH
593 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
594 /* fbm_instr() takes into account exact value of end-of-str
595 if the check is SvTAIL(ed). Since false positives are OK,
596 and end-of-str is not later than strend we are OK. */
597 s = fbm_instr((unsigned char*)s,
598 (unsigned char*)last + SvCUR(prog->float_substr)
599 - (SvTAIL(prog->float_substr)!=0),
600 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
601 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
7b0972df
JH
604 (int)(SvCUR(prog->float_substr)
605 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
606 SvPVX(prog->float_substr),
607 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
608 if (!s) {
609 if (last1 == last) {
610 DEBUG_r(PerlIO_printf(Perl_debug_log,
611 ", giving up...\n"));
612 goto fail_finish;
613 }
614 DEBUG_r(PerlIO_printf(Perl_debug_log,
615 ", trying anchored starting at offset %ld...\n",
30944b6d 616 (long)(s1 + 1 - i_strpos)));
803ff556 617 other_last = last;
1aa99e6b 618 s = HOP3c(t, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
622 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
803ff556 624 other_last = s; /* Fix this later. --Hugo */
30944b6d 625 s = s1;
2c2d71f5
JH
626 if (t == strpos)
627 goto try_at_start;
2c2d71f5
JH
628 goto try_at_offset;
629 }
630 }
cad2e5aa 631 }
2c2d71f5
JH
632
633 t = s - prog->check_offset_max;
2c2d71f5
JH
634 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
635 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
636 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
637 && t > strpos))) {
2c2d71f5
JH
638 /* Fixed substring is found far enough so that the match
639 cannot start at strpos. */
640 try_at_offset:
cad2e5aa 641 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
642 /* Eventually fbm_*() should handle this, but often
643 anchored_offset is not 0, so this check will not be wasted. */
644 /* XXXX In the code below we prefer to look for "^" even in
645 presence of anchored substrings. And we search even
646 beyond the found float position. These pessimizations
647 are historical artefacts only. */
648 find_anchor:
2c2d71f5 649 while (t < strend - prog->minlen) {
cad2e5aa 650 if (*t == '\n') {
4ee3650e 651 if (t < check_at - prog->check_offset_min) {
30944b6d 652 if (prog->anchored_substr) {
4ee3650e
GS
653 /* Since we moved from the found position,
654 we definitely contradict the found anchored
30944b6d
IZ
655 substr. Due to the above check we do not
656 contradict "check" substr.
657 Thus we can arrive here only if check substr
658 is float. Redo checking for "other"=="fixed".
659 */
660 strpos = t + 1;
661 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
662 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
663 goto do_other_anchored;
664 }
4ee3650e
GS
665 /* We don't contradict the found floating substring. */
666 /* XXXX Why not check for STCLASS? */
cad2e5aa 667 s = t + 1;
2c2d71f5 668 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 669 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
670 goto set_useful;
671 }
4ee3650e
GS
672 /* Position contradicts check-string */
673 /* XXXX probably better to look for check-string
674 than for "\n", so one should lower the limit for t? */
675 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 676 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 677 other_last = strpos = s = t + 1;
cad2e5aa
JH
678 goto restart;
679 }
680 t++;
681 }
2c2d71f5
JH
682 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
683 PL_colors[0],PL_colors[1]));
684 goto fail_finish;
cad2e5aa 685 }
f5952150
GS
686 else {
687 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
688 PL_colors[0],PL_colors[1]));
689 }
cad2e5aa
JH
690 s = t;
691 set_useful:
2c2d71f5 692 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
693 }
694 else {
f5952150 695 /* The found string does not prohibit matching at strpos,
2c2d71f5 696 - no optimization of calling REx engine can be performed,
f5952150
GS
697 unless it was an MBOL and we are not after MBOL,
698 or a future STCLASS check will fail this. */
2c2d71f5
JH
699 try_at_start:
700 /* Even in this situation we may use MBOL flag if strpos is offset
701 wrt the start of the string. */
05b4157f 702 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 703 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
704 /* May be due to an implicit anchor of m{.*foo} */
705 && !(prog->reganch & ROPT_IMPLICIT))
706 {
cad2e5aa
JH
707 t = strpos;
708 goto find_anchor;
709 }
30944b6d 710 DEBUG_r( if (ml_anch)
f5952150
GS
711 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
712 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 713 );
2c2d71f5 714 success_at_start:
30944b6d 715 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 716 && prog->check_substr /* Could be deleted already */
cad2e5aa 717 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
718 && prog->check_substr == prog->float_substr)
719 {
cad2e5aa 720 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 721 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
722 SvREFCNT_dec(prog->check_substr);
723 prog->check_substr = Nullsv; /* disable */
724 prog->float_substr = Nullsv; /* clear */
5e39e1e5 725 check = Nullsv; /* abort */
cad2e5aa 726 s = strpos;
3cf5c195
IZ
727 /* XXXX This is a remnant of the old implementation. It
728 looks wasteful, since now INTUIT can use many
6eb5f6b9 729 other heuristics. */
cad2e5aa
JH
730 prog->reganch &= ~RE_USE_INTUIT;
731 }
732 else
733 s = strpos;
734 }
735
6eb5f6b9
JH
736 /* Last resort... */
737 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
738 if (prog->regstclass) {
739 /* minlen == 0 is possible if regstclass is \b or \B,
740 and the fixed substr is ''$.
741 Since minlen is already taken into account, s+1 is before strend;
742 accidentally, minlen >= 1 guaranties no false positives at s + 1
743 even for \b or \B. But (minlen? 1 : 0) below assumes that
744 regstclass does not come from lookahead... */
745 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
746 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 747 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 748 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 749 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 750 : 1);
6eb5f6b9 751 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
752 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
753 : (prog->float_substr
754 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
755 cl_l, strend)
756 : strend);
a1933d95 757 char *startpos = strbeg;
6eb5f6b9
JH
758
759 t = s;
76384e4a 760 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 761 PL_regdata = prog->data;
76384e4a
GS
762 PL_bostr = startpos;
763 }
6eb5f6b9
JH
764 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
765 if (!s) {
766#ifdef DEBUGGING
767 char *what;
768#endif
769 if (endpos == strend) {
770 DEBUG_r( PerlIO_printf(Perl_debug_log,
771 "Could not match STCLASS...\n") );
772 goto fail;
773 }
66e933ab
GS
774 DEBUG_r( PerlIO_printf(Perl_debug_log,
775 "This position contradicts STCLASS...\n") );
653099ff
GS
776 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
777 goto fail;
6eb5f6b9
JH
778 /* Contradict one of substrings */
779 if (prog->anchored_substr) {
6eb5f6b9
JH
780 if (prog->anchored_substr == check) {
781 DEBUG_r( what = "anchored" );
782 hop_and_restart:
1aa99e6b 783 s = HOP3c(t, 1, strend);
66e933ab
GS
784 if (s + start_shift + end_shift > strend) {
785 /* XXXX Should be taken into account earlier? */
786 DEBUG_r( PerlIO_printf(Perl_debug_log,
787 "Could not match STCLASS...\n") );
788 goto fail;
789 }
5e39e1e5
HS
790 if (!check)
791 goto giveup;
6eb5f6b9 792 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 793 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
794 what, (long)(s + start_shift - i_strpos)) );
795 goto restart;
796 }
66e933ab 797 /* Have both, check_string is floating */
6eb5f6b9
JH
798 if (t + start_shift >= check_at) /* Contradicts floating=check */
799 goto retry_floating_check;
800 /* Recheck anchored substring, but not floating... */
801 s = check_at;
5e39e1e5
HS
802 if (!check)
803 goto giveup;
6eb5f6b9 804 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 805 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
806 (long)(other_last - i_strpos)) );
807 goto do_other_anchored;
808 }
60e71179
GS
809 /* Another way we could have checked stclass at the
810 current position only: */
811 if (ml_anch) {
812 s = t = t + 1;
5e39e1e5
HS
813 if (!check)
814 goto giveup;
60e71179 815 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
816 "Looking for /%s^%s/m starting at offset %ld...\n",
817 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 818 goto try_at_offset;
66e933ab 819 }
60e71179
GS
820 if (!prog->float_substr) /* Could have been deleted */
821 goto fail;
6eb5f6b9
JH
822 /* Check is floating subtring. */
823 retry_floating_check:
824 t = check_at - start_shift;
825 DEBUG_r( what = "floating" );
826 goto hop_and_restart;
827 }
828 DEBUG_r( if (t != s)
829 PerlIO_printf(Perl_debug_log,
830 "By STCLASS: moving %ld --> %ld\n",
831 (long)(t - i_strpos), (long)(s - i_strpos));
832 else
833 PerlIO_printf(Perl_debug_log,
834 "Does not contradict STCLASS...\n") );
835 }
5e39e1e5
HS
836 giveup:
837 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
838 PL_colors[4], (check ? "Guessed" : "Giving up"),
839 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 840 return s;
2c2d71f5
JH
841
842 fail_finish: /* Substring not found */
66e933ab
GS
843 if (prog->check_substr) /* could be removed already */
844 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 845 fail:
2c2d71f5 846 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
847 PL_colors[4],PL_colors[5]));
848 return Nullch;
849}
9661b544 850
6eb5f6b9 851/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
852STATIC char *
853S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 854{
6eb5f6b9
JH
855 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
856 char *m;
d8093b23
G
857 STRLEN ln;
858 unsigned int c1;
859 unsigned int c2;
6eb5f6b9
JH
860 char *e;
861 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 862 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 863
6eb5f6b9
JH
864 /* We know what class it must start with. */
865 switch (OP(c)) {
6eb5f6b9 866 case ANYOF:
a0ed51b3 867 while (s < strend) {
ffc61ed2 868 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
869 if (tmp && (norun || regtry(prog, s)))
870 goto got_it;
871 else
872 tmp = doevery;
a0ed51b3 873 }
6eb5f6b9
JH
874 else
875 tmp = 1;
ffc61ed2 876 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 877 }
6eb5f6b9
JH
878 break;
879 case EXACTF:
880 m = STRING(c);
881 ln = STR_LEN(c);
1aa99e6b
IH
882 if (UTF) {
883 c1 = to_utf8_lower((U8*)m);
884 c2 = to_utf8_upper((U8*)m);
885 }
886 else {
887 c1 = *(U8*)m;
888 c2 = PL_fold[c1];
889 }
6eb5f6b9
JH
890 goto do_exactf;
891 case EXACTFL:
892 m = STRING(c);
893 ln = STR_LEN(c);
d8093b23 894 c1 = *(U8*)m;
6eb5f6b9
JH
895 c2 = PL_fold_locale[c1];
896 do_exactf:
897 e = strend - ln;
b3c9acc1 898
6eb5f6b9
JH
899 if (norun && e < s)
900 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
901
902 if (do_utf8) {
903 STRLEN len;
904 if (c1 == c2)
905 while (s <= e) {
906 if ( utf8_to_uv_simple((U8*)s, &len) == c1
907 && regtry(prog, s) )
908 goto got_it;
909 s += len;
910 }
911 else
912 while (s <= e) {
913 UV c = utf8_to_uv_simple((U8*)s, &len);
914 if ( (c == c1 || c == c2) && regtry(prog, s) )
915 goto got_it;
916 s += len;
917 }
918 }
919 else {
920 if (c1 == c2)
921 while (s <= e) {
922 if ( *(U8*)s == c1
923 && (ln == 1 || !(OP(c) == EXACTF
924 ? ibcmp(s, m, ln)
925 : ibcmp_locale(s, m, ln)))
926 && (norun || regtry(prog, s)) )
927 goto got_it;
928 s++;
929 }
930 else
931 while (s <= e) {
932 if ( (*(U8*)s == c1 || *(U8*)s == c2)
933 && (ln == 1 || !(OP(c) == EXACTF
934 ? ibcmp(s, m, ln)
935 : ibcmp_locale(s, m, ln)))
936 && (norun || regtry(prog, s)) )
937 goto got_it;
938 s++;
939 }
b3c9acc1
IZ
940 }
941 break;
bbce6d69 942 case BOUNDL:
3280af22 943 PL_reg_flags |= RF_tainted;
bbce6d69 944 /* FALL THROUGH */
a0d0e21e 945 case BOUND:
ffc61ed2
JH
946 if (do_utf8) {
947 if (s == startpos)
948 tmp = '\n';
949 else {
1aa99e6b 950 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
951
952 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
953 }
954 tmp = ((OP(c) == BOUND ?
955 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
956 while (s < strend) {
957 if (tmp == !(OP(c) == BOUND ?
958 swash_fetch(PL_utf8_alnum, (U8*)s) :
959 isALNUM_LC_utf8((U8*)s)))
960 {
961 tmp = !tmp;
962 if ((norun || regtry(prog, s)))
963 goto got_it;
964 }
965 s += UTF8SKIP(s);
a687059c 966 }
a0d0e21e 967 }
667bb95a 968 else {
ffc61ed2
JH
969 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
970 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
971 while (s < strend) {
972 if (tmp ==
973 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
974 tmp = !tmp;
975 if ((norun || regtry(prog, s)))
976 goto got_it;
977 }
978 s++;
a0ed51b3 979 }
a0ed51b3 980 }
6eb5f6b9 981 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
982 goto got_it;
983 break;
bbce6d69 984 case NBOUNDL:
3280af22 985 PL_reg_flags |= RF_tainted;
bbce6d69 986 /* FALL THROUGH */
a0d0e21e 987 case NBOUND:
ffc61ed2
JH
988 if (do_utf8) {
989 if (s == startpos)
990 tmp = '\n';
991 else {
1aa99e6b 992 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
993
994 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
995 }
996 tmp = ((OP(c) == NBOUND ?
997 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
998 while (s < strend) {
999 if (tmp == !(OP(c) == NBOUND ?
1000 swash_fetch(PL_utf8_alnum, (U8*)s) :
1001 isALNUM_LC_utf8((U8*)s)))
1002 tmp = !tmp;
1003 else if ((norun || regtry(prog, s)))
1004 goto got_it;
1005 s += UTF8SKIP(s);
1006 }
a0d0e21e 1007 }
667bb95a 1008 else {
ffc61ed2
JH
1009 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
1010 tmp = ((OP(c) == NBOUND ?
1011 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1012 while (s < strend) {
1013 if (tmp ==
1014 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1015 tmp = !tmp;
1016 else if ((norun || regtry(prog, s)))
1017 goto got_it;
1018 s++;
1019 }
a0ed51b3 1020 }
6eb5f6b9 1021 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1022 goto got_it;
1023 break;
a0d0e21e 1024 case ALNUM:
ffc61ed2
JH
1025 if (do_utf8) {
1026 while (s < strend) {
1027 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1028 if (tmp && (norun || regtry(prog, s)))
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
bbce6d69 1033 else
ffc61ed2
JH
1034 tmp = 1;
1035 s += UTF8SKIP(s);
bbce6d69 1036 }
bbce6d69 1037 }
ffc61ed2
JH
1038 else {
1039 while (s < strend) {
1040 if (isALNUM(*s)) {
1041 if (tmp && (norun || regtry(prog, s)))
1042 goto got_it;
1043 else
1044 tmp = doevery;
1045 }
a0ed51b3 1046 else
ffc61ed2
JH
1047 tmp = 1;
1048 s++;
a0ed51b3 1049 }
a0ed51b3
LW
1050 }
1051 break;
bbce6d69 1052 case ALNUML:
3280af22 1053 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1054 if (do_utf8) {
1055 while (s < strend) {
1056 if (isALNUM_LC_utf8((U8*)s)) {
1057 if (tmp && (norun || regtry(prog, s)))
1058 goto got_it;
1059 else
1060 tmp = doevery;
1061 }
a687059c 1062 else
ffc61ed2
JH
1063 tmp = 1;
1064 s += UTF8SKIP(s);
a0d0e21e 1065 }
a0d0e21e 1066 }
ffc61ed2
JH
1067 else {
1068 while (s < strend) {
1069 if (isALNUM_LC(*s)) {
1070 if (tmp && (norun || regtry(prog, s)))
1071 goto got_it;
1072 else
1073 tmp = doevery;
1074 }
a0ed51b3 1075 else
ffc61ed2
JH
1076 tmp = 1;
1077 s++;
a0ed51b3 1078 }
a0ed51b3
LW
1079 }
1080 break;
a0d0e21e 1081 case NALNUM:
ffc61ed2
JH
1082 if (do_utf8) {
1083 while (s < strend) {
1084 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1085 if (tmp && (norun || regtry(prog, s)))
1086 goto got_it;
1087 else
1088 tmp = doevery;
1089 }
bbce6d69 1090 else
ffc61ed2
JH
1091 tmp = 1;
1092 s += UTF8SKIP(s);
bbce6d69 1093 }
bbce6d69 1094 }
ffc61ed2
JH
1095 else {
1096 while (s < strend) {
1097 if (!isALNUM(*s)) {
1098 if (tmp && (norun || regtry(prog, s)))
1099 goto got_it;
1100 else
1101 tmp = doevery;
1102 }
a0ed51b3 1103 else
ffc61ed2
JH
1104 tmp = 1;
1105 s++;
a0ed51b3 1106 }
a0ed51b3
LW
1107 }
1108 break;
bbce6d69 1109 case NALNUML:
3280af22 1110 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1111 if (do_utf8) {
1112 while (s < strend) {
1113 if (!isALNUM_LC_utf8((U8*)s)) {
1114 if (tmp && (norun || regtry(prog, s)))
1115 goto got_it;
1116 else
1117 tmp = doevery;
1118 }
a687059c 1119 else
ffc61ed2
JH
1120 tmp = 1;
1121 s += UTF8SKIP(s);
a687059c 1122 }
a0d0e21e 1123 }
ffc61ed2
JH
1124 else {
1125 while (s < strend) {
1126 if (!isALNUM_LC(*s)) {
1127 if (tmp && (norun || regtry(prog, s)))
1128 goto got_it;
1129 else
1130 tmp = doevery;
1131 }
a0ed51b3 1132 else
ffc61ed2
JH
1133 tmp = 1;
1134 s++;
a0ed51b3 1135 }
a0ed51b3
LW
1136 }
1137 break;
a0d0e21e 1138 case SPACE:
ffc61ed2
JH
1139 if (do_utf8) {
1140 while (s < strend) {
1141 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
a0d0e21e 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s += UTF8SKIP(s);
2304df62 1150 }
a0d0e21e 1151 }
ffc61ed2
JH
1152 else {
1153 while (s < strend) {
1154 if (isSPACE(*s)) {
1155 if (tmp && (norun || regtry(prog, s)))
1156 goto got_it;
1157 else
1158 tmp = doevery;
1159 }
a0ed51b3 1160 else
ffc61ed2
JH
1161 tmp = 1;
1162 s++;
a0ed51b3 1163 }
a0ed51b3
LW
1164 }
1165 break;
bbce6d69 1166 case SPACEL:
3280af22 1167 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1168 if (do_utf8) {
1169 while (s < strend) {
1170 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
bbce6d69 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s += UTF8SKIP(s);
bbce6d69 1179 }
bbce6d69 1180 }
ffc61ed2
JH
1181 else {
1182 while (s < strend) {
1183 if (isSPACE_LC(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
a0ed51b3 1189 else
ffc61ed2
JH
1190 tmp = 1;
1191 s++;
a0ed51b3 1192 }
a0ed51b3
LW
1193 }
1194 break;
a0d0e21e 1195 case NSPACE:
ffc61ed2
JH
1196 if (do_utf8) {
1197 while (s < strend) {
1198 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1199 if (tmp && (norun || regtry(prog, s)))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
a0d0e21e 1204 else
ffc61ed2
JH
1205 tmp = 1;
1206 s += UTF8SKIP(s);
a687059c 1207 }
a0d0e21e 1208 }
ffc61ed2
JH
1209 else {
1210 while (s < strend) {
1211 if (!isSPACE(*s)) {
1212 if (tmp && (norun || regtry(prog, s)))
1213 goto got_it;
1214 else
1215 tmp = doevery;
1216 }
a0ed51b3 1217 else
ffc61ed2
JH
1218 tmp = 1;
1219 s++;
a0ed51b3 1220 }
a0ed51b3
LW
1221 }
1222 break;
bbce6d69 1223 case NSPACEL:
3280af22 1224 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1225 if (do_utf8) {
1226 while (s < strend) {
1227 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1228 if (tmp && (norun || regtry(prog, s)))
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
bbce6d69 1233 else
ffc61ed2
JH
1234 tmp = 1;
1235 s += UTF8SKIP(s);
bbce6d69 1236 }
bbce6d69 1237 }
ffc61ed2
JH
1238 else {
1239 while (s < strend) {
1240 if (!isSPACE_LC(*s)) {
1241 if (tmp && (norun || regtry(prog, s)))
1242 goto got_it;
1243 else
1244 tmp = doevery;
1245 }
a0ed51b3 1246 else
ffc61ed2
JH
1247 tmp = 1;
1248 s++;
a0ed51b3 1249 }
a0ed51b3
LW
1250 }
1251 break;
a0d0e21e 1252 case DIGIT:
ffc61ed2
JH
1253 if (do_utf8) {
1254 while (s < strend) {
1255 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1256 if (tmp && (norun || regtry(prog, s)))
1257 goto got_it;
1258 else
1259 tmp = doevery;
1260 }
a0d0e21e 1261 else
ffc61ed2
JH
1262 tmp = 1;
1263 s += UTF8SKIP(s);
2b69d0c2 1264 }
a0d0e21e 1265 }
ffc61ed2
JH
1266 else {
1267 while (s < strend) {
1268 if (isDIGIT(*s)) {
1269 if (tmp && (norun || regtry(prog, s)))
1270 goto got_it;
1271 else
1272 tmp = doevery;
1273 }
a0ed51b3 1274 else
ffc61ed2
JH
1275 tmp = 1;
1276 s++;
a0ed51b3 1277 }
a0ed51b3
LW
1278 }
1279 break;
b8c5462f
JH
1280 case DIGITL:
1281 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1282 if (do_utf8) {
1283 while (s < strend) {
1284 if (isDIGIT_LC_utf8((U8*)s)) {
1285 if (tmp && (norun || regtry(prog, s)))
1286 goto got_it;
1287 else
1288 tmp = doevery;
1289 }
b8c5462f 1290 else
ffc61ed2
JH
1291 tmp = 1;
1292 s += UTF8SKIP(s);
b8c5462f 1293 }
b8c5462f 1294 }
ffc61ed2
JH
1295 else {
1296 while (s < strend) {
1297 if (isDIGIT_LC(*s)) {
1298 if (tmp && (norun || regtry(prog, s)))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
b8c5462f 1303 else
ffc61ed2
JH
1304 tmp = 1;
1305 s++;
b8c5462f 1306 }
b8c5462f
JH
1307 }
1308 break;
a0d0e21e 1309 case NDIGIT:
ffc61ed2
JH
1310 if (do_utf8) {
1311 while (s < strend) {
1312 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1313 if (tmp && (norun || regtry(prog, s)))
1314 goto got_it;
1315 else
1316 tmp = doevery;
1317 }
a0d0e21e 1318 else
ffc61ed2
JH
1319 tmp = 1;
1320 s += UTF8SKIP(s);
a687059c 1321 }
a0d0e21e 1322 }
ffc61ed2
JH
1323 else {
1324 while (s < strend) {
1325 if (!isDIGIT(*s)) {
1326 if (tmp && (norun || regtry(prog, s)))
1327 goto got_it;
1328 else
1329 tmp = doevery;
1330 }
a0ed51b3 1331 else
ffc61ed2
JH
1332 tmp = 1;
1333 s++;
a0ed51b3 1334 }
a0ed51b3
LW
1335 }
1336 break;
b8c5462f
JH
1337 case NDIGITL:
1338 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1339 if (do_utf8) {
1340 while (s < strend) {
1341 if (!isDIGIT_LC_utf8((U8*)s)) {
1342 if (tmp && (norun || regtry(prog, s)))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
b8c5462f 1347 else
ffc61ed2
JH
1348 tmp = 1;
1349 s += UTF8SKIP(s);
b8c5462f 1350 }
a0ed51b3 1351 }
ffc61ed2
JH
1352 else {
1353 while (s < strend) {
1354 if (!isDIGIT_LC(*s)) {
1355 if (tmp && (norun || regtry(prog, s)))
1356 goto got_it;
1357 else
1358 tmp = doevery;
1359 }
cf93c79d 1360 else
ffc61ed2
JH
1361 tmp = 1;
1362 s++;
b8c5462f 1363 }
b8c5462f
JH
1364 }
1365 break;
b3c9acc1 1366 default:
3c3eec57
GS
1367 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1368 break;
d6a28714 1369 }
6eb5f6b9
JH
1370 return 0;
1371 got_it:
1372 return s;
1373}
1374
1375/*
1376 - regexec_flags - match a regexp against a string
1377 */
1378I32
1379Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1380 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1381/* strend: pointer to null at end of string */
1382/* strbeg: real beginning of string */
1383/* minend: end of match must be >=minend after stringarg. */
1384/* data: May be used for some additional optimizations. */
1385/* nosave: For optimizations. */
1386{
6eb5f6b9
JH
1387 register char *s;
1388 register regnode *c;
1389 register char *startpos = stringarg;
6eb5f6b9
JH
1390 I32 minlen; /* must match at least this many chars */
1391 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1392 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1393 constant substr. */ /* CC */
1394 I32 end_shift = 0; /* Same for the end. */ /* CC */
1395 I32 scream_pos = -1; /* Internal iterator of scream. */
1396 char *scream_olds;
1397 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1398 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1399
1400 PL_regcc = 0;
1401
1402 cache_re(prog);
1403#ifdef DEBUGGING
1404 PL_regnarrate = PL_debug & 512;
1405#endif
1406
1407 /* Be paranoid... */
1408 if (prog == NULL || startpos == NULL) {
1409 Perl_croak(aTHX_ "NULL regexp parameter");
1410 return 0;
1411 }
1412
1413 minlen = prog->minlen;
1aa99e6b 1414 if (do_utf8) {
3baa4c62
JH
1415 if (!(prog->reganch & ROPT_SANY_SEEN))
1416 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1417 }
1418 else {
1419 if (strend - startpos < minlen) goto phooey;
1420 }
6eb5f6b9
JH
1421
1422 if (startpos == strbeg) /* is ^ valid at stringarg? */
1423 PL_regprev = '\n';
1424 else {
1aa99e6b
IH
1425 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1426 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1427 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1428 }
1429 else
1430 PL_regprev = (U32)stringarg[-1];
6eb5f6b9
JH
1431 if (!PL_multiline && PL_regprev == '\n')
1432 PL_regprev = '\0'; /* force ^ to NOT match */
1433 }
1434
1435 /* Check validity of program. */
1436 if (UCHARAT(prog->program) != REG_MAGIC) {
1437 Perl_croak(aTHX_ "corrupted regexp program");
1438 }
1439
1440 PL_reg_flags = 0;
1441 PL_reg_eval_set = 0;
1442 PL_reg_maxiter = 0;
1443
1444 if (prog->reganch & ROPT_UTF8)
1445 PL_reg_flags |= RF_utf8;
1446
1447 /* Mark beginning of line for ^ and lookbehind. */
1448 PL_regbol = startpos;
1449 PL_bostr = strbeg;
1450 PL_reg_sv = sv;
1451
1452 /* Mark end of line for $ (and such) */
1453 PL_regeol = strend;
1454
1455 /* see how far we have to get to not match where we matched before */
1456 PL_regtill = startpos+minend;
1457
1458 /* We start without call_cc context. */
1459 PL_reg_call_cc = 0;
1460
1461 /* If there is a "must appear" string, look for it. */
1462 s = startpos;
1463
1464 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1465 MAGIC *mg;
1466
1467 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1468 PL_reg_ganch = startpos;
1469 else if (sv && SvTYPE(sv) >= SVt_PVMG
1470 && SvMAGIC(sv)
1471 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1472 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1473 if (prog->reganch & ROPT_ANCH_GPOS) {
1474 if (s > PL_reg_ganch)
1475 goto phooey;
1476 s = PL_reg_ganch;
1477 }
1478 }
1479 else /* pos() not defined */
1480 PL_reg_ganch = strbeg;
1481 }
1482
699c3c34
JH
1483 if (do_utf8 == (UTF!=0) &&
1484 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1485 re_scream_pos_data d;
1486
1487 d.scream_olds = &scream_olds;
1488 d.scream_pos = &scream_pos;
1489 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1490 if (!s)
1491 goto phooey; /* not present */
1492 }
1493
1494 DEBUG_r( if (!PL_colorset) reginitcolors() );
1495 DEBUG_r(PerlIO_printf(Perl_debug_log,
1496 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1497 PL_colors[4],PL_colors[5],PL_colors[0],
1498 prog->precomp,
1499 PL_colors[1],
1500 (strlen(prog->precomp) > 60 ? "..." : ""),
1501 PL_colors[0],
1502 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1503 startpos, PL_colors[1],
1504 (strend - startpos > 60 ? "..." : ""))
1505 );
1506
1507 /* Simplest case: anchored match need be tried only once. */
1508 /* [unless only anchor is BOL and multiline is set] */
1509 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1510 if (s == startpos && regtry(prog, startpos))
1511 goto got_it;
1512 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1513 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1514 {
1515 char *end;
1516
1517 if (minlen)
1518 dontbother = minlen - 1;
1aa99e6b 1519 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1520 /* for multiline we only have to try after newlines */
1521 if (prog->check_substr) {
1522 if (s == startpos)
1523 goto after_try;
1524 while (1) {
1525 if (regtry(prog, s))
1526 goto got_it;
1527 after_try:
1528 if (s >= end)
1529 goto phooey;
1530 if (prog->reganch & RE_USE_INTUIT) {
1531 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1532 if (!s)
1533 goto phooey;
1534 }
1535 else
1536 s++;
1537 }
1538 } else {
1539 if (s > startpos)
1540 s--;
1541 while (s < end) {
1542 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1543 if (regtry(prog, s))
1544 goto got_it;
1545 }
1546 }
1547 }
1548 }
1549 goto phooey;
1550 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1551 if (regtry(prog, PL_reg_ganch))
1552 goto got_it;
1553 goto phooey;
1554 }
1555
1556 /* Messy cases: unanchored match. */
1557 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1558 /* we have /x+whatever/ */
1559 /* it must be a one character string (XXXX Except UTF?) */
1560 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1561#ifdef DEBUGGING
1562 int did_match = 0;
1563#endif
1564
1aa99e6b 1565 if (do_utf8) {
6eb5f6b9
JH
1566 while (s < strend) {
1567 if (*s == ch) {
bf93d4cc 1568 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1569 if (regtry(prog, s)) goto got_it;
1570 s += UTF8SKIP(s);
1571 while (s < strend && *s == ch)
1572 s += UTF8SKIP(s);
1573 }
1574 s += UTF8SKIP(s);
1575 }
1576 }
1577 else {
1578 while (s < strend) {
1579 if (*s == ch) {
bf93d4cc 1580 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1581 if (regtry(prog, s)) goto got_it;
1582 s++;
1583 while (s < strend && *s == ch)
1584 s++;
1585 }
1586 s++;
1587 }
1588 }
bf93d4cc
GS
1589 DEBUG_r(did_match ||
1590 PerlIO_printf(Perl_debug_log,
1591 "Did not find anchored character...\n"));
6eb5f6b9
JH
1592 }
1593 /*SUPPRESS 560*/
1aa99e6b
IH
1594 else if (do_utf8 == (UTF!=0) &&
1595 (prog->anchored_substr != Nullsv
1596 || (prog->float_substr != Nullsv
1597 && prog->float_max_offset < strend - s))) {
6eb5f6b9
JH
1598 SV *must = prog->anchored_substr
1599 ? prog->anchored_substr : prog->float_substr;
1600 I32 back_max =
1601 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1602 I32 back_min =
1603 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1604 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1605 -(I32)(CHR_SVLEN(must)
1aa99e6b 1606 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1607 char *last1; /* Last position checked before */
bf93d4cc
GS
1608#ifdef DEBUGGING
1609 int did_match = 0;
1610#endif
6eb5f6b9
JH
1611
1612 if (s > PL_bostr)
1613 last1 = HOPc(s, -1);
1614 else
1615 last1 = s - 1; /* bogus */
1616
1617 /* XXXX check_substr already used to find `s', can optimize if
1618 check_substr==must. */
1619 scream_pos = -1;
1620 dontbother = end_shift;
1621 strend = HOPc(strend, -dontbother);
1622 while ( (s <= last) &&
1623 ((flags & REXEC_SCREAM)
1aa99e6b 1624 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1625 end_shift, &scream_pos, 0))
1aa99e6b 1626 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
6eb5f6b9
JH
1627 (unsigned char*)strend, must,
1628 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1629 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1630 if (HOPc(s, -back_max) > last1) {
1631 last1 = HOPc(s, -back_min);
1632 s = HOPc(s, -back_max);
1633 }
1634 else {
1635 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1636
1637 last1 = HOPc(s, -back_min);
1638 s = t;
1639 }
1aa99e6b 1640 if (do_utf8) {
6eb5f6b9
JH
1641 while (s <= last1) {
1642 if (regtry(prog, s))
1643 goto got_it;
1644 s += UTF8SKIP(s);
1645 }
1646 }
1647 else {
1648 while (s <= last1) {
1649 if (regtry(prog, s))
1650 goto got_it;
1651 s++;
1652 }
1653 }
1654 }
bf93d4cc
GS
1655 DEBUG_r(did_match ||
1656 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1657 ((must == prog->anchored_substr)
1658 ? "anchored" : "floating"),
1659 PL_colors[0],
1660 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1661 SvPVX(must),
1662 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1663 goto phooey;
1664 }
155aba94 1665 else if ((c = prog->regstclass)) {
66e933ab
GS
1666 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1667 /* don't bother with what can't match */
6eb5f6b9 1668 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1669 DEBUG_r({
1670 SV *prop = sv_newmortal();
1671 regprop(prop, c);
1672 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1673 });
6eb5f6b9
JH
1674 if (find_byclass(prog, c, s, strend, startpos, 0))
1675 goto got_it;
bf93d4cc 1676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1677 }
1678 else {
1679 dontbother = 0;
1680 if (prog->float_substr != Nullsv) { /* Trim the end. */
1681 char *last;
d6a28714
JH
1682
1683 if (flags & REXEC_SCREAM) {
1684 last = screaminstr(sv, prog->float_substr, s - strbeg,
1685 end_shift, &scream_pos, 1); /* last one */
1686 if (!last)
ffc61ed2 1687 last = scream_olds; /* Only one occurrence. */
b8c5462f 1688 }
d6a28714
JH
1689 else {
1690 STRLEN len;
1691 char *little = SvPV(prog->float_substr, len);
1692
1693 if (SvTAIL(prog->float_substr)) {
1694 if (memEQ(strend - len + 1, little, len - 1))
1695 last = strend - len + 1;
1696 else if (!PL_multiline)
1697 last = memEQ(strend - len, little, len)
1698 ? strend - len : Nullch;
b8c5462f 1699 else
d6a28714
JH
1700 goto find_last;
1701 } else {
1702 find_last:
1703 if (len)
1704 last = rninstr(s, strend, little, little + len);
b8c5462f 1705 else
d6a28714 1706 last = strend; /* matching `$' */
b8c5462f 1707 }
b8c5462f 1708 }
bf93d4cc
GS
1709 if (last == NULL) {
1710 DEBUG_r(PerlIO_printf(Perl_debug_log,
1711 "%sCan't trim the tail, match fails (should not happen)%s\n",
1712 PL_colors[4],PL_colors[5]));
1713 goto phooey; /* Should not happen! */
1714 }
d6a28714
JH
1715 dontbother = strend - last + prog->float_min_offset;
1716 }
1717 if (minlen && (dontbother < minlen))
1718 dontbother = minlen - 1;
1719 strend -= dontbother; /* this one's always in bytes! */
1720 /* We don't know much -- general case. */
1aa99e6b 1721 if (do_utf8) {
d6a28714
JH
1722 for (;;) {
1723 if (regtry(prog, s))
1724 goto got_it;
1725 if (s >= strend)
1726 break;
b8c5462f 1727 s += UTF8SKIP(s);
d6a28714
JH
1728 };
1729 }
1730 else {
1731 do {
1732 if (regtry(prog, s))
1733 goto got_it;
1734 } while (s++ < strend);
1735 }
1736 }
1737
1738 /* Failure. */
1739 goto phooey;
1740
1741got_it:
1742 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1743
1744 if (PL_reg_eval_set) {
1745 /* Preserve the current value of $^R */
1746 if (oreplsv != GvSV(PL_replgv))
1747 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1748 restored, the value remains
1749 the same. */
1750 restore_pos(aTHXo_ 0);
1751 }
1752
1753 /* make sure $`, $&, $', and $digit will work later */
1754 if ( !(flags & REXEC_NOT_FIRST) ) {
1755 if (RX_MATCH_COPIED(prog)) {
1756 Safefree(prog->subbeg);
1757 RX_MATCH_COPIED_off(prog);
1758 }
1759 if (flags & REXEC_COPY_STR) {
1760 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1761
1762 s = savepvn(strbeg, i);
1763 prog->subbeg = s;
1764 prog->sublen = i;
1765 RX_MATCH_COPIED_on(prog);
1766 }
1767 else {
1768 prog->subbeg = strbeg;
1769 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1770 }
1771 }
1772
1773 return 1;
1774
1775phooey:
bf93d4cc
GS
1776 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1777 PL_colors[4],PL_colors[5]));
d6a28714
JH
1778 if (PL_reg_eval_set)
1779 restore_pos(aTHXo_ 0);
1780 return 0;
1781}
1782
1783/*
1784 - regtry - try match at specific point
1785 */
1786STATIC I32 /* 0 failure, 1 success */
1787S_regtry(pTHX_ regexp *prog, char *startpos)
1788{
d6a28714
JH
1789 register I32 i;
1790 register I32 *sp;
1791 register I32 *ep;
1792 CHECKPOINT lastcp;
1793
02db2b7b
IZ
1794#ifdef DEBUGGING
1795 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1796#endif
d6a28714
JH
1797 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1798 MAGIC *mg;
1799
1800 PL_reg_eval_set = RS_init;
1801 DEBUG_r(DEBUG_s(
b900a521
JH
1802 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1803 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1804 ));
e8347627 1805 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1806 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1807 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1808 SAVETMPS;
1809 /* Apparently this is not needed, judging by wantarray. */
e8347627 1810 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1811 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1812
1813 if (PL_reg_sv) {
1814 /* Make $_ available to executed code. */
1815 if (PL_reg_sv != DEFSV) {
1816 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1817 SAVESPTR(DEFSV);
1818 DEFSV = PL_reg_sv;
b8c5462f 1819 }
d6a28714
JH
1820
1821 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1822 && (mg = mg_find(PL_reg_sv, 'g')))) {
1823 /* prepare for quick setting of pos */
1824 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1825 mg = mg_find(PL_reg_sv, 'g');
1826 mg->mg_len = -1;
b8c5462f 1827 }
d6a28714
JH
1828 PL_reg_magic = mg;
1829 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1830 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1831 }
1832 if (!PL_reg_curpm)
0f79a09d 1833 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1834 PL_reg_curpm->op_pmregexp = prog;
1835 PL_reg_oldcurpm = PL_curpm;
1836 PL_curpm = PL_reg_curpm;
1837 if (RX_MATCH_COPIED(prog)) {
1838 /* Here is a serious problem: we cannot rewrite subbeg,
1839 since it may be needed if this match fails. Thus
1840 $` inside (?{}) could fail... */
1841 PL_reg_oldsaved = prog->subbeg;
1842 PL_reg_oldsavedlen = prog->sublen;
1843 RX_MATCH_COPIED_off(prog);
1844 }
1845 else
1846 PL_reg_oldsaved = Nullch;
1847 prog->subbeg = PL_bostr;
1848 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1849 }
1850 prog->startp[0] = startpos - PL_bostr;
1851 PL_reginput = startpos;
1852 PL_regstartp = prog->startp;
1853 PL_regendp = prog->endp;
1854 PL_reglastparen = &prog->lastparen;
1855 prog->lastparen = 0;
1856 PL_regsize = 0;
1857 DEBUG_r(PL_reg_starttry = startpos);
1858 if (PL_reg_start_tmpl <= prog->nparens) {
1859 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1860 if(PL_reg_start_tmp)
1861 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1862 else
1863 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1864 }
1865
1866 /* XXXX What this code is doing here?!!! There should be no need
1867 to do this again and again, PL_reglastparen should take care of
3dd2943c 1868 this! --ilya*/
dafc8851
JH
1869
1870 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1871 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1872 * PL_reglastparen), is not needed at all by the test suite
1873 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1874 * enough, for building DynaLoader, or otherwise this
1875 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1876 * will happen. Meanwhile, this code *is* needed for the
1877 * above-mentioned test suite tests to succeed. The common theme
1878 * on those tests seems to be returning null fields from matches.
1879 * --jhi */
dafc8851 1880#if 1
d6a28714
JH
1881 sp = prog->startp;
1882 ep = prog->endp;
1883 if (prog->nparens) {
09e8ae3b 1884 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1885 *++sp = -1;
1886 *++ep = -1;
1887 }
1888 }
dafc8851 1889#endif
02db2b7b 1890 REGCP_SET(lastcp);
d6a28714
JH
1891 if (regmatch(prog->program + 1)) {
1892 prog->endp[0] = PL_reginput - PL_bostr;
1893 return 1;
1894 }
02db2b7b 1895 REGCP_UNWIND(lastcp);
d6a28714
JH
1896 return 0;
1897}
1898
02db2b7b
IZ
1899#define RE_UNWIND_BRANCH 1
1900#define RE_UNWIND_BRANCHJ 2
1901
1902union re_unwind_t;
1903
1904typedef struct { /* XX: makes sense to enlarge it... */
1905 I32 type;
1906 I32 prev;
1907 CHECKPOINT lastcp;
1908} re_unwind_generic_t;
1909
1910typedef struct {
1911 I32 type;
1912 I32 prev;
1913 CHECKPOINT lastcp;
1914 I32 lastparen;
1915 regnode *next;
1916 char *locinput;
1917 I32 nextchr;
1918#ifdef DEBUGGING
1919 int regindent;
1920#endif
1921} re_unwind_branch_t;
1922
1923typedef union re_unwind_t {
1924 I32 type;
1925 re_unwind_generic_t generic;
1926 re_unwind_branch_t branch;
1927} re_unwind_t;
1928
d6a28714
JH
1929/*
1930 - regmatch - main matching routine
1931 *
1932 * Conceptually the strategy is simple: check to see whether the current
1933 * node matches, call self recursively to see whether the rest matches,
1934 * and then act accordingly. In practice we make some effort to avoid
1935 * recursion, in particular by going through "ordinary" nodes (that don't
1936 * need to know whether the rest of the match failed) by a loop instead of
1937 * by recursion.
1938 */
1939/* [lwall] I've hoisted the register declarations to the outer block in order to
1940 * maybe save a little bit of pushing and popping on the stack. It also takes
1941 * advantage of machines that use a register save mask on subroutine entry.
1942 */
1943STATIC I32 /* 0 failure, 1 success */
1944S_regmatch(pTHX_ regnode *prog)
1945{
d6a28714
JH
1946 register regnode *scan; /* Current node. */
1947 regnode *next; /* Next node. */
1948 regnode *inner; /* Next node in internal branch. */
1949 register I32 nextchr; /* renamed nextchr - nextchar colides with
1950 function of same name */
1951 register I32 n; /* no or next */
1952 register I32 ln; /* len or last */
1953 register char *s; /* operand or save */
1954 register char *locinput = PL_reginput;
1955 register I32 c1, c2, paren; /* case fold search, parenth */
1956 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1957 I32 unwind = 0;
1958 I32 firstcp = PL_savestack_ix;
ffc61ed2 1959 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1960
d6a28714
JH
1961#ifdef DEBUGGING
1962 PL_regindent++;
1963#endif
1964
1965 /* Note that nextchr is a byte even in UTF */
1966 nextchr = UCHARAT(locinput);
1967 scan = prog;
1968 while (scan != NULL) {
1969#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1970#if 1
d6a28714
JH
1971# define sayYES goto yes
1972# define sayNO goto no
7821416a
IZ
1973# define sayYES_FINAL goto yes_final
1974# define sayYES_LOUD goto yes_loud
1975# define sayNO_FINAL goto no_final
1976# define sayNO_SILENT goto do_no
d6a28714
JH
1977# define saySAME(x) if (x) goto yes; else goto no
1978# define REPORT_CODE_OFF 24
1979#else
1980# define sayYES return 1
1981# define sayNO return 0
7821416a
IZ
1982# define sayYES_FINAL return 1
1983# define sayYES_LOUD return 1
1984# define sayNO_FINAL return 0
1985# define sayNO_SILENT return 0
d6a28714
JH
1986# define saySAME(x) return x
1987#endif
1988 DEBUG_r( {
1989 SV *prop = sv_newmortal();
1990 int docolor = *PL_colors[0];
1991 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1992 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
1993 /* The part of the string before starttry has one color
1994 (pref0_len chars), between starttry and current
1995 position another one (pref_len - pref0_len chars),
1996 after the current position the third one.
1997 We assume that pref0_len <= pref_len, otherwise we
1998 decrease pref0_len. */
1aa99e6b
IH
1999 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2000 ? (5 + taill) - l : locinput - PL_bostr;
2001 int pref0_len;
d6a28714 2002
1aa99e6b
IH
2003 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2004 pref_len++;
2005 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714
JH
2006 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2007 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2008 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2009 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2010 l--;
d6a28714
JH
2011 if (pref0_len < 0)
2012 pref0_len = 0;
2013 if (pref0_len > pref_len)
2014 pref0_len = pref_len;
2015 regprop(prop, scan);
2016 PerlIO_printf(Perl_debug_log,
b900a521
JH
2017 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2018 (IV)(locinput - PL_bostr),
d6a28714
JH
2019 PL_colors[4], pref0_len,
2020 locinput - pref_len, PL_colors[5],
2021 PL_colors[2], pref_len - pref0_len,
2022 locinput - pref_len + pref0_len, PL_colors[3],
2023 (docolor ? "" : "> <"),
2024 PL_colors[0], l, locinput, PL_colors[1],
2025 15 - l - pref_len + 1,
2026 "",
b900a521 2027 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2028 SvPVX(prop));
2029 } );
2030
2031 next = scan + NEXT_OFF(scan);
2032 if (next == scan)
2033 next = NULL;
2034
2035 switch (OP(scan)) {
2036 case BOL:
2037 if (locinput == PL_bostr
2038 ? PL_regprev == '\n'
2039 : (PL_multiline &&
2040 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2041 {
2042 /* regtill = regbol; */
b8c5462f
JH
2043 break;
2044 }
d6a28714
JH
2045 sayNO;
2046 case MBOL:
2047 if (locinput == PL_bostr
2048 ? PL_regprev == '\n'
2049 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2050 {
b8c5462f
JH
2051 break;
2052 }
d6a28714
JH
2053 sayNO;
2054 case SBOL:
c2a73568 2055 if (locinput == PL_bostr)
b8c5462f 2056 break;
d6a28714
JH
2057 sayNO;
2058 case GPOS:
2059 if (locinput == PL_reg_ganch)
2060 break;
2061 sayNO;
2062 case EOL:
2063 if (PL_multiline)
2064 goto meol;
2065 else
2066 goto seol;
2067 case MEOL:
2068 meol:
2069 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2070 sayNO;
b8c5462f 2071 break;
d6a28714
JH
2072 case SEOL:
2073 seol:
2074 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2075 sayNO;
d6a28714 2076 if (PL_regeol - locinput > 1)
b8c5462f 2077 sayNO;
b8c5462f 2078 break;
d6a28714
JH
2079 case EOS:
2080 if (PL_regeol != locinput)
b8c5462f 2081 sayNO;
d6a28714 2082 break;
ffc61ed2 2083 case SANY:
d6a28714 2084 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2085 sayNO;
b8c5462f 2086 nextchr = UCHARAT(++locinput);
a0d0e21e 2087 break;
ffc61ed2 2088 case REG_ANY:
1aa99e6b
IH
2089 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2090 sayNO;
2091 if (do_utf8) {
b8c5462f 2092 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2093 if (locinput > PL_regeol)
2094 sayNO;
a0ed51b3 2095 nextchr = UCHARAT(locinput);
a0ed51b3 2096 }
1aa99e6b
IH
2097 else
2098 nextchr = UCHARAT(++locinput);
a0ed51b3 2099 break;
d6a28714 2100 case EXACT:
cd439c50
IZ
2101 s = STRING(scan);
2102 ln = STR_LEN(scan);
1aa99e6b
IH
2103 if (do_utf8 != (UTF!=0)) {
2104 char *l = locinput;
2105 char *e = s + ln;
2106 STRLEN len;
2107 if (do_utf8)
2108 while (s < e) {
2109 if (l >= PL_regeol)
2110 sayNO;
2111 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2112 sayNO;
2113 s++;
2114 l += len;
2115 }
2116 else
2117 while (s < e) {
2118 if (l >= PL_regeol)
2119 sayNO;
2120 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2121 sayNO;
2122 s += len;
2123 l++;
2124 }
2125 locinput = l;
2126 nextchr = UCHARAT(locinput);
2127 break;
2128 }
d6a28714
JH
2129 /* Inline the first character, for speed. */
2130 if (UCHARAT(s) != nextchr)
2131 sayNO;
2132 if (PL_regeol - locinput < ln)
2133 sayNO;
2134 if (ln > 1 && memNE(s, locinput, ln))
2135 sayNO;
2136 locinput += ln;
2137 nextchr = UCHARAT(locinput);
2138 break;
2139 case EXACTFL:
b8c5462f
JH
2140 PL_reg_flags |= RF_tainted;
2141 /* FALL THROUGH */
d6a28714 2142 case EXACTF:
cd439c50
IZ
2143 s = STRING(scan);
2144 ln = STR_LEN(scan);
d6a28714 2145
1aa99e6b 2146 if (do_utf8) {
d6a28714 2147 char *l = locinput;
1aa99e6b
IH
2148 char *e;
2149 e = s + ln;
d6a28714
JH
2150 c1 = OP(scan) == EXACTF;
2151 while (s < e) {
1aa99e6b 2152 if (l >= PL_regeol) {
d6a28714
JH
2153 sayNO;
2154 }
1aa99e6b
IH
2155 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2156 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2157 sayNO;
2158 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2159 l += UTF8SKIP(l);
b8c5462f 2160 }
d6a28714 2161 locinput = l;
a0ed51b3
LW
2162 nextchr = UCHARAT(locinput);
2163 break;
2164 }
d6a28714
JH
2165
2166 /* Inline the first character, for speed. */
2167 if (UCHARAT(s) != nextchr &&
2168 UCHARAT(s) != ((OP(scan) == EXACTF)
2169 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2170 sayNO;
d6a28714 2171 if (PL_regeol - locinput < ln)
b8c5462f 2172 sayNO;
d6a28714
JH
2173 if (ln > 1 && (OP(scan) == EXACTF
2174 ? ibcmp(s, locinput, ln)
2175 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2176 sayNO;
d6a28714
JH
2177 locinput += ln;
2178 nextchr = UCHARAT(locinput);
a0d0e21e 2179 break;
d6a28714 2180 case ANYOF:
ffc61ed2
JH
2181 if (do_utf8) {
2182 if (!reginclass(scan, (U8*)locinput, do_utf8))
2183 sayNO;
2184 if (locinput >= PL_regeol)
2185 sayNO;
2186 locinput += PL_utf8skip[nextchr];
b8c5462f 2187 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2188 }
2189 else {
2190 if (nextchr < 0)
2191 nextchr = UCHARAT(locinput);
2192 if (!reginclass(scan, (U8*)locinput, do_utf8))
2193 sayNO;
2194 if (!nextchr && locinput >= PL_regeol)
2195 sayNO;
2196 nextchr = UCHARAT(++locinput);
2197 }
b8c5462f 2198 break;
d6a28714 2199 case ALNUML:
b8c5462f
JH
2200 PL_reg_flags |= RF_tainted;
2201 /* FALL THROUGH */
d6a28714 2202 case ALNUM:
b8c5462f 2203 if (!nextchr)
4633a7c4 2204 sayNO;
ffc61ed2
JH
2205 if (do_utf8) {
2206 if (!(OP(scan) == ALNUM
d6a28714
JH
2207 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2208 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2209 {
2210 sayNO;
a0ed51b3 2211 }
b8c5462f 2212 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2213 nextchr = UCHARAT(locinput);
2214 break;
2215 }
ffc61ed2 2216 if (!(OP(scan) == ALNUM
d6a28714 2217 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2218 sayNO;
b8c5462f 2219 nextchr = UCHARAT(++locinput);
a0d0e21e 2220 break;
d6a28714 2221 case NALNUML:
b8c5462f
JH
2222 PL_reg_flags |= RF_tainted;
2223 /* FALL THROUGH */
d6a28714
JH
2224 case NALNUM:
2225 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2226 sayNO;
ffc61ed2
JH
2227 if (do_utf8) {
2228 if (OP(scan) == NALNUM
d6a28714
JH
2229 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2230 : isALNUM_LC_utf8((U8*)locinput))
2231 {
b8c5462f 2232 sayNO;
d6a28714 2233 }
b8c5462f
JH
2234 locinput += PL_utf8skip[nextchr];
2235 nextchr = UCHARAT(locinput);
2236 break;
2237 }
ffc61ed2 2238 if (OP(scan) == NALNUM
d6a28714 2239 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2240 sayNO;
76e3520e 2241 nextchr = UCHARAT(++locinput);
a0d0e21e 2242 break;
d6a28714
JH
2243 case BOUNDL:
2244 case NBOUNDL:
3280af22 2245 PL_reg_flags |= RF_tainted;
bbce6d69 2246 /* FALL THROUGH */
d6a28714
JH
2247 case BOUND:
2248 case NBOUND:
2249 /* was last char in word? */
ffc61ed2
JH
2250 if (do_utf8) {
2251 if (locinput == PL_regbol)
2252 ln = PL_regprev;
2253 else {
2254 U8 *r = reghop((U8*)locinput, -1);
2255
2256 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2257 }
2258 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2259 ln = isALNUM_uni(ln);
2260 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2261 }
2262 else {
2263 ln = isALNUM_LC_uni(ln);
2264 n = isALNUM_LC_utf8((U8*)locinput);
2265 }
a0ed51b3 2266 }
d6a28714 2267 else {
ffc61ed2
JH
2268 ln = (locinput != PL_regbol) ?
2269 UCHARAT(locinput - 1) : PL_regprev;
2270 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2271 ln = isALNUM(ln);
2272 n = isALNUM(nextchr);
2273 }
2274 else {
2275 ln = isALNUM_LC(ln);
2276 n = isALNUM_LC(nextchr);
2277 }
d6a28714 2278 }
ffc61ed2
JH
2279 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2280 OP(scan) == BOUNDL))
2281 sayNO;
a0ed51b3 2282 break;
d6a28714 2283 case SPACEL:
3280af22 2284 PL_reg_flags |= RF_tainted;
bbce6d69 2285 /* FALL THROUGH */
d6a28714 2286 case SPACE:
9442cb0e 2287 if (!nextchr)
4633a7c4 2288 sayNO;
1aa99e6b 2289 if (do_utf8) {
fd400ab9 2290 if (UTF8_IS_CONTINUED(nextchr)) {
ffc61ed2
JH
2291 if (!(OP(scan) == SPACE
2292 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2293 : isSPACE_LC_utf8((U8*)locinput)))
2294 {
2295 sayNO;
2296 }
2297 locinput += PL_utf8skip[nextchr];
2298 nextchr = UCHARAT(locinput);
2299 break;
d6a28714 2300 }
ffc61ed2
JH
2301 if (!(OP(scan) == SPACE
2302 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2303 sayNO;
2304 nextchr = UCHARAT(++locinput);
2305 }
2306 else {
2307 if (!(OP(scan) == SPACE
2308 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2309 sayNO;
2310 nextchr = UCHARAT(++locinput);
a0ed51b3 2311 }
a0ed51b3 2312 break;
d6a28714 2313 case NSPACEL:
3280af22 2314 PL_reg_flags |= RF_tainted;
bbce6d69 2315 /* FALL THROUGH */
d6a28714 2316 case NSPACE:
9442cb0e 2317 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2318 sayNO;
1aa99e6b 2319 if (do_utf8) {
ffc61ed2 2320 if (OP(scan) == NSPACE
9442cb0e 2321 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2322 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2323 {
2324 sayNO;
2325 }
2326 locinput += PL_utf8skip[nextchr];
2327 nextchr = UCHARAT(locinput);
2328 break;
a0ed51b3 2329 }
ffc61ed2 2330 if (OP(scan) == NSPACE
d6a28714 2331 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2332 sayNO;
76e3520e 2333 nextchr = UCHARAT(++locinput);
a0d0e21e 2334 break;
d6a28714 2335 case DIGITL:
a0ed51b3
LW
2336 PL_reg_flags |= RF_tainted;
2337 /* FALL THROUGH */
d6a28714 2338 case DIGIT:
9442cb0e 2339 if (!nextchr)
a0ed51b3 2340 sayNO;
1aa99e6b 2341 if (do_utf8) {
ffc61ed2 2342 if (!(OP(scan) == DIGIT
9442cb0e
GS
2343 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2344 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2345 {
a0ed51b3 2346 sayNO;
dfe13c55 2347 }
6f06b55f 2348 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2349 nextchr = UCHARAT(locinput);
2350 break;
2351 }
ffc61ed2 2352 if (!(OP(scan) == DIGIT
9442cb0e 2353 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2354 sayNO;
2355 nextchr = UCHARAT(++locinput);
2356 break;
d6a28714 2357 case NDIGITL:
b8c5462f
JH
2358 PL_reg_flags |= RF_tainted;
2359 /* FALL THROUGH */
d6a28714 2360 case NDIGIT:
9442cb0e 2361 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2362 sayNO;
1aa99e6b 2363 if (do_utf8) {
ffc61ed2 2364 if (OP(scan) == NDIGIT
9442cb0e
GS
2365 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2366 : isDIGIT_LC_utf8((U8*)locinput))
2367 {
a0ed51b3 2368 sayNO;
9442cb0e 2369 }
6f06b55f 2370 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2371 nextchr = UCHARAT(locinput);
2372 break;
2373 }
ffc61ed2 2374 if (OP(scan) == NDIGIT
9442cb0e 2375 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2376 sayNO;
2377 nextchr = UCHARAT(++locinput);
2378 break;
2379 case CLUMP:
dfe13c55 2380 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2381 sayNO;
6f06b55f 2382 locinput += PL_utf8skip[nextchr];
dfe13c55 2383 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2384 locinput += UTF8SKIP(locinput);
2385 if (locinput > PL_regeol)
2386 sayNO;
2387 nextchr = UCHARAT(locinput);
2388 break;
c8756f30 2389 case REFFL:
3280af22 2390 PL_reg_flags |= RF_tainted;
c8756f30 2391 /* FALL THROUGH */
c277df42 2392 case REF:
c8756f30 2393 case REFF:
c277df42 2394 n = ARG(scan); /* which paren pair */
cf93c79d 2395 ln = PL_regstartp[n];
2c2d71f5 2396 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2397 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2398 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2399 if (ln == PL_regendp[n])
a0d0e21e 2400 break;
a0ed51b3 2401
cf93c79d 2402 s = PL_bostr + ln;
1aa99e6b 2403 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2404 char *l = locinput;
cf93c79d 2405 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2406 /*
2407 * Note that we can't do the "other character" lookup trick as
2408 * in the 8-bit case (no pun intended) because in Unicode we
2409 * have to map both upper and title case to lower case.
2410 */
2411 if (OP(scan) == REFF) {
2412 while (s < e) {
2413 if (l >= PL_regeol)
2414 sayNO;
dfe13c55 2415 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2416 sayNO;
2417 s += UTF8SKIP(s);
2418 l += UTF8SKIP(l);
2419 }
2420 }
2421 else {
2422 while (s < e) {
2423 if (l >= PL_regeol)
2424 sayNO;
dfe13c55 2425 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2426 sayNO;
2427 s += UTF8SKIP(s);
2428 l += UTF8SKIP(l);
2429 }
2430 }
2431 locinput = l;
2432 nextchr = UCHARAT(locinput);
2433 break;
2434 }
2435
a0d0e21e 2436 /* Inline the first character, for speed. */
76e3520e 2437 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2438 (OP(scan) == REF ||
2439 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2440 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2441 sayNO;
cf93c79d 2442 ln = PL_regendp[n] - ln;
3280af22 2443 if (locinput + ln > PL_regeol)
4633a7c4 2444 sayNO;
c8756f30
AK
2445 if (ln > 1 && (OP(scan) == REF
2446 ? memNE(s, locinput, ln)
2447 : (OP(scan) == REFF
2448 ? ibcmp(s, locinput, ln)
2449 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2450 sayNO;
a0d0e21e 2451 locinput += ln;
76e3520e 2452 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2453 break;
2454
2455 case NOTHING:
c277df42 2456 case TAIL:
a0d0e21e
LW
2457 break;
2458 case BACK:
2459 break;
c277df42
IZ
2460 case EVAL:
2461 {
2462 dSP;
533c011a 2463 OP_4tree *oop = PL_op;
3280af22
NIS
2464 COP *ocurcop = PL_curcop;
2465 SV **ocurpad = PL_curpad;
c277df42
IZ
2466 SV *ret;
2467
2468 n = ARG(scan);
533c011a 2469 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2470 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2471 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2472 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2473
cea2e8a9 2474 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2475 SPAGAIN;
2476 ret = POPs;
2477 PUTBACK;
2478
0f5d15d6
IZ
2479 PL_op = oop;
2480 PL_curpad = ocurpad;
2481 PL_curcop = ocurcop;
c277df42 2482 if (logical) {
0f5d15d6
IZ
2483 if (logical == 2) { /* Postponed subexpression. */
2484 regexp *re;
22c35a8c 2485 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2486 re_cc_state state;
0f5d15d6
IZ
2487 CHECKPOINT cp, lastcp;
2488
2489 if(SvROK(ret) || SvRMAGICAL(ret)) {
2490 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2491
2492 if(SvMAGICAL(sv))
2493 mg = mg_find(sv, 'r');
2494 }
2495 if (mg) {
2496 re = (regexp *)mg->mg_obj;
df0003d4 2497 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2498 }
2499 else {
2500 STRLEN len;
2501 char *t = SvPV(ret, len);
2502 PMOP pm;
2503 char *oprecomp = PL_regprecomp;
2504 I32 osize = PL_regsize;
2505 I32 onpar = PL_regnpar;
2506
2507 pm.op_pmflags = 0;
cea2e8a9 2508 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2509 if (!(SvFLAGS(ret)
2510 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2511 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2512 PL_regprecomp = oprecomp;
2513 PL_regsize = osize;
2514 PL_regnpar = onpar;
2515 }
2516 DEBUG_r(
2517 PerlIO_printf(Perl_debug_log,
2518 "Entering embedded `%s%.60s%s%s'\n",
2519 PL_colors[0],
2520 re->precomp,
2521 PL_colors[1],
2522 (strlen(re->precomp) > 60 ? "..." : ""))
2523 );
2524 state.node = next;
2525 state.prev = PL_reg_call_cc;
2526 state.cc = PL_regcc;
2527 state.re = PL_reg_re;
2528
2ab05381 2529 PL_regcc = 0;
0f5d15d6
IZ
2530
2531 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2532 REGCP_SET(lastcp);
0f5d15d6
IZ
2533 cache_re(re);
2534 state.ss = PL_savestack_ix;
2535 *PL_reglastparen = 0;
2536 PL_reg_call_cc = &state;
2537 PL_reginput = locinput;
2c2d71f5
JH
2538
2539 /* XXXX This is too dramatic a measure... */
2540 PL_reg_maxiter = 0;
2541
0f5d15d6 2542 if (regmatch(re->program + 1)) {
2c914db6
IZ
2543 /* Even though we succeeded, we need to restore
2544 global variables, since we may be wrapped inside
2545 SUSPEND, thus the match may be not finished yet. */
2546
2547 /* XXXX Do this only if SUSPENDed? */
2548 PL_reg_call_cc = state.prev;
2549 PL_regcc = state.cc;
2550 PL_reg_re = state.re;
2551 cache_re(PL_reg_re);
2552
2553 /* XXXX This is too dramatic a measure... */
2554 PL_reg_maxiter = 0;
2555
2556 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2557 ReREFCNT_dec(re);
2558 regcpblow(cp);
2559 sayYES;
2560 }
0f5d15d6 2561 ReREFCNT_dec(re);
02db2b7b 2562 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2563 regcppop();
2564 PL_reg_call_cc = state.prev;
2565 PL_regcc = state.cc;
2566 PL_reg_re = state.re;
d3790889 2567 cache_re(PL_reg_re);
2c2d71f5
JH
2568
2569 /* XXXX This is too dramatic a measure... */
2570 PL_reg_maxiter = 0;
2571
0f5d15d6
IZ
2572 sayNO;
2573 }
c277df42 2574 sw = SvTRUE(ret);
0f5d15d6 2575 logical = 0;
a0ed51b3
LW
2576 }
2577 else
3280af22 2578 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2579 break;
2580 }
a0d0e21e 2581 case OPEN:
c277df42 2582 n = ARG(scan); /* which paren pair */
3280af22
NIS
2583 PL_reg_start_tmp[n] = locinput;
2584 if (n > PL_regsize)
2585 PL_regsize = n;
a0d0e21e
LW
2586 break;
2587 case CLOSE:
c277df42 2588 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2589 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2590 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2591 if (n > *PL_reglastparen)
2592 *PL_reglastparen = n;
a0d0e21e 2593 break;
c277df42
IZ
2594 case GROUPP:
2595 n = ARG(scan); /* which paren pair */
cf93c79d 2596 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2597 break;
2598 case IFTHEN:
2c2d71f5 2599 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2600 if (sw)
2601 next = NEXTOPER(NEXTOPER(scan));
2602 else {
2603 next = scan + ARG(scan);
2604 if (OP(next) == IFTHEN) /* Fake one. */
2605 next = NEXTOPER(NEXTOPER(next));
2606 }
2607 break;
2608 case LOGICAL:
0f5d15d6 2609 logical = scan->flags;
c277df42 2610 break;
2ab05381
IZ
2611/*******************************************************************
2612 PL_regcc contains infoblock about the innermost (...)* loop, and
2613 a pointer to the next outer infoblock.
2614
2615 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2616
2617 1) After matching X, regnode for CURLYX is processed;
2618
2619 2) This regnode creates infoblock on the stack, and calls
2620 regmatch() recursively with the starting point at WHILEM node;
2621
2622 3) Each hit of WHILEM node tries to match A and Z (in the order
2623 depending on the current iteration, min/max of {min,max} and
2624 greediness). The information about where are nodes for "A"
2625 and "Z" is read from the infoblock, as is info on how many times "A"
2626 was already matched, and greediness.
2627
2628 4) After A matches, the same WHILEM node is hit again.
2629
2630 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2631 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2632 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2633 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2634 of the external loop.
2635
2636 Currently present infoblocks form a tree with a stem formed by PL_curcc
2637 and whatever it mentions via ->next, and additional attached trees
2638 corresponding to temporarily unset infoblocks as in "5" above.
2639
2640 In the following picture infoblocks for outer loop of
2641 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2642 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2643 infoblocks are drawn below the "reset" infoblock.
2644
2645 In fact in the picture below we do not show failed matches for Z and T
2646 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2647 more obvious *why* one needs to *temporary* unset infoblocks.]
2648
2649 Matched REx position InfoBlocks Comment
2650 (Y(A)*?Z)*?T x
2651 Y(A)*?Z)*?T x <- O
2652 Y (A)*?Z)*?T x <- O
2653 Y A)*?Z)*?T x <- O <- I
2654 YA )*?Z)*?T x <- O <- I
2655 YA A)*?Z)*?T x <- O <- I
2656 YAA )*?Z)*?T x <- O <- I
2657 YAA Z)*?T x <- O # Temporary unset I
2658 I
2659
2660 YAAZ Y(A)*?Z)*?T x <- O
2661 I
2662
2663 YAAZY (A)*?Z)*?T x <- O
2664 I
2665
2666 YAAZY A)*?Z)*?T x <- O <- I
2667 I
2668
2669 YAAZYA )*?Z)*?T x <- O <- I
2670 I
2671
2672 YAAZYA Z)*?T x <- O # Temporary unset I
2673 I,I
2674
2675 YAAZYAZ )*?T x <- O
2676 I,I
2677
2678 YAAZYAZ T x # Temporary unset O
2679 O
2680 I,I
2681
2682 YAAZYAZT x
2683 O
2684 I,I
2685 *******************************************************************/
a0d0e21e
LW
2686 case CURLYX: {
2687 CURCUR cc;
3280af22 2688 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2689 /* No need to save/restore up to this paren */
2690 I32 parenfloor = scan->flags;
c277df42
IZ
2691
2692 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2693 next += ARG(next);
3280af22
NIS
2694 cc.oldcc = PL_regcc;
2695 PL_regcc = &cc;
cb434fcc
IZ
2696 /* XXXX Probably it is better to teach regpush to support
2697 parenfloor > PL_regsize... */
2698 if (parenfloor > *PL_reglastparen)
2699 parenfloor = *PL_reglastparen; /* Pessimization... */
2700 cc.parenfloor = parenfloor;
a0d0e21e
LW
2701 cc.cur = -1;
2702 cc.min = ARG1(scan);
2703 cc.max = ARG2(scan);
c277df42 2704 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2705 cc.next = next;
2706 cc.minmod = minmod;
2707 cc.lastloc = 0;
3280af22 2708 PL_reginput = locinput;
a0d0e21e
LW
2709 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2710 regcpblow(cp);
3280af22 2711 PL_regcc = cc.oldcc;
4633a7c4 2712 saySAME(n);
a0d0e21e
LW
2713 }
2714 /* NOT REACHED */
2715 case WHILEM: {
2716 /*
2717 * This is really hard to understand, because after we match
2718 * what we're trying to match, we must make sure the rest of
2c2d71f5 2719 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2720 * to go back UP the parse tree by recursing ever deeper. And
2721 * if it fails, we have to reset our parent's current state
2722 * that we can try again after backing off.
2723 */
2724
c277df42 2725 CHECKPOINT cp, lastcp;
3280af22 2726 CURCUR* cc = PL_regcc;
c277df42
IZ
2727 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2728
4633a7c4 2729 n = cc->cur + 1; /* how many we know we matched */
3280af22 2730 PL_reginput = locinput;
a0d0e21e 2731
c277df42
IZ
2732 DEBUG_r(
2733 PerlIO_printf(Perl_debug_log,
2734 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2735 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2736 (long)n, (long)cc->min,
2737 (long)cc->max, (long)cc)
2738 );
4633a7c4 2739
a0d0e21e
LW
2740 /* If degenerate scan matches "", assume scan done. */
2741
579cf2c3 2742 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2743 PL_regcc = cc->oldcc;
2ab05381
IZ
2744 if (PL_regcc)
2745 ln = PL_regcc->cur;
c277df42 2746 DEBUG_r(
c3464db5
DD
2747 PerlIO_printf(Perl_debug_log,
2748 "%*s empty match detected, try continuation...\n",
3280af22 2749 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2750 );
a0d0e21e 2751 if (regmatch(cc->next))
4633a7c4 2752 sayYES;
2ab05381
IZ
2753 if (PL_regcc)
2754 PL_regcc->cur = ln;
3280af22 2755 PL_regcc = cc;
4633a7c4 2756 sayNO;
a0d0e21e
LW
2757 }
2758
2759 /* First just match a string of min scans. */
2760
2761 if (n < cc->min) {
2762 cc->cur = n;
2763 cc->lastloc = locinput;
4633a7c4
LW
2764 if (regmatch(cc->scan))
2765 sayYES;
2766 cc->cur = n - 1;
c277df42 2767 cc->lastloc = lastloc;
4633a7c4 2768 sayNO;
a0d0e21e
LW
2769 }
2770
2c2d71f5
JH
2771 if (scan->flags) {
2772 /* Check whether we already were at this position.
2773 Postpone detection until we know the match is not
2774 *that* much linear. */
2775 if (!PL_reg_maxiter) {
2776 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2777 PL_reg_leftiter = PL_reg_maxiter;
2778 }
2779 if (PL_reg_leftiter-- == 0) {
2780 I32 size = (PL_reg_maxiter + 7)/8;
2781 if (PL_reg_poscache) {
2782 if (PL_reg_poscache_size < size) {
2783 Renew(PL_reg_poscache, size, char);
2784 PL_reg_poscache_size = size;
2785 }
2786 Zero(PL_reg_poscache, size, char);
2787 }
2788 else {
2789 PL_reg_poscache_size = size;
2790 Newz(29, PL_reg_poscache, size, char);
2791 }
2792 DEBUG_r(
2793 PerlIO_printf(Perl_debug_log,
2794 "%sDetected a super-linear match, switching on caching%s...\n",
2795 PL_colors[4], PL_colors[5])
2796 );
2797 }
2798 if (PL_reg_leftiter < 0) {
2799 I32 o = locinput - PL_bostr, b;
2800
2801 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2802 b = o % 8;
2803 o /= 8;
2804 if (PL_reg_poscache[o] & (1<<b)) {
2805 DEBUG_r(
2806 PerlIO_printf(Perl_debug_log,
2807 "%*s already tried at this position...\n",
2808 REPORT_CODE_OFF+PL_regindent*2, "")
2809 );
7821416a 2810 sayNO_SILENT;
2c2d71f5
JH
2811 }
2812 PL_reg_poscache[o] |= (1<<b);
2813 }
2814 }
2815
a0d0e21e
LW
2816 /* Prefer next over scan for minimal matching. */
2817
2818 if (cc->minmod) {
3280af22 2819 PL_regcc = cc->oldcc;
2ab05381
IZ
2820 if (PL_regcc)
2821 ln = PL_regcc->cur;
5f05dabc 2822 cp = regcppush(cc->parenfloor);
02db2b7b 2823 REGCP_SET(lastcp);
5f05dabc 2824 if (regmatch(cc->next)) {
c277df42 2825 regcpblow(cp);
4633a7c4 2826 sayYES; /* All done. */
5f05dabc 2827 }
02db2b7b 2828 REGCP_UNWIND(lastcp);
5f05dabc 2829 regcppop();
2ab05381
IZ
2830 if (PL_regcc)
2831 PL_regcc->cur = ln;
3280af22 2832 PL_regcc = cc;
a0d0e21e 2833
c277df42 2834 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2835 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2836 && !(PL_reg_flags & RF_warned)) {
2837 PL_reg_flags |= RF_warned;
e476b1b5 2838 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2839 "Complex regular subexpression recursion",
2840 REG_INFTY - 1);
c277df42 2841 }
4633a7c4 2842 sayNO;
c277df42 2843 }
a687059c 2844
c277df42 2845 DEBUG_r(
c3464db5
DD
2846 PerlIO_printf(Perl_debug_log,
2847 "%*s trying longer...\n",
3280af22 2848 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2849 );
a0d0e21e 2850 /* Try scanning more and see if it helps. */
3280af22 2851 PL_reginput = locinput;
a0d0e21e
LW
2852 cc->cur = n;
2853 cc->lastloc = locinput;
5f05dabc 2854 cp = regcppush(cc->parenfloor);
02db2b7b 2855 REGCP_SET(lastcp);
5f05dabc 2856 if (regmatch(cc->scan)) {
c277df42 2857 regcpblow(cp);
4633a7c4 2858 sayYES;
5f05dabc 2859 }
02db2b7b 2860 REGCP_UNWIND(lastcp);
5f05dabc 2861 regcppop();
4633a7c4 2862 cc->cur = n - 1;
c277df42 2863 cc->lastloc = lastloc;
4633a7c4 2864 sayNO;
a0d0e21e
LW
2865 }
2866
2867 /* Prefer scan over next for maximal matching. */
2868
2869 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2870 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2871 cc->cur = n;
2872 cc->lastloc = locinput;
02db2b7b 2873 REGCP_SET(lastcp);
5f05dabc 2874 if (regmatch(cc->scan)) {
c277df42 2875 regcpblow(cp);
4633a7c4 2876 sayYES;
5f05dabc 2877 }
02db2b7b 2878 REGCP_UNWIND(lastcp);
a0d0e21e 2879 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2880 PL_reginput = locinput;
c277df42 2881 DEBUG_r(
c3464db5
DD
2882 PerlIO_printf(Perl_debug_log,
2883 "%*s failed, try continuation...\n",
3280af22 2884 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2885 );
2886 }
e476b1b5 2887 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2888 && !(PL_reg_flags & RF_warned)) {
3280af22 2889 PL_reg_flags |= RF_warned;
e476b1b5 2890 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2891 "Complex regular subexpression recursion",
2892 REG_INFTY - 1);
a0d0e21e
LW
2893 }
2894
2895 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2896 PL_regcc = cc->oldcc;
2ab05381
IZ
2897 if (PL_regcc)
2898 ln = PL_regcc->cur;
a0d0e21e 2899 if (regmatch(cc->next))
4633a7c4 2900 sayYES;
2ab05381
IZ
2901 if (PL_regcc)
2902 PL_regcc->cur = ln;
3280af22 2903 PL_regcc = cc;
4633a7c4 2904 cc->cur = n - 1;
c277df42 2905 cc->lastloc = lastloc;
4633a7c4 2906 sayNO;
a0d0e21e
LW
2907 }
2908 /* NOT REACHED */
c277df42
IZ
2909 case BRANCHJ:
2910 next = scan + ARG(scan);
2911 if (next == scan)
2912 next = NULL;
2913 inner = NEXTOPER(NEXTOPER(scan));
2914 goto do_branch;
2915 case BRANCH:
2916 inner = NEXTOPER(scan);
2917 do_branch:
2918 {
2919 CHECKPOINT lastcp;
2920 c1 = OP(scan);
2921 if (OP(next) != c1) /* No choice. */
2922 next = inner; /* Avoid recursion. */
a0d0e21e 2923 else {
02db2b7b
IZ
2924 I32 lastparen = *PL_reglastparen;
2925 I32 unwind1;
2926 re_unwind_branch_t *uw;
2927
2928 /* Put unwinding data on stack */
2929 unwind1 = SSNEWt(1,re_unwind_branch_t);
2930 uw = SSPTRt(unwind1,re_unwind_branch_t);
2931 uw->prev = unwind;
2932 unwind = unwind1;
2933 uw->type = ((c1 == BRANCH)
2934 ? RE_UNWIND_BRANCH
2935 : RE_UNWIND_BRANCHJ);
2936 uw->lastparen = lastparen;
2937 uw->next = next;
2938 uw->locinput = locinput;
2939 uw->nextchr = nextchr;
2940#ifdef DEBUGGING
2941 uw->regindent = ++PL_regindent;
2942#endif
c277df42 2943
02db2b7b
IZ
2944 REGCP_SET(uw->lastcp);
2945
2946 /* Now go into the first branch */
2947 next = inner;
a687059c 2948 }
a0d0e21e
LW
2949 }
2950 break;
2951 case MINMOD:
2952 minmod = 1;
2953 break;
c277df42
IZ
2954 case CURLYM:
2955 {
00db4c45 2956 I32 l = 0;
c277df42
IZ
2957 CHECKPOINT lastcp;
2958
2959 /* We suppose that the next guy does not need
2960 backtracking: in particular, it is of constant length,
2961 and has no parenths to influence future backrefs. */
2962 ln = ARG1(scan); /* min to match */
2963 n = ARG2(scan); /* max to match */
c277df42
IZ
2964 paren = scan->flags;
2965 if (paren) {
3280af22
NIS
2966 if (paren > PL_regsize)
2967 PL_regsize = paren;
2968 if (paren > *PL_reglastparen)
2969 *PL_reglastparen = paren;
c277df42 2970 }
dc45a647 2971 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2972 if (paren)
2973 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2974 PL_reginput = locinput;
c277df42
IZ
2975 if (minmod) {
2976 minmod = 0;
2977 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2978 sayNO;
5f4b28b2 2979 if (ln && l == 0 && n >= ln
c277df42
IZ
2980 /* In fact, this is tricky. If paren, then the
2981 fact that we did/didnot match may influence
2982 future execution. */
2983 && !(paren && ln == 0))
2984 ln = n;
3280af22 2985 locinput = PL_reginput;
22c35a8c 2986 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2987 c1 = (U8)*STRING(next);
c277df42 2988 if (OP(next) == EXACTF)
22c35a8c 2989 c2 = PL_fold[c1];
c277df42 2990 else if (OP(next) == EXACTFL)
22c35a8c 2991 c2 = PL_fold_locale[c1];
c277df42
IZ
2992 else
2993 c2 = c1;
a0ed51b3
LW
2994 }
2995 else
c277df42 2996 c1 = c2 = -1000;
02db2b7b 2997 REGCP_SET(lastcp);
5f4b28b2 2998 /* This may be improved if l == 0. */
c277df42
IZ
2999 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3000 /* If it could work, try it. */
3001 if (c1 == -1000 ||
3280af22
NIS
3002 UCHARAT(PL_reginput) == c1 ||
3003 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3004 {
3005 if (paren) {
3006 if (n) {
cf93c79d
IZ
3007 PL_regstartp[paren] =
3008 HOPc(PL_reginput, -l) - PL_bostr;
3009 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3010 }
3011 else
cf93c79d 3012 PL_regendp[paren] = -1;
c277df42
IZ
3013 }
3014 if (regmatch(next))
3015 sayYES;
02db2b7b 3016 REGCP_UNWIND(lastcp);
c277df42
IZ
3017 }
3018 /* Couldn't or didn't -- move forward. */
3280af22 3019 PL_reginput = locinput;
c277df42
IZ
3020 if (regrepeat_hard(scan, 1, &l)) {
3021 ln++;
3280af22 3022 locinput = PL_reginput;
c277df42
IZ
3023 }
3024 else
3025 sayNO;
3026 }
a0ed51b3
LW
3027 }
3028 else {
c277df42
IZ
3029 n = regrepeat_hard(scan, n, &l);
3030 if (n != 0 && l == 0
3031 /* In fact, this is tricky. If paren, then the
3032 fact that we did/didnot match may influence
3033 future execution. */
3034 && !(paren && ln == 0))
3035 ln = n;
3280af22 3036 locinput = PL_reginput;
c277df42 3037 DEBUG_r(
5c0ca799 3038 PerlIO_printf(Perl_debug_log,
faccc32b 3039 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3040 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3041 (IV) n, (IV)l)
c277df42
IZ
3042 );
3043 if (n >= ln) {
22c35a8c 3044 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3045 c1 = (U8)*STRING(next);
c277df42 3046 if (OP(next) == EXACTF)
22c35a8c 3047 c2 = PL_fold[c1];
c277df42 3048 else if (OP(next) == EXACTFL)
22c35a8c 3049 c2 = PL_fold_locale[c1];
c277df42
IZ
3050 else
3051 c2 = c1;
a0ed51b3
LW
3052 }
3053 else
c277df42
IZ
3054 c1 = c2 = -1000;
3055 }
02db2b7b 3056 REGCP_SET(lastcp);
c277df42
IZ
3057 while (n >= ln) {
3058 /* If it could work, try it. */
3059 if (c1 == -1000 ||
3280af22
NIS
3060 UCHARAT(PL_reginput) == c1 ||
3061 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3062 {
3063 DEBUG_r(
c3464db5 3064 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3065 "%*s trying tail with n=%"IVdf"...\n",
3066 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3067 );
3068 if (paren) {
3069 if (n) {
cf93c79d
IZ
3070 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3071 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3072 }
a0ed51b3 3073 else
cf93c79d 3074 PL_regendp[paren] = -1;
c277df42 3075 }
a0ed51b3
LW
3076 if (regmatch(next))
3077 sayYES;
02db2b7b 3078 REGCP_UNWIND(lastcp);
a0ed51b3 3079 }
c277df42
IZ
3080 /* Couldn't or didn't -- back up. */
3081 n--;
dfe13c55 3082 locinput = HOPc(locinput, -l);
3280af22 3083 PL_reginput = locinput;
c277df42
IZ
3084 }
3085 }
3086 sayNO;
3087 break;
3088 }
3089 case CURLYN:
3090 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3091 if (paren > PL_regsize)
3092 PL_regsize = paren;
3093 if (paren > *PL_reglastparen)
3094 *PL_reglastparen = paren;
c277df42
IZ
3095 ln = ARG1(scan); /* min to match */
3096 n = ARG2(scan); /* max to match */
dc45a647 3097 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3098 goto repeat;
a0d0e21e 3099 case CURLY:
c277df42 3100 paren = 0;
a0d0e21e
LW
3101 ln = ARG1(scan); /* min to match */
3102 n = ARG2(scan); /* max to match */
dc45a647 3103 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3104 goto repeat;
3105 case STAR:
3106 ln = 0;
c277df42 3107 n = REG_INFTY;
a0d0e21e 3108 scan = NEXTOPER(scan);
c277df42 3109 paren = 0;
a0d0e21e
LW
3110 goto repeat;
3111 case PLUS:
c277df42
IZ
3112 ln = 1;
3113 n = REG_INFTY;
3114 scan = NEXTOPER(scan);
3115 paren = 0;
3116 repeat:
a0d0e21e
LW
3117 /*
3118 * Lookahead to avoid useless match attempts
3119 * when we know what character comes next.
3120 */
22c35a8c 3121 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3122 U8 *s = (U8*)STRING(next);
3123 if (!UTF) {
3124 c2 = c1 = *s;
3125 if (OP(next) == EXACTF)
3126 c2 = PL_fold[c1];
3127 else if (OP(next) == EXACTFL)
3128 c2 = PL_fold_locale[c1];
3129 }
3130 else { /* UTF */
3131 if (OP(next) == EXACTF) {
3132 c1 = to_utf8_lower(s);
3133 c2 = to_utf8_upper(s);
3134 }
3135 else {
3136 c2 = c1 = utf8_to_uv_simple(s, NULL);
3137 }
3138 }
bbce6d69 3139 }
a0d0e21e 3140 else
bbce6d69 3141 c1 = c2 = -1000;
3280af22 3142 PL_reginput = locinput;
a0d0e21e 3143 if (minmod) {
c277df42 3144 CHECKPOINT lastcp;
a0d0e21e
LW
3145 minmod = 0;
3146 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3147 sayNO;
a0ed51b3 3148 locinput = PL_reginput;
02db2b7b 3149 REGCP_SET(lastcp);
0fe9bf95 3150 if (c1 != -1000) {
1aa99e6b 3151 char *e; /* Should not check after this */
0fe9bf95
IZ
3152 char *old = locinput;
3153
1aa99e6b 3154 if (n == REG_INFTY) {
0fe9bf95 3155 e = PL_regeol - 1;
1aa99e6b
IH
3156 if (do_utf8)
3157 while (UTF8_IS_CONTINUATION(*(U8*)e))
3158 e--;
3159 }
3160 else if (do_utf8) {
3161 int m = n - ln;
3162 for (e = locinput;
3163 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3164 e += UTF8SKIP(e);
3165 }
3166 else {
3167 e = locinput + n - ln;
3168 if (e >= PL_regeol)
3169 e = PL_regeol - 1;
3170 }
0fe9bf95 3171 while (1) {
1aa99e6b 3172 int count;
0fe9bf95 3173 /* Find place 'next' could work */
1aa99e6b
IH
3174 if (!do_utf8) {
3175 if (c1 == c2) {
3176 while (locinput <= e && *locinput != c1)
3177 locinput++;
3178 } else {
3179 while (locinput <= e
3180 && *locinput != c1
3181 && *locinput != c2)
3182 locinput++;
3183 }
3184 count = locinput - old;
3185 }
3186 else {
3187 STRLEN len;
3188 if (c1 == c2) {
3189 for (count = 0;
3190 locinput <= e &&
3191 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3192 count++)
3193 locinput += len;
3194
3195 } else {
3196 for (count = 0; locinput <= e; count++) {
3197 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3198 if (c == c1 || c == c2)
3199 break;
3200 locinput += len;
3201 }
3202 }
0fe9bf95
IZ
3203 }
3204 if (locinput > e)
3205 sayNO;
3206 /* PL_reginput == old now */
3207 if (locinput != old) {
3208 ln = 1; /* Did some */
1aa99e6b 3209 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3210 sayNO;
3211 }
3212 /* PL_reginput == locinput now */
29d1e993 3213 TRYPAREN(paren, ln, locinput);
0fe9bf95 3214 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3215 REGCP_UNWIND(lastcp);
0fe9bf95 3216 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3217 old = locinput;
3218 if (do_utf8)
3219 locinput += UTF8SKIP(locinput);
3220 else
3221 locinput++;
0fe9bf95
IZ
3222 }
3223 }
3224 else
c277df42 3225 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3226 UV c;
3227 if (c1 != -1000) {
3228 if (do_utf8)
3229 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3230 else
3231 c = UCHARAT(PL_reginput);
3232 }
a0d0e21e 3233 /* If it could work, try it. */
1aa99e6b 3234 if (c1 == -1000 || c == c1 || c == c2)
bbce6d69 3235 {
29d1e993 3236 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3237 REGCP_UNWIND(lastcp);
bbce6d69 3238 }
c277df42 3239 /* Couldn't or didn't -- move forward. */
a0ed51b3 3240 PL_reginput = locinput;
a0d0e21e
LW
3241 if (regrepeat(scan, 1)) {
3242 ln++;
a0ed51b3
LW
3243 locinput = PL_reginput;
3244 }
3245 else
4633a7c4 3246 sayNO;
a0d0e21e
LW
3247 }
3248 }
3249 else {
c277df42 3250 CHECKPOINT lastcp;
a0d0e21e 3251 n = regrepeat(scan, n);
a0ed51b3 3252 locinput = PL_reginput;
22c35a8c 3253 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3254 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3255 ln = n; /* why back off? */
1aeab75a
GS
3256 /* ...because $ and \Z can match before *and* after
3257 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3258 We should back off by one in this case. */
3259 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3260 ln--;
3261 }
02db2b7b 3262 REGCP_SET(lastcp);
c277df42 3263 if (paren) {
1aa99e6b 3264 UV c;
c277df42 3265 while (n >= ln) {
1aa99e6b
IH
3266 if (c1 != -1000) {
3267 if (do_utf8)
3268 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3269 else
3270 c = UCHARAT(PL_reginput);
3271 }
c277df42 3272 /* If it could work, try it. */
1aa99e6b 3273 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3274 {
29d1e993 3275 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3276 REGCP_UNWIND(lastcp);
c277df42
IZ
3277 }
3278 /* Couldn't or didn't -- back up. */
3279 n--;
dfe13c55 3280 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3281 }
a0ed51b3
LW
3282 }
3283 else {
1aa99e6b 3284 UV c;
c277df42 3285 while (n >= ln) {
1aa99e6b
IH
3286 if (c1 != -1000) {
3287 if (do_utf8)
3288 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3289 else
3290 c = UCHARAT(PL_reginput);
3291 }
c277df42 3292 /* If it could work, try it. */
1aa99e6b 3293 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3294 {
29d1e993 3295 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3296 REGCP_UNWIND(lastcp);
c277df42
IZ
3297 }
3298 /* Couldn't or didn't -- back up. */
3299 n--;
dfe13c55 3300 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3301 }
a0d0e21e
LW
3302 }
3303 }
4633a7c4 3304 sayNO;
c277df42 3305 break;
a0d0e21e 3306 case END:
0f5d15d6
IZ
3307 if (PL_reg_call_cc) {
3308 re_cc_state *cur_call_cc = PL_reg_call_cc;
3309 CURCUR *cctmp = PL_regcc;
3310 regexp *re = PL_reg_re;
3311 CHECKPOINT cp, lastcp;
3312
3313 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3314 REGCP_SET(lastcp);
0f5d15d6
IZ
3315 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3316 the caller. */
3317 PL_reginput = locinput; /* Make position available to
3318 the callcc. */
3319 cache_re(PL_reg_call_cc->re);
3320 PL_regcc = PL_reg_call_cc->cc;
3321 PL_reg_call_cc = PL_reg_call_cc->prev;
3322 if (regmatch(cur_call_cc->node)) {
3323 PL_reg_call_cc = cur_call_cc;
3324 regcpblow(cp);
3325 sayYES;
3326 }
02db2b7b 3327 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3328 regcppop();
3329 PL_reg_call_cc = cur_call_cc;
3330 PL_regcc = cctmp;
3331 PL_reg_re = re;
3332 cache_re(re);
3333
3334 DEBUG_r(
3335 PerlIO_printf(Perl_debug_log,
3336 "%*s continuation failed...\n",
3337 REPORT_CODE_OFF+PL_regindent*2, "")
3338 );
7821416a 3339 sayNO_SILENT;
0f5d15d6 3340 }
7821416a
IZ
3341 if (locinput < PL_regtill) {
3342 DEBUG_r(PerlIO_printf(Perl_debug_log,
3343 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3344 PL_colors[4],
3345 (long)(locinput - PL_reg_starttry),
3346 (long)(PL_regtill - PL_reg_starttry),
3347 PL_colors[5]));
3348 sayNO_FINAL; /* Cannot match: too short. */
3349 }
3350 PL_reginput = locinput; /* put where regtry can find it */
3351 sayYES_FINAL; /* Success! */
7e5428c5 3352 case SUCCEED:
3280af22 3353 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3354 sayYES_LOUD; /* Success! */
c277df42
IZ
3355 case SUSPEND:
3356 n = 1;
9fe1d20c 3357 PL_reginput = locinput;
c277df42 3358 goto do_ifmatch;
a0d0e21e 3359 case UNLESSM:
c277df42 3360 n = 0;
a0ed51b3 3361 if (scan->flags) {
0fe9bf95
IZ
3362 if (UTF) { /* XXXX This is absolutely
3363 broken, we read before
3364 start of string. */
3365 s = HOPMAYBEc(locinput, -scan->flags);
3366 if (!s)
3367 goto say_yes;
3368 PL_reginput = s;
3369 }
3370 else {
3371 if (locinput < PL_bostr + scan->flags)
3372 goto say_yes;
3373 PL_reginput = locinput - scan->flags;
3374 goto do_ifmatch;
3375 }
a0ed51b3
LW
3376 }
3377 else
3378 PL_reginput = locinput;
c277df42
IZ
3379 goto do_ifmatch;
3380 case IFMATCH:
3381 n = 1;
a0ed51b3 3382 if (scan->flags) {
0fe9bf95
IZ
3383 if (UTF) { /* XXXX This is absolutely
3384 broken, we read before
3385 start of string. */
3386 s = HOPMAYBEc(locinput, -scan->flags);
3387 if (!s || s < PL_bostr)
3388 goto say_no;
3389 PL_reginput = s;
3390 }
3391 else {
3392 if (locinput < PL_bostr + scan->flags)
3393 goto say_no;
3394 PL_reginput = locinput - scan->flags;
3395 goto do_ifmatch;
3396 }
a0ed51b3
LW
3397 }
3398 else
3399 PL_reginput = locinput;
3400
c277df42 3401 do_ifmatch:
c277df42
IZ
3402 inner = NEXTOPER(NEXTOPER(scan));
3403 if (regmatch(inner) != n) {
3404 say_no:
3405 if (logical) {
3406 logical = 0;
3407 sw = 0;
3408 goto do_longjump;
a0ed51b3
LW
3409 }
3410 else
c277df42
IZ
3411 sayNO;
3412 }
3413 say_yes:
3414 if (logical) {
3415 logical = 0;
3416 sw = 1;
3417 }
fe44a5e8 3418 if (OP(scan) == SUSPEND) {
3280af22 3419 locinput = PL_reginput;
565764a8 3420 nextchr = UCHARAT(locinput);
fe44a5e8 3421 }
c277df42
IZ
3422 /* FALL THROUGH. */
3423 case LONGJMP:
3424 do_longjump:
3425 next = scan + ARG(scan);
3426 if (next == scan)
3427 next = NULL;
a0d0e21e
LW
3428 break;
3429 default:
b900a521 3430 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3431 PTR2UV(scan), OP(scan));
cea2e8a9 3432 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3433 }
02db2b7b 3434 reenter:
a0d0e21e
LW
3435 scan = next;
3436 }
a687059c 3437
a0d0e21e
LW
3438 /*
3439 * We get here only if there's trouble -- normally "case END" is
3440 * the terminating point.
3441 */
cea2e8a9 3442 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3443 /*NOTREACHED*/
4633a7c4
LW
3444 sayNO;
3445
7821416a
IZ
3446yes_loud:
3447 DEBUG_r(
3448 PerlIO_printf(Perl_debug_log,
3449 "%*s %scould match...%s\n",
3450 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3451 );
3452 goto yes;
3453yes_final:
3454 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3455 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3456yes:
3457#ifdef DEBUGGING
3280af22 3458 PL_regindent--;
4633a7c4 3459#endif
02db2b7b
IZ
3460
3461#if 0 /* Breaks $^R */
3462 if (unwind)
3463 regcpblow(firstcp);
3464#endif
4633a7c4
LW
3465 return 1;
3466
3467no:
7821416a
IZ
3468 DEBUG_r(
3469 PerlIO_printf(Perl_debug_log,
3470 "%*s %sfailed...%s\n",
3471 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3472 );
3473 goto do_no;
3474no_final:
3475do_no:
02db2b7b
IZ
3476 if (unwind) {
3477 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3478
3479 switch (uw->type) {
3480 case RE_UNWIND_BRANCH:
3481 case RE_UNWIND_BRANCHJ:
3482 {
3483 re_unwind_branch_t *uwb = &(uw->branch);
3484 I32 lastparen = uwb->lastparen;
3485
3486 REGCP_UNWIND(uwb->lastcp);
3487 for (n = *PL_reglastparen; n > lastparen; n--)
3488 PL_regendp[n] = -1;
3489 *PL_reglastparen = n;
3490 scan = next = uwb->next;
3491 if ( !scan ||
3492 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3493 ? BRANCH : BRANCHJ) ) { /* Failure */
3494 unwind = uwb->prev;
3495#ifdef DEBUGGING
3496 PL_regindent--;
3497#endif
3498 goto do_no;
3499 }
3500 /* Have more choice yet. Reuse the same uwb. */
3501 /*SUPPRESS 560*/
3502 if ((n = (uwb->type == RE_UNWIND_BRANCH
3503 ? NEXT_OFF(next) : ARG(next))))
3504 next += n;
3505 else
3506 next = NULL; /* XXXX Needn't unwinding in this case... */
3507 uwb->next = next;
3508 next = NEXTOPER(scan);
3509 if (uwb->type == RE_UNWIND_BRANCHJ)
3510 next = NEXTOPER(next);
3511 locinput = uwb->locinput;
3512 nextchr = uwb->nextchr;
3513#ifdef DEBUGGING
3514 PL_regindent = uwb->regindent;
3515#endif
3516
3517 goto reenter;
3518 }
3519 /* NOT REACHED */
3520 default:
3521 Perl_croak(aTHX_ "regexp unwind memory corruption");
3522 }
3523 /* NOT REACHED */
3524 }
4633a7c4 3525#ifdef DEBUGGING
3280af22 3526 PL_regindent--;
4633a7c4 3527#endif
a0d0e21e 3528 return 0;
a687059c
LW
3529}
3530
3531/*
3532 - regrepeat - repeatedly match something simple, report how many
3533 */
3534/*
3535 * [This routine now assumes that it will only match on things of length 1.
3536 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3537 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3538 */
76e3520e 3539STATIC I32
cea2e8a9 3540S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3541{
a0d0e21e 3542 register char *scan;
a0d0e21e 3543 register I32 c;
3280af22 3544 register char *loceol = PL_regeol;
a0ed51b3 3545 register I32 hardcount = 0;
ffc61ed2 3546 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3547
3280af22 3548 scan = PL_reginput;
c277df42 3549 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3550 loceol = scan + max;
a0d0e21e 3551 switch (OP(p)) {
22c35a8c 3552 case REG_ANY:
1aa99e6b 3553 if (do_utf8) {
ffc61ed2 3554 loceol = PL_regeol;
1aa99e6b 3555 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3556 scan += UTF8SKIP(scan);
3557 hardcount++;
3558 }
3559 } else {
3560 while (scan < loceol && *scan != '\n')
3561 scan++;
a0ed51b3
LW
3562 }
3563 break;
ffc61ed2 3564 case SANY:
3baa4c62 3565 scan = loceol;
a0ed51b3 3566 break;
bbce6d69 3567 case EXACT: /* length of string is 1 */
cd439c50 3568 c = (U8)*STRING(p);
bbce6d69 3569 while (scan < loceol && UCHARAT(scan) == c)
3570 scan++;
3571 break;
3572 case EXACTF: /* length of string is 1 */
cd439c50 3573 c = (U8)*STRING(p);
bbce6d69 3574 while (scan < loceol &&
22c35a8c 3575 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3576 scan++;
3577 break;
3578 case EXACTFL: /* length of string is 1 */
3280af22 3579 PL_reg_flags |= RF_tainted;
cd439c50 3580 c = (U8)*STRING(p);
bbce6d69 3581 while (scan < loceol &&
22c35a8c 3582 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3583 scan++;
3584 break;
3585 case ANYOF:
ffc61ed2
JH
3586 if (do_utf8) {
3587 loceol = PL_regeol;
1aa99e6b
IH
3588 while (hardcount < max && scan < loceol &&
3589 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3590 scan += UTF8SKIP(scan);
3591 hardcount++;
3592 }
3593 } else {
3594 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3595 scan++;
3596 }
a0d0e21e
LW
3597 break;
3598 case ALNUM:
1aa99e6b 3599 if (do_utf8) {
ffc61ed2 3600 loceol = PL_regeol;
1aa99e6b
IH
3601 while (hardcount < max && scan < loceol &&
3602 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3603 scan += UTF8SKIP(scan);
3604 hardcount++;
3605 }
3606 } else {
3607 while (scan < loceol && isALNUM(*scan))
3608 scan++;
a0ed51b3
LW
3609 }
3610 break;
bbce6d69 3611 case ALNUML:
3280af22 3612 PL_reg_flags |= RF_tainted;
1aa99e6b 3613 if (do_utf8) {
ffc61ed2 3614 loceol = PL_regeol;
1aa99e6b
IH
3615 while (hardcount < max && scan < loceol &&
3616 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3617 scan += UTF8SKIP(scan);
3618 hardcount++;
3619 }
3620 } else {
3621 while (scan < loceol && isALNUM_LC(*scan))
3622 scan++;
a0ed51b3
LW
3623 }
3624 break;
a0d0e21e 3625 case NALNUM:
1aa99e6b 3626 if (do_utf8) {
ffc61ed2 3627 loceol = PL_regeol;
1aa99e6b
IH
3628 while (hardcount < max && scan < loceol &&
3629 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3630 scan += UTF8SKIP(scan);
3631 hardcount++;
3632 }
3633 } else {
3634 while (scan < loceol && !isALNUM(*scan))
3635 scan++;
a0ed51b3
LW
3636 }
3637 break;
bbce6d69 3638 case NALNUML:
3280af22 3639 PL_reg_flags |= RF_tainted;
1aa99e6b 3640 if (do_utf8) {
ffc61ed2 3641 loceol = PL_regeol;
1aa99e6b
IH
3642 while (hardcount < max && scan < loceol &&
3643 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3644 scan += UTF8SKIP(scan);
3645 hardcount++;
3646 }
3647 } else {
3648 while (scan < loceol && !isALNUM_LC(*scan))
3649 scan++;
a0ed51b3
LW
3650 }
3651 break;
a0d0e21e 3652 case SPACE:
1aa99e6b 3653 if (do_utf8) {
ffc61ed2 3654 loceol = PL_regeol;
1aa99e6b 3655 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3656 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3657 scan += UTF8SKIP(scan);
3658 hardcount++;
3659 }
3660 } else {
3661 while (scan < loceol && isSPACE(*scan))
3662 scan++;
a0ed51b3
LW
3663 }
3664 break;
bbce6d69 3665 case SPACEL:
3280af22 3666 PL_reg_flags |= RF_tainted;
1aa99e6b 3667 if (do_utf8) {
ffc61ed2 3668 loceol = PL_regeol;
1aa99e6b 3669 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3670 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3671 scan += UTF8SKIP(scan);
3672 hardcount++;
3673 }
3674 } else {
3675 while (scan < loceol && isSPACE_LC(*scan))
3676 scan++;
a0ed51b3
LW
3677 }
3678 break;
a0d0e21e 3679 case NSPACE:
1aa99e6b 3680 if (do_utf8) {
ffc61ed2 3681 loceol = PL_regeol;
1aa99e6b 3682 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3683 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3684 scan += UTF8SKIP(scan);
3685 hardcount++;
3686 }
3687 } else {
3688 while (scan < loceol && !isSPACE(*scan))
3689 scan++;
3690 break;
a0ed51b3 3691 }
bbce6d69 3692 case NSPACEL:
3280af22 3693 PL_reg_flags |= RF_tainted;
1aa99e6b 3694 if (do_utf8) {
ffc61ed2 3695 loceol = PL_regeol;
1aa99e6b 3696 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3697 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3698 scan += UTF8SKIP(scan);
3699 hardcount++;
3700 }
3701 } else {
3702 while (scan < loceol && !isSPACE_LC(*scan))
3703 scan++;
a0ed51b3
LW
3704 }
3705 break;
a0d0e21e 3706 case DIGIT:
1aa99e6b 3707 if (do_utf8) {
ffc61ed2 3708 loceol = PL_regeol;
1aa99e6b
IH
3709 while (hardcount < max && scan < loceol &&
3710 swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3711 scan += UTF8SKIP(scan);
3712 hardcount++;
3713 }
3714 } else {
3715 while (scan < loceol && isDIGIT(*scan))
3716 scan++;
a0ed51b3
LW
3717 }
3718 break;
a0d0e21e 3719 case NDIGIT:
1aa99e6b 3720 if (do_utf8) {
ffc61ed2 3721 loceol = PL_regeol;
1aa99e6b
IH
3722 while (hardcount < max && scan < loceol &&
3723 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3724 scan += UTF8SKIP(scan);
3725 hardcount++;
3726 }
3727 } else {
3728 while (scan < loceol && !isDIGIT(*scan))
3729 scan++;
a0ed51b3
LW
3730 }
3731 break;
a0d0e21e
LW
3732 default: /* Called on something of 0 width. */
3733 break; /* So match right here or not at all. */
3734 }
a687059c 3735
a0ed51b3
LW
3736 if (hardcount)
3737 c = hardcount;
3738 else
3739 c = scan - PL_reginput;
3280af22 3740 PL_reginput = scan;
a687059c 3741
c277df42
IZ
3742 DEBUG_r(
3743 {
3744 SV *prop = sv_newmortal();
3745
3746 regprop(prop, p);
3747 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3748 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3749 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3750 });
3751
a0d0e21e 3752 return(c);
a687059c
LW
3753}
3754
3755/*
c277df42
IZ
3756 - regrepeat_hard - repeatedly match something, report total lenth and length
3757 *
3758 * The repeater is supposed to have constant length.
3759 */
3760
76e3520e 3761STATIC I32
cea2e8a9 3762S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3763{
3764 register char *scan;
3765 register char *start;
3280af22 3766 register char *loceol = PL_regeol;
a0ed51b3 3767 I32 l = 0;
708e3b05 3768 I32 count = 0, res = 1;
a0ed51b3
LW
3769
3770 if (!max)
3771 return 0;
c277df42 3772
3280af22 3773 start = PL_reginput;
1aa99e6b 3774 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3775 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3776 if (!count++) {
3777 l = 0;
3778 while (start < PL_reginput) {
3779 l++;
3780 start += UTF8SKIP(start);
3781 }
3782 *lp = l;
3783 if (l == 0)
3784 return max;
3785 }
3786 if (count == max)
3787 return count;
3788 }
3789 }
3790 else {
708e3b05 3791 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3792 if (!count++) {
3793 *lp = l = PL_reginput - start;
3794 if (max != REG_INFTY && l*max < loceol - scan)
3795 loceol = scan + l*max;
3796 if (l == 0)
3797 return max;
c277df42
IZ
3798 }
3799 }
3800 }
708e3b05 3801 if (!res)
3280af22 3802 PL_reginput = scan;
c277df42 3803
a0ed51b3 3804 return count;
c277df42
IZ
3805}
3806
3807/*
ffc61ed2
JH
3808- regclass_swash - prepare the utf8 swash
3809*/
3810
3811SV *
3812Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3813{
3814 SV *sw = NULL;
3815 SV *si = NULL;
3816
3817 if (PL_regdata && PL_regdata->count) {
3818 U32 n = ARG(node);
3819
3820 if (PL_regdata->what[n] == 's') {
3821 SV *rv = (SV*)PL_regdata->data[n];
3822 AV *av = (AV*)SvRV((SV*)rv);
3823 SV **a;
3824
3825 si = *av_fetch(av, 0, FALSE);
3826 a = av_fetch(av, 1, FALSE);
3827
3828 if (a)
3829 sw = *a;
3830 else if (si && doinit) {
3831 sw = swash_init("utf8", "", si, 1, 0);
3832 (void)av_store(av, 1, sw);
3833 }
3834 }
3835 }
3836
3837 if (initsvp)
3838 *initsvp = si;
3839
3840 return sw;
3841}
3842
3843/*
cb8d8820 3844 - reginclass - determine if a character falls into a character class
bbce6d69 3845 */
3846
76e3520e 3847STATIC bool
ffc61ed2 3848S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3849{
ffc61ed2 3850 char flags = ANYOF_FLAGS(n);
bbce6d69 3851 bool match = FALSE;
1aa99e6b
IH
3852 UV c;
3853 STRLEN len;
3854
3855 if (do_utf8)
3856 c = utf8_to_uv_simple(p, &len);
3857 else
3858 c = *p;
bbce6d69 3859
ffc61ed2
JH
3860 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3861 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3862 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3863 match = TRUE;
bbce6d69 3864 }
1aa99e6b
IH
3865 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3866 match = TRUE;
ffc61ed2
JH
3867 if (!match) {
3868 SV *sw = regclass_swash(n, TRUE, 0);
3869
3870 if (sw) {
3871 if (swash_fetch(sw, p))
3872 match = TRUE;
3873 else if (flags & ANYOF_FOLD) {
3874 U8 tmpbuf[UTF8_MAXLEN+1];
3875
3876 if (flags & ANYOF_LOCALE) {
3877 PL_reg_flags |= RF_tainted;
3878 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3879 }
1aa99e6b
IH
3880 else
3881 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
ffc61ed2
JH
3882 if (swash_fetch(sw, tmpbuf))
3883 match = TRUE;
3884 }
3885 }
bbce6d69 3886 }
3887 }
1aa99e6b 3888 if (!match && c < 256) {
ffc61ed2
JH
3889 if (ANYOF_BITMAP_TEST(n, c))
3890 match = TRUE;
3891 else if (flags & ANYOF_FOLD) {
3892 I32 f;
a0ed51b3 3893
ffc61ed2
JH
3894 if (flags & ANYOF_LOCALE) {
3895 PL_reg_flags |= RF_tainted;
3896 f = PL_fold_locale[c];
3897 }
3898 else
3899 f = PL_fold[c];
3900 if (f != c && ANYOF_BITMAP_TEST(n, f))
3901 match = TRUE;
3902 }
3903
3904 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3905 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3906 if (
3907 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3908 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3909 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3910 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3911 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3937 ) /* How's that for a conditional? */
3938 {
3939 match = TRUE;
3940 }
a0ed51b3 3941 }
a0ed51b3
LW
3942 }
3943
a0ed51b3
LW
3944 return (flags & ANYOF_INVERT) ? !match : match;
3945}
161b471a 3946
dfe13c55 3947STATIC U8 *
cea2e8a9 3948S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3949{
1aa99e6b
IH
3950 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3951}
3952
3953STATIC U8 *
3954S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3955{
a0ed51b3 3956 if (off >= 0) {
1aa99e6b 3957 while (off-- && s < lim) {
ffc61ed2 3958 /* XXX could check well-formedness here */
a0ed51b3 3959 s += UTF8SKIP(s);
ffc61ed2 3960 }
a0ed51b3
LW
3961 }
3962 else {
3963 while (off++) {
1aa99e6b 3964 if (s > lim) {
a0ed51b3 3965 s--;
ffc61ed2 3966 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3967 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3968 s--;
ffc61ed2
JH
3969 }
3970 /* XXX could check well-formedness here */
a0ed51b3
LW
3971 }
3972 }
3973 }
3974 return s;
3975}
161b471a 3976
dfe13c55 3977STATIC U8 *
1aa99e6b
IH
3978S_reghopmaybe(pTHX_ U8 *s, I32 off)
3979{
3980 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3981}
3982
3983STATIC U8 *
3984S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3985{
3986 if (off >= 0) {
1aa99e6b 3987 while (off-- && s < lim) {
ffc61ed2 3988 /* XXX could check well-formedness here */
a0ed51b3 3989 s += UTF8SKIP(s);
ffc61ed2 3990 }
a0ed51b3
LW
3991 if (off >= 0)
3992 return 0;
3993 }
3994 else {
3995 while (off++) {
1aa99e6b 3996 if (s > lim) {
a0ed51b3 3997 s--;
ffc61ed2 3998 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3999 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4000 s--;
ffc61ed2
JH
4001 }
4002 /* XXX could check well-formedness here */
a0ed51b3
LW
4003 }
4004 else
4005 break;
4006 }
4007 if (off <= 0)
4008 return 0;
4009 }
4010 return s;
4011}
51371543
GS
4012
4013#ifdef PERL_OBJECT
51371543
GS
4014#include "XSUB.h"
4015#endif
4016
4017static void
4018restore_pos(pTHXo_ void *arg)
4019{
51371543
GS
4020 if (PL_reg_eval_set) {
4021 if (PL_reg_oldsaved) {
4022 PL_reg_re->subbeg = PL_reg_oldsaved;
4023 PL_reg_re->sublen = PL_reg_oldsavedlen;
4024 RX_MATCH_COPIED_on(PL_reg_re);
4025 }
4026 PL_reg_magic->mg_len = PL_reg_oldpos;
4027 PL_reg_eval_set = 0;
4028 PL_curpm = PL_reg_oldcurpm;
4029 }
4030}