This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
RE: [PATCH] Add a nextstate into empty blocks
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
d9f424b2 104#define CHR_DIST(a,b) (PL_reg_sv_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
d9f424b2
JH
108#define HOP(pos,off) (PL_reg_sv_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_sv_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
d9f424b2 114 (UTF && PL_reg_sv_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
d9f424b2
JH
124#define HOP3(pos,off,lim) (PL_reg_sv_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_sv_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
acfe0abc 131static void restore_pos(pTHX_ void *arg);
51371543 132
76e3520e 133STATIC CHECKPOINT
cea2e8a9 134S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 135{
3280af22 136 int retval = PL_savestack_ix;
b1ce53c5
JH
137#define REGCP_PAREN_ELEMS 4
138 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
139 int p;
140
e49a9654
IH
141 if (paren_elems_to_push < 0)
142 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
143
a01268b5 144#define REGCP_OTHER_ELEMS 6
b1ce53c5 145 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 146 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 147/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
148 SSPUSHINT(PL_regendp[p]);
149 SSPUSHINT(PL_regstartp[p]);
3280af22 150 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
151 SSPUSHINT(p);
152 }
b1ce53c5 153/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
154 SSPUSHINT(PL_regsize);
155 SSPUSHINT(*PL_reglastparen);
a01268b5 156 SSPUSHINT(*PL_reglastcloseparen);
3280af22 157 SSPUSHPTR(PL_reginput);
41123dfd
JH
158#define REGCP_FRAME_ELEMS 2
159/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
160 * are needed for the regexp context stack bookkeeping. */
161 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 162 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 163
a0d0e21e
LW
164 return retval;
165}
166
c277df42 167/* These are needed since we do not localize EVAL nodes: */
02db2b7b 168# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 169 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 170 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 171
02db2b7b 172# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 173 PerlIO_printf(Perl_debug_log, \
faccc32b 174 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 175 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 176
76e3520e 177STATIC char *
cea2e8a9 178S_regcppop(pTHX)
a0d0e21e 179{
b1ce53c5 180 I32 i;
a0d0e21e
LW
181 U32 paren = 0;
182 char *input;
cf93c79d 183 I32 tmps;
b1ce53c5
JH
184
185 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 186 i = SSPOPINT;
b1ce53c5
JH
187 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
188 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 189 input = (char *) SSPOPPTR;
a01268b5 190 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
191 *PL_reglastparen = SSPOPINT;
192 PL_regsize = SSPOPINT;
b1ce53c5
JH
193
194 /* Now restore the parentheses context. */
41123dfd
JH
195 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
196 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 197 paren = (U32)SSPOPINT;
3280af22 198 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
199 PL_regstartp[paren] = SSPOPINT;
200 tmps = SSPOPINT;
3280af22
NIS
201 if (paren <= *PL_reglastparen)
202 PL_regendp[paren] = tmps;
c277df42 203 DEBUG_r(
c3464db5 204 PerlIO_printf(Perl_debug_log,
b900a521 205 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 206 (UV)paren, (IV)PL_regstartp[paren],
b900a521 207 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 208 (IV)PL_regendp[paren],
3280af22 209 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 210 );
a0d0e21e 211 }
c277df42 212 DEBUG_r(
3280af22 213 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 214 PerlIO_printf(Perl_debug_log,
faccc32b
JH
215 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
216 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
217 }
218 );
daf18116 219#if 1
dafc8851
JH
220 /* It would seem that the similar code in regtry()
221 * already takes care of this, and in fact it is in
222 * a better location to since this code can #if 0-ed out
223 * but the code in regtry() is needed or otherwise tests
224 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
225 * (as of patchlevel 7877) will fail. Then again,
226 * this code seems to be necessary or otherwise
227 * building DynaLoader will fail:
228 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
229 * --jhi */
3280af22
NIS
230 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
231 if (paren > PL_regsize)
cf93c79d
IZ
232 PL_regstartp[paren] = -1;
233 PL_regendp[paren] = -1;
a0d0e21e 234 }
dafc8851 235#endif
a0d0e21e
LW
236 return input;
237}
238
0f5d15d6 239STATIC char *
cea2e8a9 240S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
241{
242 I32 tmp = PL_savestack_ix;
243
244 PL_savestack_ix = ss;
245 regcppop();
246 PL_savestack_ix = tmp;
942e002e 247 return Nullch;
0f5d15d6
IZ
248}
249
250typedef struct re_cc_state
251{
252 I32 ss;
253 regnode *node;
254 struct re_cc_state *prev;
255 CURCUR *cc;
256 regexp *re;
257} re_cc_state;
258
02db2b7b 259#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 260
29d1e993
HS
261#define TRYPAREN(paren, n, input) { \
262 if (paren) { \
263 if (n) { \
264 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
265 PL_regendp[paren] = input - PL_bostr; \
266 } \
267 else \
268 PL_regendp[paren] = -1; \
269 } \
270 if (regmatch(next)) \
271 sayYES; \
272 if (paren && n) \
273 PL_regendp[paren] = -1; \
274}
275
276
a687059c 277/*
e50aee73 278 * pregexec and friends
a687059c
LW
279 */
280
281/*
c277df42 282 - pregexec - match a regexp against a string
a687059c 283 */
c277df42 284I32
864dbfa3 285Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 286 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
287/* strend: pointer to null at end of string */
288/* strbeg: real beginning of string */
289/* minend: end of match must be >=minend after stringarg. */
290/* nosave: For optimizations. */
291{
292 return
9041c2e3 293 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
294 nosave ? 0 : REXEC_COPY_STR);
295}
0f5d15d6
IZ
296
297STATIC void
cea2e8a9 298S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
299{
300 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
301#ifdef DEBUGGING
302 PL_regprogram = prog->program;
303#endif
304 PL_regnpar = prog->nparens;
9041c2e3
NIS
305 PL_regdata = prog->data;
306 PL_reg_re = prog;
0f5d15d6 307}
22e551b9 308
9041c2e3 309/*
cad2e5aa
JH
310 * Need to implement the following flags for reg_anch:
311 *
312 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
313 * USE_INTUIT_ML
314 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
315 * INTUIT_AUTORITATIVE_ML
316 * INTUIT_ONCE_NOML - Intuit can match in one location only.
317 * INTUIT_ONCE_ML
318 *
319 * Another flag for this function: SECOND_TIME (so that float substrs
320 * with giant delta may be not rechecked).
321 */
322
323/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
324
2c2d71f5 325/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
326 Otherwise, only SvCUR(sv) is used to get strbeg. */
327
328/* XXXX We assume that strpos is strbeg unless sv. */
329
6eb5f6b9
JH
330/* XXXX Some places assume that there is a fixed substring.
331 An update may be needed if optimizer marks as "INTUITable"
332 RExen without fixed substrings. Similarly, it is assumed that
333 lengths of all the strings are no more than minlen, thus they
334 cannot come from lookahead.
335 (Or minlen should take into account lookahead.) */
336
2c2d71f5
JH
337/* A failure to find a constant substring means that there is no need to make
338 an expensive call to REx engine, thus we celebrate a failure. Similarly,
339 finding a substring too deep into the string means that less calls to
30944b6d
IZ
340 regtry() should be needed.
341
342 REx compiler's optimizer found 4 possible hints:
343 a) Anchored substring;
344 b) Fixed substring;
345 c) Whether we are anchored (beginning-of-line or \G);
346 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 347 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
348 string which does not contradict any of them.
349 */
2c2d71f5 350
6eb5f6b9
JH
351/* Most of decisions we do here should have been done at compile time.
352 The nodes of the REx which we used for the search should have been
353 deleted from the finite automaton. */
354
cad2e5aa
JH
355char *
356Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
357 char *strend, U32 flags, re_scream_pos_data *data)
358{
b7953727 359 register I32 start_shift = 0;
cad2e5aa 360 /* Should be nonnegative! */
b7953727 361 register I32 end_shift = 0;
2c2d71f5
JH
362 register char *s;
363 register SV *check;
a1933d95 364 char *strbeg;
cad2e5aa
JH
365 char *t;
366 I32 ml_anch;
6eb5f6b9 367 register char *other_last = Nullch; /* other substr checked before this */
b7953727 368 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
369#ifdef DEBUGGING
370 char *i_strpos = strpos;
371#endif
cad2e5aa
JH
372
373 DEBUG_r( if (!PL_colorset) reginitcolors() );
374 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 375 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
376 PL_colors[4],PL_colors[5],PL_colors[0],
377 prog->precomp,
378 PL_colors[1],
379 (strlen(prog->precomp) > 60 ? "..." : ""),
380 PL_colors[0],
b900a521 381 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
382 strpos, PL_colors[1],
383 (strend - strpos > 60 ? "..." : ""))
384 );
385
1aa99e6b
IH
386 if (prog->reganch & ROPT_UTF8)
387 PL_reg_flags |= RF_utf8;
388
389 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 390 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 391 goto fail;
2c2d71f5 392 }
a1933d95 393 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 394 PL_regeol = strend;
653099ff 395 check = prog->check_substr;
2c2d71f5 396 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
397 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
398 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 399 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 400
7e25d62c
JH
401 if (!ml_anch) {
402 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
403 | ROPT_IMPLICIT)) /* not a real BOL */
404 /* SvCUR is not set on references: SvRV and SvPVX overlap */
405 && sv && !SvROK(sv)
406 && (strpos != strbeg)) {
407 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
408 goto fail;
409 }
410 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 411 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 412 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
413 I32 slen;
414
1aa99e6b 415 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
416 if (SvTAIL(check)) {
417 slen = SvCUR(check); /* >= 1 */
cad2e5aa 418
9041c2e3 419 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
420 || (strend - s == slen && strend[-1] != '\n')) {
421 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
422 goto fail_finish;
cad2e5aa
JH
423 }
424 /* Now should match s[0..slen-2] */
425 slen--;
653099ff 426 if (slen && (*SvPVX(check) != *s
cad2e5aa 427 || (slen > 1
653099ff 428 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
429 report_neq:
430 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
431 goto fail_finish;
432 }
cad2e5aa 433 }
653099ff
GS
434 else if (*SvPVX(check) != *s
435 || ((slen = SvCUR(check)) > 1
436 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
437 goto report_neq;
438 goto success_at_start;
7e25d62c 439 }
cad2e5aa 440 }
2c2d71f5 441 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 442 s = strpos;
2c2d71f5 443 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 444 end_shift = prog->minlen - start_shift -
653099ff 445 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 446 if (!ml_anch) {
653099ff
GS
447 I32 end = prog->check_offset_max + CHR_SVLEN(check)
448 - (SvTAIL(check) != 0);
1aa99e6b 449 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
450
451 if (end_shift < eshift)
452 end_shift = eshift;
453 }
cad2e5aa 454 }
2c2d71f5 455 else { /* Can match at random position */
cad2e5aa
JH
456 ml_anch = 0;
457 s = strpos;
2c2d71f5
JH
458 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
459 /* Should be nonnegative! */
460 end_shift = prog->minlen - start_shift -
653099ff 461 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
462 }
463
2c2d71f5 464#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 465 if (end_shift < 0)
6bbae5e6 466 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
467#endif
468
2c2d71f5
JH
469 restart:
470 /* Find a possible match in the region s..strend by looking for
471 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 472 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
473 I32 p = -1; /* Internal iterator of scream. */
474 I32 *pp = data ? data->scream_pos : &p;
475
2c2d71f5
JH
476 if (PL_screamfirst[BmRARE(check)] >= 0
477 || ( BmRARE(check) == '\n'
478 && (BmPREVIOUS(check) == SvCUR(check) - 1)
479 && SvTAIL(check) ))
9041c2e3 480 s = screaminstr(sv, check,
2c2d71f5 481 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 482 else
2c2d71f5 483 goto fail_finish;
cad2e5aa
JH
484 if (data)
485 *data->scream_olds = s;
486 }
f33976b4 487 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
488 s = fbm_instr((U8*)(s + start_shift),
489 (U8*)(strend - end_shift),
490 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 491 else
1aa99e6b
IH
492 s = fbm_instr(HOP3(s, start_shift, strend),
493 HOP3(strend, -end_shift, strbeg),
2c2d71f5 494 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
495
496 /* Update the count-of-usability, remove useless subpatterns,
497 unshift s. */
2c2d71f5
JH
498
499 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
500 (s ? "Found" : "Did not find"),
501 ((check == prog->anchored_substr) ? "anchored" : "floating"),
502 PL_colors[0],
7b0972df
JH
503 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
504 SvPVX(check),
2c2d71f5
JH
505 PL_colors[1], (SvTAIL(check) ? "$" : ""),
506 (s ? " at offset " : "...\n") ) );
507
508 if (!s)
509 goto fail_finish;
510
6eb5f6b9
JH
511 check_at = s;
512
2c2d71f5 513 /* Finish the diagnostic message */
30944b6d 514 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
515
516 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
517 Start with the other substr.
518 XXXX no SCREAM optimization yet - and a very coarse implementation
519 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
520 *always* match. Probably should be marked during compile...
521 Probably it is right to do no SCREAM here...
522 */
523
524 if (prog->float_substr && prog->anchored_substr) {
30944b6d 525 /* Take into account the "other" substring. */
2c2d71f5
JH
526 /* XXXX May be hopelessly wrong for UTF... */
527 if (!other_last)
6eb5f6b9 528 other_last = strpos;
2c2d71f5 529 if (check == prog->float_substr) {
30944b6d
IZ
530 do_other_anchored:
531 {
1aa99e6b 532 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
533 char *s1 = s;
534
2c2d71f5
JH
535 t = s - prog->check_offset_max;
536 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
537 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 538 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 539 && t > strpos)))
30944b6d 540 /* EMPTY */;
2c2d71f5
JH
541 else
542 t = strpos;
1aa99e6b 543 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
544 if (t < other_last) /* These positions already checked */
545 t = other_last;
1aa99e6b 546 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
547 if (last < last1)
548 last1 = last;
549 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
550 /* On end-of-str: see comment below. */
551 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
552 HOP3(HOP3(last1, prog->anchored_offset, strend)
553 + SvCUR(prog->anchored_substr),
554 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
555 prog->anchored_substr,
556 PL_multiline ? FBMrf_MULTILINE : 0);
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
559 (s ? "Found" : "Contradicts"),
560 PL_colors[0],
7b0972df
JH
561 (int)(SvCUR(prog->anchored_substr)
562 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
563 SvPVX(prog->anchored_substr),
564 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
565 if (!s) {
566 if (last1 >= last2) {
567 DEBUG_r(PerlIO_printf(Perl_debug_log,
568 ", giving up...\n"));
569 goto fail_finish;
570 }
571 DEBUG_r(PerlIO_printf(Perl_debug_log,
572 ", trying floating at offset %ld...\n",
1aa99e6b
IH
573 (long)(HOP3c(s1, 1, strend) - i_strpos)));
574 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
575 s = HOP3c(last, 1, strend);
2c2d71f5
JH
576 goto restart;
577 }
578 else {
579 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 580 (long)(s - i_strpos)));
1aa99e6b
IH
581 t = HOP3c(s, -prog->anchored_offset, strbeg);
582 other_last = HOP3c(s, 1, strend);
30944b6d 583 s = s1;
2c2d71f5
JH
584 if (t == strpos)
585 goto try_at_start;
2c2d71f5
JH
586 goto try_at_offset;
587 }
30944b6d 588 }
2c2d71f5
JH
589 }
590 else { /* Take into account the floating substring. */
591 char *last, *last1;
592 char *s1 = s;
593
1aa99e6b
IH
594 t = HOP3c(s, -start_shift, strbeg);
595 last1 = last =
596 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
597 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
598 last = HOP3c(t, prog->float_max_offset, strend);
599 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
600 if (s < other_last)
601 s = other_last;
2c2d71f5
JH
602 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
603 /* fbm_instr() takes into account exact value of end-of-str
604 if the check is SvTAIL(ed). Since false positives are OK,
605 and end-of-str is not later than strend we are OK. */
606 s = fbm_instr((unsigned char*)s,
607 (unsigned char*)last + SvCUR(prog->float_substr)
608 - (SvTAIL(prog->float_substr)!=0),
609 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
610 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
611 (s ? "Found" : "Contradicts"),
612 PL_colors[0],
7b0972df
JH
613 (int)(SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
615 SvPVX(prog->float_substr),
616 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
617 if (!s) {
618 if (last1 == last) {
619 DEBUG_r(PerlIO_printf(Perl_debug_log,
620 ", giving up...\n"));
621 goto fail_finish;
622 }
623 DEBUG_r(PerlIO_printf(Perl_debug_log,
624 ", trying anchored starting at offset %ld...\n",
30944b6d 625 (long)(s1 + 1 - i_strpos)));
803ff556 626 other_last = last;
1aa99e6b 627 s = HOP3c(t, 1, strend);
2c2d71f5
JH
628 goto restart;
629 }
630 else {
631 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 632 (long)(s - i_strpos)));
803ff556 633 other_last = s; /* Fix this later. --Hugo */
30944b6d 634 s = s1;
2c2d71f5
JH
635 if (t == strpos)
636 goto try_at_start;
2c2d71f5
JH
637 goto try_at_offset;
638 }
639 }
cad2e5aa 640 }
2c2d71f5
JH
641
642 t = s - prog->check_offset_max;
2c2d71f5
JH
643 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
644 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
645 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
646 && t > strpos))) {
2c2d71f5
JH
647 /* Fixed substring is found far enough so that the match
648 cannot start at strpos. */
649 try_at_offset:
cad2e5aa 650 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
651 /* Eventually fbm_*() should handle this, but often
652 anchored_offset is not 0, so this check will not be wasted. */
653 /* XXXX In the code below we prefer to look for "^" even in
654 presence of anchored substrings. And we search even
655 beyond the found float position. These pessimizations
656 are historical artefacts only. */
657 find_anchor:
2c2d71f5 658 while (t < strend - prog->minlen) {
cad2e5aa 659 if (*t == '\n') {
4ee3650e 660 if (t < check_at - prog->check_offset_min) {
30944b6d 661 if (prog->anchored_substr) {
4ee3650e
GS
662 /* Since we moved from the found position,
663 we definitely contradict the found anchored
30944b6d
IZ
664 substr. Due to the above check we do not
665 contradict "check" substr.
666 Thus we can arrive here only if check substr
667 is float. Redo checking for "other"=="fixed".
668 */
9041c2e3 669 strpos = t + 1;
30944b6d
IZ
670 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
671 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
672 goto do_other_anchored;
673 }
4ee3650e
GS
674 /* We don't contradict the found floating substring. */
675 /* XXXX Why not check for STCLASS? */
cad2e5aa 676 s = t + 1;
2c2d71f5 677 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 678 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
679 goto set_useful;
680 }
4ee3650e
GS
681 /* Position contradicts check-string */
682 /* XXXX probably better to look for check-string
683 than for "\n", so one should lower the limit for t? */
684 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 685 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 686 other_last = strpos = s = t + 1;
cad2e5aa
JH
687 goto restart;
688 }
689 t++;
690 }
2c2d71f5
JH
691 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
692 PL_colors[0],PL_colors[1]));
693 goto fail_finish;
cad2e5aa 694 }
f5952150
GS
695 else {
696 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
697 PL_colors[0],PL_colors[1]));
698 }
cad2e5aa
JH
699 s = t;
700 set_useful:
2c2d71f5 701 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
702 }
703 else {
f5952150 704 /* The found string does not prohibit matching at strpos,
2c2d71f5 705 - no optimization of calling REx engine can be performed,
f5952150
GS
706 unless it was an MBOL and we are not after MBOL,
707 or a future STCLASS check will fail this. */
2c2d71f5
JH
708 try_at_start:
709 /* Even in this situation we may use MBOL flag if strpos is offset
710 wrt the start of the string. */
05b4157f 711 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 712 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
713 /* May be due to an implicit anchor of m{.*foo} */
714 && !(prog->reganch & ROPT_IMPLICIT))
715 {
cad2e5aa
JH
716 t = strpos;
717 goto find_anchor;
718 }
30944b6d 719 DEBUG_r( if (ml_anch)
f5952150
GS
720 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
721 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 722 );
2c2d71f5 723 success_at_start:
30944b6d 724 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 725 && prog->check_substr /* Could be deleted already */
cad2e5aa 726 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
727 && prog->check_substr == prog->float_substr)
728 {
cad2e5aa 729 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 730 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
731 SvREFCNT_dec(prog->check_substr);
732 prog->check_substr = Nullsv; /* disable */
733 prog->float_substr = Nullsv; /* clear */
5e39e1e5 734 check = Nullsv; /* abort */
cad2e5aa 735 s = strpos;
3cf5c195
IZ
736 /* XXXX This is a remnant of the old implementation. It
737 looks wasteful, since now INTUIT can use many
6eb5f6b9 738 other heuristics. */
cad2e5aa
JH
739 prog->reganch &= ~RE_USE_INTUIT;
740 }
741 else
742 s = strpos;
743 }
744
6eb5f6b9
JH
745 /* Last resort... */
746 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
747 if (prog->regstclass) {
748 /* minlen == 0 is possible if regstclass is \b or \B,
749 and the fixed substr is ''$.
750 Since minlen is already taken into account, s+1 is before strend;
751 accidentally, minlen >= 1 guaranties no false positives at s + 1
752 even for \b or \B. But (minlen? 1 : 0) below assumes that
753 regstclass does not come from lookahead... */
754 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
755 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 756 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 757 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 758 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 759 : 1);
6eb5f6b9 760 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
761 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
762 : (prog->float_substr
763 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
764 cl_l, strend)
765 : strend);
a1933d95 766 char *startpos = strbeg;
6eb5f6b9
JH
767
768 t = s;
76384e4a 769 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 770 PL_regdata = prog->data;
76384e4a
GS
771 PL_bostr = startpos;
772 }
f33976b4 773 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
774 if (!s) {
775#ifdef DEBUGGING
b7953727 776 char *what = 0;
6eb5f6b9
JH
777#endif
778 if (endpos == strend) {
779 DEBUG_r( PerlIO_printf(Perl_debug_log,
780 "Could not match STCLASS...\n") );
781 goto fail;
782 }
66e933ab
GS
783 DEBUG_r( PerlIO_printf(Perl_debug_log,
784 "This position contradicts STCLASS...\n") );
653099ff
GS
785 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
786 goto fail;
6eb5f6b9
JH
787 /* Contradict one of substrings */
788 if (prog->anchored_substr) {
6eb5f6b9
JH
789 if (prog->anchored_substr == check) {
790 DEBUG_r( what = "anchored" );
791 hop_and_restart:
1aa99e6b 792 s = HOP3c(t, 1, strend);
66e933ab
GS
793 if (s + start_shift + end_shift > strend) {
794 /* XXXX Should be taken into account earlier? */
795 DEBUG_r( PerlIO_printf(Perl_debug_log,
796 "Could not match STCLASS...\n") );
797 goto fail;
798 }
5e39e1e5
HS
799 if (!check)
800 goto giveup;
6eb5f6b9 801 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 802 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
803 what, (long)(s + start_shift - i_strpos)) );
804 goto restart;
805 }
66e933ab 806 /* Have both, check_string is floating */
6eb5f6b9
JH
807 if (t + start_shift >= check_at) /* Contradicts floating=check */
808 goto retry_floating_check;
809 /* Recheck anchored substring, but not floating... */
9041c2e3 810 s = check_at;
5e39e1e5
HS
811 if (!check)
812 goto giveup;
6eb5f6b9 813 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 814 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
815 (long)(other_last - i_strpos)) );
816 goto do_other_anchored;
817 }
60e71179
GS
818 /* Another way we could have checked stclass at the
819 current position only: */
820 if (ml_anch) {
821 s = t = t + 1;
5e39e1e5
HS
822 if (!check)
823 goto giveup;
60e71179 824 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
825 "Looking for /%s^%s/m starting at offset %ld...\n",
826 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 827 goto try_at_offset;
66e933ab 828 }
60e71179
GS
829 if (!prog->float_substr) /* Could have been deleted */
830 goto fail;
6eb5f6b9
JH
831 /* Check is floating subtring. */
832 retry_floating_check:
833 t = check_at - start_shift;
834 DEBUG_r( what = "floating" );
835 goto hop_and_restart;
836 }
b7953727
JH
837 if (t != s) {
838 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 839 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
840 (long)(t - i_strpos), (long)(s - i_strpos))
841 );
842 }
843 else {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
845 "Does not contradict STCLASS...\n");
846 );
847 }
6eb5f6b9 848 }
5e39e1e5
HS
849 giveup:
850 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
851 PL_colors[4], (check ? "Guessed" : "Giving up"),
852 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 853 return s;
2c2d71f5
JH
854
855 fail_finish: /* Substring not found */
66e933ab
GS
856 if (prog->check_substr) /* could be removed already */
857 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 858 fail:
2c2d71f5 859 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
860 PL_colors[4],PL_colors[5]));
861 return Nullch;
862}
9661b544 863
6eb5f6b9 864/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
865STATIC char *
866S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 867{
6eb5f6b9
JH
868 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
869 char *m;
d8093b23
G
870 STRLEN ln;
871 unsigned int c1;
872 unsigned int c2;
6eb5f6b9
JH
873 char *e;
874 register I32 tmp = 1; /* Scratch variable? */
d9f424b2 875 register bool do_utf8 = PL_reg_sv_utf8;
cad2e5aa 876
6eb5f6b9
JH
877 /* We know what class it must start with. */
878 switch (OP(c)) {
6eb5f6b9 879 case ANYOF:
a0ed51b3 880 while (s < strend) {
ffc61ed2 881 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
882 if (tmp && (norun || regtry(prog, s)))
883 goto got_it;
884 else
885 tmp = doevery;
a0ed51b3 886 }
6eb5f6b9
JH
887 else
888 tmp = 1;
ffc61ed2 889 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 890 }
6eb5f6b9 891 break;
f33976b4
DB
892 case CANY:
893 while (s < strend) {
894 if (tmp && (norun || regtry(prog, s)))
895 goto got_it;
896 else
897 tmp = doevery;
898 s++;
899 }
900 break;
6eb5f6b9
JH
901 case EXACTF:
902 m = STRING(c);
903 ln = STR_LEN(c);
1aa99e6b
IH
904 if (UTF) {
905 c1 = to_utf8_lower((U8*)m);
906 c2 = to_utf8_upper((U8*)m);
907 }
908 else {
909 c1 = *(U8*)m;
910 c2 = PL_fold[c1];
911 }
6eb5f6b9
JH
912 goto do_exactf;
913 case EXACTFL:
914 m = STRING(c);
915 ln = STR_LEN(c);
d8093b23 916 c1 = *(U8*)m;
6eb5f6b9
JH
917 c2 = PL_fold_locale[c1];
918 do_exactf:
919 e = strend - ln;
b3c9acc1 920
6eb5f6b9
JH
921 if (norun && e < s)
922 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
923
924 if (do_utf8) {
925 STRLEN len;
926 if (c1 == c2)
927 while (s <= e) {
9041c2e3 928 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
929 && regtry(prog, s) )
930 goto got_it;
931 s += len;
932 }
933 else
934 while (s <= e) {
9041c2e3 935 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
936 if ( (c == c1 || c == c2) && regtry(prog, s) )
937 goto got_it;
938 s += len;
939 }
940 }
941 else {
942 if (c1 == c2)
943 while (s <= e) {
944 if ( *(U8*)s == c1
945 && (ln == 1 || !(OP(c) == EXACTF
946 ? ibcmp(s, m, ln)
947 : ibcmp_locale(s, m, ln)))
948 && (norun || regtry(prog, s)) )
949 goto got_it;
950 s++;
951 }
952 else
953 while (s <= e) {
954 if ( (*(U8*)s == c1 || *(U8*)s == c2)
955 && (ln == 1 || !(OP(c) == EXACTF
956 ? ibcmp(s, m, ln)
957 : ibcmp_locale(s, m, ln)))
958 && (norun || regtry(prog, s)) )
959 goto got_it;
960 s++;
961 }
b3c9acc1
IZ
962 }
963 break;
bbce6d69 964 case BOUNDL:
3280af22 965 PL_reg_flags |= RF_tainted;
bbce6d69 966 /* FALL THROUGH */
a0d0e21e 967 case BOUND:
ffc61ed2 968 if (do_utf8) {
12d33761 969 if (s == PL_bostr)
ffc61ed2
JH
970 tmp = '\n';
971 else {
1aa99e6b 972 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 973
0064a8a9
JH
974 if (s > (char*)r)
975 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
976 }
977 tmp = ((OP(c) == BOUND ?
9041c2e3 978 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 979 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
980 while (s < strend) {
981 if (tmp == !(OP(c) == BOUND ?
3568d838 982 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
983 isALNUM_LC_utf8((U8*)s)))
984 {
985 tmp = !tmp;
986 if ((norun || regtry(prog, s)))
987 goto got_it;
988 }
989 s += UTF8SKIP(s);
a687059c 990 }
a0d0e21e 991 }
667bb95a 992 else {
12d33761 993 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
994 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
995 while (s < strend) {
996 if (tmp ==
997 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
998 tmp = !tmp;
999 if ((norun || regtry(prog, s)))
1000 goto got_it;
1001 }
1002 s++;
a0ed51b3 1003 }
a0ed51b3 1004 }
6eb5f6b9 1005 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1006 goto got_it;
1007 break;
bbce6d69 1008 case NBOUNDL:
3280af22 1009 PL_reg_flags |= RF_tainted;
bbce6d69 1010 /* FALL THROUGH */
a0d0e21e 1011 case NBOUND:
ffc61ed2 1012 if (do_utf8) {
12d33761 1013 if (s == PL_bostr)
ffc61ed2
JH
1014 tmp = '\n';
1015 else {
1aa99e6b 1016 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1017
0064a8a9
JH
1018 if (s > (char*)r)
1019 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1020 }
1021 tmp = ((OP(c) == NBOUND ?
9041c2e3 1022 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1023 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1024 while (s < strend) {
1025 if (tmp == !(OP(c) == NBOUND ?
3568d838 1026 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1027 isALNUM_LC_utf8((U8*)s)))
1028 tmp = !tmp;
1029 else if ((norun || regtry(prog, s)))
1030 goto got_it;
1031 s += UTF8SKIP(s);
1032 }
a0d0e21e 1033 }
667bb95a 1034 else {
12d33761 1035 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1036 tmp = ((OP(c) == NBOUND ?
1037 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1038 while (s < strend) {
1039 if (tmp ==
1040 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1041 tmp = !tmp;
1042 else if ((norun || regtry(prog, s)))
1043 goto got_it;
1044 s++;
1045 }
a0ed51b3 1046 }
6eb5f6b9 1047 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1048 goto got_it;
1049 break;
a0d0e21e 1050 case ALNUM:
ffc61ed2 1051 if (do_utf8) {
8269fa76 1052 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1053 while (s < strend) {
3568d838 1054 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1055 if (tmp && (norun || regtry(prog, s)))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
bbce6d69 1060 else
ffc61ed2
JH
1061 tmp = 1;
1062 s += UTF8SKIP(s);
bbce6d69 1063 }
bbce6d69 1064 }
ffc61ed2
JH
1065 else {
1066 while (s < strend) {
1067 if (isALNUM(*s)) {
1068 if (tmp && (norun || regtry(prog, s)))
1069 goto got_it;
1070 else
1071 tmp = doevery;
1072 }
a0ed51b3 1073 else
ffc61ed2
JH
1074 tmp = 1;
1075 s++;
a0ed51b3 1076 }
a0ed51b3
LW
1077 }
1078 break;
bbce6d69 1079 case ALNUML:
3280af22 1080 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1081 if (do_utf8) {
1082 while (s < strend) {
1083 if (isALNUM_LC_utf8((U8*)s)) {
1084 if (tmp && (norun || regtry(prog, s)))
1085 goto got_it;
1086 else
1087 tmp = doevery;
1088 }
a687059c 1089 else
ffc61ed2
JH
1090 tmp = 1;
1091 s += UTF8SKIP(s);
a0d0e21e 1092 }
a0d0e21e 1093 }
ffc61ed2
JH
1094 else {
1095 while (s < strend) {
1096 if (isALNUM_LC(*s)) {
1097 if (tmp && (norun || regtry(prog, s)))
1098 goto got_it;
1099 else
1100 tmp = doevery;
1101 }
a0ed51b3 1102 else
ffc61ed2
JH
1103 tmp = 1;
1104 s++;
a0ed51b3 1105 }
a0ed51b3
LW
1106 }
1107 break;
a0d0e21e 1108 case NALNUM:
ffc61ed2 1109 if (do_utf8) {
8269fa76 1110 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1111 while (s < strend) {
3568d838 1112 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1113 if (tmp && (norun || regtry(prog, s)))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
bbce6d69 1118 else
ffc61ed2
JH
1119 tmp = 1;
1120 s += UTF8SKIP(s);
bbce6d69 1121 }
bbce6d69 1122 }
ffc61ed2
JH
1123 else {
1124 while (s < strend) {
1125 if (!isALNUM(*s)) {
1126 if (tmp && (norun || regtry(prog, s)))
1127 goto got_it;
1128 else
1129 tmp = doevery;
1130 }
a0ed51b3 1131 else
ffc61ed2
JH
1132 tmp = 1;
1133 s++;
a0ed51b3 1134 }
a0ed51b3
LW
1135 }
1136 break;
bbce6d69 1137 case NALNUML:
3280af22 1138 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1139 if (do_utf8) {
1140 while (s < strend) {
1141 if (!isALNUM_LC_utf8((U8*)s)) {
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
a687059c 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s += UTF8SKIP(s);
a687059c 1150 }
a0d0e21e 1151 }
ffc61ed2
JH
1152 else {
1153 while (s < strend) {
1154 if (!isALNUM_LC(*s)) {
1155 if (tmp && (norun || regtry(prog, s)))
1156 goto got_it;
1157 else
1158 tmp = doevery;
1159 }
a0ed51b3 1160 else
ffc61ed2
JH
1161 tmp = 1;
1162 s++;
a0ed51b3 1163 }
a0ed51b3
LW
1164 }
1165 break;
a0d0e21e 1166 case SPACE:
ffc61ed2 1167 if (do_utf8) {
8269fa76 1168 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1169 while (s < strend) {
3568d838 1170 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
a0d0e21e 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s += UTF8SKIP(s);
2304df62 1179 }
a0d0e21e 1180 }
ffc61ed2
JH
1181 else {
1182 while (s < strend) {
1183 if (isSPACE(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
a0ed51b3 1189 else
ffc61ed2
JH
1190 tmp = 1;
1191 s++;
a0ed51b3 1192 }
a0ed51b3
LW
1193 }
1194 break;
bbce6d69 1195 case SPACEL:
3280af22 1196 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1197 if (do_utf8) {
1198 while (s < strend) {
1199 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
bbce6d69 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s += UTF8SKIP(s);
bbce6d69 1208 }
bbce6d69 1209 }
ffc61ed2
JH
1210 else {
1211 while (s < strend) {
1212 if (isSPACE_LC(*s)) {
1213 if (tmp && (norun || regtry(prog, s)))
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
a0ed51b3 1218 else
ffc61ed2
JH
1219 tmp = 1;
1220 s++;
a0ed51b3 1221 }
a0ed51b3
LW
1222 }
1223 break;
a0d0e21e 1224 case NSPACE:
ffc61ed2 1225 if (do_utf8) {
8269fa76 1226 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1227 while (s < strend) {
3568d838 1228 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
a0d0e21e 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s += UTF8SKIP(s);
a687059c 1237 }
a0d0e21e 1238 }
ffc61ed2
JH
1239 else {
1240 while (s < strend) {
1241 if (!isSPACE(*s)) {
1242 if (tmp && (norun || regtry(prog, s)))
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
a0ed51b3 1247 else
ffc61ed2
JH
1248 tmp = 1;
1249 s++;
a0ed51b3 1250 }
a0ed51b3
LW
1251 }
1252 break;
bbce6d69 1253 case NSPACEL:
3280af22 1254 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1255 if (do_utf8) {
1256 while (s < strend) {
1257 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
bbce6d69 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s += UTF8SKIP(s);
bbce6d69 1266 }
bbce6d69 1267 }
ffc61ed2
JH
1268 else {
1269 while (s < strend) {
1270 if (!isSPACE_LC(*s)) {
1271 if (tmp && (norun || regtry(prog, s)))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
a0ed51b3 1276 else
ffc61ed2
JH
1277 tmp = 1;
1278 s++;
a0ed51b3 1279 }
a0ed51b3
LW
1280 }
1281 break;
a0d0e21e 1282 case DIGIT:
ffc61ed2 1283 if (do_utf8) {
8269fa76 1284 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1285 while (s < strend) {
3568d838 1286 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
a0d0e21e 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s += UTF8SKIP(s);
2b69d0c2 1295 }
a0d0e21e 1296 }
ffc61ed2
JH
1297 else {
1298 while (s < strend) {
1299 if (isDIGIT(*s)) {
1300 if (tmp && (norun || regtry(prog, s)))
1301 goto got_it;
1302 else
1303 tmp = doevery;
1304 }
a0ed51b3 1305 else
ffc61ed2
JH
1306 tmp = 1;
1307 s++;
a0ed51b3 1308 }
a0ed51b3
LW
1309 }
1310 break;
b8c5462f
JH
1311 case DIGITL:
1312 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1313 if (do_utf8) {
1314 while (s < strend) {
1315 if (isDIGIT_LC_utf8((U8*)s)) {
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
b8c5462f 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s += UTF8SKIP(s);
b8c5462f 1324 }
b8c5462f 1325 }
ffc61ed2
JH
1326 else {
1327 while (s < strend) {
1328 if (isDIGIT_LC(*s)) {
1329 if (tmp && (norun || regtry(prog, s)))
1330 goto got_it;
1331 else
1332 tmp = doevery;
1333 }
b8c5462f 1334 else
ffc61ed2
JH
1335 tmp = 1;
1336 s++;
b8c5462f 1337 }
b8c5462f
JH
1338 }
1339 break;
a0d0e21e 1340 case NDIGIT:
ffc61ed2 1341 if (do_utf8) {
8269fa76 1342 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1343 while (s < strend) {
3568d838 1344 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
a0d0e21e 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s += UTF8SKIP(s);
a687059c 1353 }
a0d0e21e 1354 }
ffc61ed2
JH
1355 else {
1356 while (s < strend) {
1357 if (!isDIGIT(*s)) {
1358 if (tmp && (norun || regtry(prog, s)))
1359 goto got_it;
1360 else
1361 tmp = doevery;
1362 }
a0ed51b3 1363 else
ffc61ed2
JH
1364 tmp = 1;
1365 s++;
a0ed51b3 1366 }
a0ed51b3
LW
1367 }
1368 break;
b8c5462f
JH
1369 case NDIGITL:
1370 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1371 if (do_utf8) {
1372 while (s < strend) {
1373 if (!isDIGIT_LC_utf8((U8*)s)) {
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
b8c5462f 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s += UTF8SKIP(s);
b8c5462f 1382 }
a0ed51b3 1383 }
ffc61ed2
JH
1384 else {
1385 while (s < strend) {
1386 if (!isDIGIT_LC(*s)) {
1387 if (tmp && (norun || regtry(prog, s)))
1388 goto got_it;
1389 else
1390 tmp = doevery;
1391 }
cf93c79d 1392 else
ffc61ed2
JH
1393 tmp = 1;
1394 s++;
b8c5462f 1395 }
b8c5462f
JH
1396 }
1397 break;
b3c9acc1 1398 default:
3c3eec57
GS
1399 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1400 break;
d6a28714 1401 }
6eb5f6b9
JH
1402 return 0;
1403 got_it:
1404 return s;
1405}
1406
1407/*
1408 - regexec_flags - match a regexp against a string
1409 */
1410I32
1411Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1412 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1413/* strend: pointer to null at end of string */
1414/* strbeg: real beginning of string */
1415/* minend: end of match must be >=minend after stringarg. */
1416/* data: May be used for some additional optimizations. */
1417/* nosave: For optimizations. */
1418{
6eb5f6b9
JH
1419 register char *s;
1420 register regnode *c;
1421 register char *startpos = stringarg;
6eb5f6b9
JH
1422 I32 minlen; /* must match at least this many chars */
1423 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1424 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1425 constant substr. */ /* CC */
1426 I32 end_shift = 0; /* Same for the end. */ /* CC */
1427 I32 scream_pos = -1; /* Internal iterator of scream. */
1428 char *scream_olds;
1429 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1430 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1431
1432 PL_regcc = 0;
1433
1434 cache_re(prog);
1435#ifdef DEBUGGING
aea4f609 1436 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1437#endif
1438
1439 /* Be paranoid... */
1440 if (prog == NULL || startpos == NULL) {
1441 Perl_croak(aTHX_ "NULL regexp parameter");
1442 return 0;
1443 }
1444
1445 minlen = prog->minlen;
f33976b4 1446 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1447 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1448 }
1449 else {
f33976b4 1450 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1451 }
6eb5f6b9 1452
6eb5f6b9
JH
1453 /* Check validity of program. */
1454 if (UCHARAT(prog->program) != REG_MAGIC) {
1455 Perl_croak(aTHX_ "corrupted regexp program");
1456 }
1457
1458 PL_reg_flags = 0;
1459 PL_reg_eval_set = 0;
1460 PL_reg_maxiter = 0;
1461
1462 if (prog->reganch & ROPT_UTF8)
1463 PL_reg_flags |= RF_utf8;
1464
1465 /* Mark beginning of line for ^ and lookbehind. */
1466 PL_regbol = startpos;
1467 PL_bostr = strbeg;
1468 PL_reg_sv = sv;
1469
1470 /* Mark end of line for $ (and such) */
1471 PL_regeol = strend;
1472
1473 /* see how far we have to get to not match where we matched before */
1474 PL_regtill = startpos+minend;
1475
1476 /* We start without call_cc context. */
1477 PL_reg_call_cc = 0;
1478
1479 /* If there is a "must appear" string, look for it. */
1480 s = startpos;
1481
1482 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1483 MAGIC *mg;
1484
1485 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1486 PL_reg_ganch = startpos;
1487 else if (sv && SvTYPE(sv) >= SVt_PVMG
1488 && SvMAGIC(sv)
14befaf4
DM
1489 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1490 && mg->mg_len >= 0) {
6eb5f6b9
JH
1491 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1492 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1493 if (s > PL_reg_ganch)
6eb5f6b9
JH
1494 goto phooey;
1495 s = PL_reg_ganch;
1496 }
1497 }
1498 else /* pos() not defined */
1499 PL_reg_ganch = strbeg;
1500 }
1501
699c3c34
JH
1502 if (do_utf8 == (UTF!=0) &&
1503 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1504 re_scream_pos_data d;
1505
1506 d.scream_olds = &scream_olds;
1507 d.scream_pos = &scream_pos;
1508 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1509 if (!s)
1510 goto phooey; /* not present */
1511 }
1512
1513 DEBUG_r( if (!PL_colorset) reginitcolors() );
1514 DEBUG_r(PerlIO_printf(Perl_debug_log,
1515 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1516 PL_colors[4],PL_colors[5],PL_colors[0],
1517 prog->precomp,
1518 PL_colors[1],
1519 (strlen(prog->precomp) > 60 ? "..." : ""),
1520 PL_colors[0],
1521 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1522 startpos, PL_colors[1],
1523 (strend - startpos > 60 ? "..." : ""))
1524 );
1525
1526 /* Simplest case: anchored match need be tried only once. */
1527 /* [unless only anchor is BOL and multiline is set] */
1528 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1529 if (s == startpos && regtry(prog, startpos))
1530 goto got_it;
1531 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1532 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1533 {
1534 char *end;
1535
1536 if (minlen)
1537 dontbother = minlen - 1;
1aa99e6b 1538 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1539 /* for multiline we only have to try after newlines */
1540 if (prog->check_substr) {
1541 if (s == startpos)
1542 goto after_try;
1543 while (1) {
1544 if (regtry(prog, s))
1545 goto got_it;
1546 after_try:
1547 if (s >= end)
1548 goto phooey;
1549 if (prog->reganch & RE_USE_INTUIT) {
1550 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1551 if (!s)
1552 goto phooey;
1553 }
1554 else
1555 s++;
1556 }
1557 } else {
1558 if (s > startpos)
1559 s--;
1560 while (s < end) {
1561 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1562 if (regtry(prog, s))
1563 goto got_it;
1564 }
1565 }
1566 }
1567 }
1568 goto phooey;
1569 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1570 if (regtry(prog, PL_reg_ganch))
1571 goto got_it;
1572 goto phooey;
1573 }
1574
1575 /* Messy cases: unanchored match. */
9041c2e3 1576 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1577 /* we have /x+whatever/ */
1578 /* it must be a one character string (XXXX Except UTF?) */
1579 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1580#ifdef DEBUGGING
1581 int did_match = 0;
1582#endif
1583
1aa99e6b 1584 if (do_utf8) {
6eb5f6b9
JH
1585 while (s < strend) {
1586 if (*s == ch) {
bf93d4cc 1587 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1588 if (regtry(prog, s)) goto got_it;
1589 s += UTF8SKIP(s);
1590 while (s < strend && *s == ch)
1591 s += UTF8SKIP(s);
1592 }
1593 s += UTF8SKIP(s);
1594 }
1595 }
1596 else {
1597 while (s < strend) {
1598 if (*s == ch) {
bf93d4cc 1599 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1600 if (regtry(prog, s)) goto got_it;
1601 s++;
1602 while (s < strend && *s == ch)
1603 s++;
1604 }
1605 s++;
1606 }
1607 }
b7953727 1608 DEBUG_r(if (!did_match)
bf93d4cc 1609 PerlIO_printf(Perl_debug_log,
b7953727
JH
1610 "Did not find anchored character...\n")
1611 );
6eb5f6b9
JH
1612 }
1613 /*SUPPRESS 560*/
1aa99e6b
IH
1614 else if (do_utf8 == (UTF!=0) &&
1615 (prog->anchored_substr != Nullsv
9041c2e3 1616 || (prog->float_substr != Nullsv
1aa99e6b 1617 && prog->float_max_offset < strend - s))) {
9041c2e3 1618 SV *must = prog->anchored_substr
6eb5f6b9 1619 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1620 I32 back_max =
6eb5f6b9 1621 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1622 I32 back_min =
6eb5f6b9 1623 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1624 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1625 -(I32)(CHR_SVLEN(must)
1aa99e6b 1626 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1627 char *last1; /* Last position checked before */
bf93d4cc
GS
1628#ifdef DEBUGGING
1629 int did_match = 0;
1630#endif
6eb5f6b9
JH
1631
1632 if (s > PL_bostr)
1633 last1 = HOPc(s, -1);
1634 else
1635 last1 = s - 1; /* bogus */
1636
1637 /* XXXX check_substr already used to find `s', can optimize if
1638 check_substr==must. */
1639 scream_pos = -1;
1640 dontbother = end_shift;
1641 strend = HOPc(strend, -dontbother);
1642 while ( (s <= last) &&
9041c2e3 1643 ((flags & REXEC_SCREAM)
1aa99e6b 1644 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1645 end_shift, &scream_pos, 0))
1aa99e6b 1646 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1647 (unsigned char*)strend, must,
6eb5f6b9 1648 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1649 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1650 if (HOPc(s, -back_max) > last1) {
1651 last1 = HOPc(s, -back_min);
1652 s = HOPc(s, -back_max);
1653 }
1654 else {
1655 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1656
1657 last1 = HOPc(s, -back_min);
1658 s = t;
1659 }
1aa99e6b 1660 if (do_utf8) {
6eb5f6b9
JH
1661 while (s <= last1) {
1662 if (regtry(prog, s))
1663 goto got_it;
1664 s += UTF8SKIP(s);
1665 }
1666 }
1667 else {
1668 while (s <= last1) {
1669 if (regtry(prog, s))
1670 goto got_it;
1671 s++;
1672 }
1673 }
1674 }
b7953727
JH
1675 DEBUG_r(if (!did_match)
1676 PerlIO_printf(Perl_debug_log,
1677 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1678 ((must == prog->anchored_substr)
1679 ? "anchored" : "floating"),
1680 PL_colors[0],
1681 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1682 SvPVX(must),
b7953727
JH
1683 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1684 );
6eb5f6b9
JH
1685 goto phooey;
1686 }
155aba94 1687 else if ((c = prog->regstclass)) {
66e933ab
GS
1688 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1689 /* don't bother with what can't match */
6eb5f6b9 1690 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1691 DEBUG_r({
1692 SV *prop = sv_newmortal();
1693 regprop(prop, c);
1694 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1695 });
6eb5f6b9
JH
1696 if (find_byclass(prog, c, s, strend, startpos, 0))
1697 goto got_it;
bf93d4cc 1698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1699 }
1700 else {
1701 dontbother = 0;
1702 if (prog->float_substr != Nullsv) { /* Trim the end. */
1703 char *last;
d6a28714
JH
1704
1705 if (flags & REXEC_SCREAM) {
1706 last = screaminstr(sv, prog->float_substr, s - strbeg,
1707 end_shift, &scream_pos, 1); /* last one */
1708 if (!last)
ffc61ed2 1709 last = scream_olds; /* Only one occurrence. */
b8c5462f 1710 }
d6a28714
JH
1711 else {
1712 STRLEN len;
1713 char *little = SvPV(prog->float_substr, len);
1714
1715 if (SvTAIL(prog->float_substr)) {
1716 if (memEQ(strend - len + 1, little, len - 1))
1717 last = strend - len + 1;
1718 else if (!PL_multiline)
9041c2e3 1719 last = memEQ(strend - len, little, len)
d6a28714 1720 ? strend - len : Nullch;
b8c5462f 1721 else
d6a28714
JH
1722 goto find_last;
1723 } else {
1724 find_last:
9041c2e3 1725 if (len)
d6a28714 1726 last = rninstr(s, strend, little, little + len);
b8c5462f 1727 else
d6a28714 1728 last = strend; /* matching `$' */
b8c5462f 1729 }
b8c5462f 1730 }
bf93d4cc
GS
1731 if (last == NULL) {
1732 DEBUG_r(PerlIO_printf(Perl_debug_log,
1733 "%sCan't trim the tail, match fails (should not happen)%s\n",
1734 PL_colors[4],PL_colors[5]));
1735 goto phooey; /* Should not happen! */
1736 }
d6a28714
JH
1737 dontbother = strend - last + prog->float_min_offset;
1738 }
1739 if (minlen && (dontbother < minlen))
1740 dontbother = minlen - 1;
1741 strend -= dontbother; /* this one's always in bytes! */
1742 /* We don't know much -- general case. */
1aa99e6b 1743 if (do_utf8) {
d6a28714
JH
1744 for (;;) {
1745 if (regtry(prog, s))
1746 goto got_it;
1747 if (s >= strend)
1748 break;
b8c5462f 1749 s += UTF8SKIP(s);
d6a28714
JH
1750 };
1751 }
1752 else {
1753 do {
1754 if (regtry(prog, s))
1755 goto got_it;
1756 } while (s++ < strend);
1757 }
1758 }
1759
1760 /* Failure. */
1761 goto phooey;
1762
1763got_it:
1764 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1765
1766 if (PL_reg_eval_set) {
1767 /* Preserve the current value of $^R */
1768 if (oreplsv != GvSV(PL_replgv))
1769 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1770 restored, the value remains
1771 the same. */
acfe0abc 1772 restore_pos(aTHX_ 0);
d6a28714
JH
1773 }
1774
1775 /* make sure $`, $&, $', and $digit will work later */
1776 if ( !(flags & REXEC_NOT_FIRST) ) {
1777 if (RX_MATCH_COPIED(prog)) {
1778 Safefree(prog->subbeg);
1779 RX_MATCH_COPIED_off(prog);
1780 }
1781 if (flags & REXEC_COPY_STR) {
1782 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1783
1784 s = savepvn(strbeg, i);
1785 prog->subbeg = s;
1786 prog->sublen = i;
1787 RX_MATCH_COPIED_on(prog);
1788 }
1789 else {
1790 prog->subbeg = strbeg;
1791 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1792 }
1793 }
9041c2e3 1794
d6a28714
JH
1795 return 1;
1796
1797phooey:
bf93d4cc
GS
1798 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1799 PL_colors[4],PL_colors[5]));
d6a28714 1800 if (PL_reg_eval_set)
acfe0abc 1801 restore_pos(aTHX_ 0);
d6a28714
JH
1802 return 0;
1803}
1804
1805/*
1806 - regtry - try match at specific point
1807 */
1808STATIC I32 /* 0 failure, 1 success */
1809S_regtry(pTHX_ regexp *prog, char *startpos)
1810{
d6a28714
JH
1811 register I32 i;
1812 register I32 *sp;
1813 register I32 *ep;
1814 CHECKPOINT lastcp;
1815
02db2b7b
IZ
1816#ifdef DEBUGGING
1817 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1818#endif
d6a28714
JH
1819 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1820 MAGIC *mg;
1821
1822 PL_reg_eval_set = RS_init;
1823 DEBUG_r(DEBUG_s(
b900a521
JH
1824 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1825 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1826 ));
e8347627 1827 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1828 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1829 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1830 SAVETMPS;
1831 /* Apparently this is not needed, judging by wantarray. */
e8347627 1832 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1833 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1834
1835 if (PL_reg_sv) {
1836 /* Make $_ available to executed code. */
1837 if (PL_reg_sv != DEFSV) {
4d1ff10f 1838 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1839 SAVESPTR(DEFSV);
1840 DEFSV = PL_reg_sv;
b8c5462f 1841 }
d6a28714 1842
9041c2e3 1843 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1844 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1845 /* prepare for quick setting of pos */
14befaf4
DM
1846 sv_magic(PL_reg_sv, (SV*)0,
1847 PERL_MAGIC_regex_global, Nullch, 0);
1848 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1849 mg->mg_len = -1;
b8c5462f 1850 }
d6a28714
JH
1851 PL_reg_magic = mg;
1852 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1853 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1854 }
09687e5a 1855 if (!PL_reg_curpm) {
0f79a09d 1856 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1857#ifdef USE_ITHREADS
1858 {
1859 SV* repointer = newSViv(0);
577e12cc 1860 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1861 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1862 av_push(PL_regex_padav,repointer);
1863 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1864 PL_regex_pad = AvARRAY(PL_regex_padav);
1865 }
1866#endif
1867 }
aaa362c4 1868 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1869 PL_reg_oldcurpm = PL_curpm;
1870 PL_curpm = PL_reg_curpm;
1871 if (RX_MATCH_COPIED(prog)) {
1872 /* Here is a serious problem: we cannot rewrite subbeg,
1873 since it may be needed if this match fails. Thus
1874 $` inside (?{}) could fail... */
1875 PL_reg_oldsaved = prog->subbeg;
1876 PL_reg_oldsavedlen = prog->sublen;
1877 RX_MATCH_COPIED_off(prog);
1878 }
1879 else
1880 PL_reg_oldsaved = Nullch;
1881 prog->subbeg = PL_bostr;
1882 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1883 }
1884 prog->startp[0] = startpos - PL_bostr;
1885 PL_reginput = startpos;
1886 PL_regstartp = prog->startp;
1887 PL_regendp = prog->endp;
1888 PL_reglastparen = &prog->lastparen;
a01268b5 1889 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1890 prog->lastparen = 0;
1891 PL_regsize = 0;
1892 DEBUG_r(PL_reg_starttry = startpos);
1893 if (PL_reg_start_tmpl <= prog->nparens) {
1894 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1895 if(PL_reg_start_tmp)
1896 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1897 else
1898 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1899 }
1900
1901 /* XXXX What this code is doing here?!!! There should be no need
1902 to do this again and again, PL_reglastparen should take care of
3dd2943c 1903 this! --ilya*/
dafc8851
JH
1904
1905 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1906 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1907 * PL_reglastparen), is not needed at all by the test suite
1908 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1909 * enough, for building DynaLoader, or otherwise this
1910 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1911 * will happen. Meanwhile, this code *is* needed for the
1912 * above-mentioned test suite tests to succeed. The common theme
1913 * on those tests seems to be returning null fields from matches.
1914 * --jhi */
dafc8851 1915#if 1
d6a28714
JH
1916 sp = prog->startp;
1917 ep = prog->endp;
1918 if (prog->nparens) {
09e8ae3b 1919 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1920 *++sp = -1;
1921 *++ep = -1;
1922 }
1923 }
dafc8851 1924#endif
02db2b7b 1925 REGCP_SET(lastcp);
d6a28714
JH
1926 if (regmatch(prog->program + 1)) {
1927 prog->endp[0] = PL_reginput - PL_bostr;
1928 return 1;
1929 }
02db2b7b 1930 REGCP_UNWIND(lastcp);
d6a28714
JH
1931 return 0;
1932}
1933
02db2b7b
IZ
1934#define RE_UNWIND_BRANCH 1
1935#define RE_UNWIND_BRANCHJ 2
1936
1937union re_unwind_t;
1938
1939typedef struct { /* XX: makes sense to enlarge it... */
1940 I32 type;
1941 I32 prev;
1942 CHECKPOINT lastcp;
1943} re_unwind_generic_t;
1944
1945typedef struct {
1946 I32 type;
1947 I32 prev;
1948 CHECKPOINT lastcp;
1949 I32 lastparen;
1950 regnode *next;
1951 char *locinput;
1952 I32 nextchr;
1953#ifdef DEBUGGING
1954 int regindent;
1955#endif
1956} re_unwind_branch_t;
1957
1958typedef union re_unwind_t {
1959 I32 type;
1960 re_unwind_generic_t generic;
1961 re_unwind_branch_t branch;
1962} re_unwind_t;
1963
8ba1375e
MJD
1964#define sayYES goto yes
1965#define sayNO goto no
1966#define sayYES_FINAL goto yes_final
1967#define sayYES_LOUD goto yes_loud
1968#define sayNO_FINAL goto no_final
1969#define sayNO_SILENT goto do_no
1970#define saySAME(x) if (x) goto yes; else goto no
1971
1972#define REPORT_CODE_OFF 24
1973
d6a28714
JH
1974/*
1975 - regmatch - main matching routine
1976 *
1977 * Conceptually the strategy is simple: check to see whether the current
1978 * node matches, call self recursively to see whether the rest matches,
1979 * and then act accordingly. In practice we make some effort to avoid
1980 * recursion, in particular by going through "ordinary" nodes (that don't
1981 * need to know whether the rest of the match failed) by a loop instead of
1982 * by recursion.
1983 */
1984/* [lwall] I've hoisted the register declarations to the outer block in order to
1985 * maybe save a little bit of pushing and popping on the stack. It also takes
1986 * advantage of machines that use a register save mask on subroutine entry.
1987 */
1988STATIC I32 /* 0 failure, 1 success */
1989S_regmatch(pTHX_ regnode *prog)
1990{
d6a28714
JH
1991 register regnode *scan; /* Current node. */
1992 regnode *next; /* Next node. */
1993 regnode *inner; /* Next node in internal branch. */
1994 register I32 nextchr; /* renamed nextchr - nextchar colides with
1995 function of same name */
1996 register I32 n; /* no or next */
b7953727
JH
1997 register I32 ln = 0; /* len or last */
1998 register char *s = Nullch; /* operand or save */
d6a28714 1999 register char *locinput = PL_reginput;
b7953727 2000 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2001 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2002 I32 unwind = 0;
b7953727 2003#if 0
02db2b7b 2004 I32 firstcp = PL_savestack_ix;
b7953727 2005#endif
d9f424b2 2006 register bool do_utf8 = PL_reg_sv_utf8;
02db2b7b 2007
d6a28714
JH
2008#ifdef DEBUGGING
2009 PL_regindent++;
2010#endif
2011
2012 /* Note that nextchr is a byte even in UTF */
2013 nextchr = UCHARAT(locinput);
2014 scan = prog;
2015 while (scan != NULL) {
8ba1375e 2016
d6a28714
JH
2017 DEBUG_r( {
2018 SV *prop = sv_newmortal();
2019 int docolor = *PL_colors[0];
2020 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2021 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2022 /* The part of the string before starttry has one color
2023 (pref0_len chars), between starttry and current
2024 position another one (pref_len - pref0_len chars),
2025 after the current position the third one.
2026 We assume that pref0_len <= pref_len, otherwise we
2027 decrease pref0_len. */
9041c2e3 2028 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2029 ? (5 + taill) - l : locinput - PL_bostr;
2030 int pref0_len;
d6a28714 2031
1aa99e6b
IH
2032 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2033 pref_len++;
2034 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2035 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2036 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2037 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2038 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2039 l--;
d6a28714
JH
2040 if (pref0_len < 0)
2041 pref0_len = 0;
2042 if (pref0_len > pref_len)
2043 pref0_len = pref_len;
2044 regprop(prop, scan);
9041c2e3 2045 PerlIO_printf(Perl_debug_log,
b900a521 2046 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2047 (IV)(locinput - PL_bostr),
2048 PL_colors[4], pref0_len,
d6a28714 2049 locinput - pref_len, PL_colors[5],
9041c2e3 2050 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2051 locinput - pref_len + pref0_len, PL_colors[3],
2052 (docolor ? "" : "> <"),
2053 PL_colors[0], l, locinput, PL_colors[1],
2054 15 - l - pref_len + 1,
2055 "",
b900a521 2056 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2057 SvPVX(prop));
2058 } );
2059
2060 next = scan + NEXT_OFF(scan);
2061 if (next == scan)
2062 next = NULL;
2063
2064 switch (OP(scan)) {
2065 case BOL:
12d33761
HS
2066 if (locinput == PL_bostr || (PL_multiline &&
2067 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2068 {
2069 /* regtill = regbol; */
b8c5462f
JH
2070 break;
2071 }
d6a28714
JH
2072 sayNO;
2073 case MBOL:
12d33761
HS
2074 if (locinput == PL_bostr ||
2075 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2076 {
b8c5462f
JH
2077 break;
2078 }
d6a28714
JH
2079 sayNO;
2080 case SBOL:
c2a73568 2081 if (locinput == PL_bostr)
b8c5462f 2082 break;
d6a28714
JH
2083 sayNO;
2084 case GPOS:
2085 if (locinput == PL_reg_ganch)
2086 break;
2087 sayNO;
2088 case EOL:
2089 if (PL_multiline)
2090 goto meol;
2091 else
2092 goto seol;
2093 case MEOL:
2094 meol:
2095 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2096 sayNO;
b8c5462f 2097 break;
d6a28714
JH
2098 case SEOL:
2099 seol:
2100 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2101 sayNO;
d6a28714 2102 if (PL_regeol - locinput > 1)
b8c5462f 2103 sayNO;
b8c5462f 2104 break;
d6a28714
JH
2105 case EOS:
2106 if (PL_regeol != locinput)
b8c5462f 2107 sayNO;
d6a28714 2108 break;
ffc61ed2 2109 case SANY:
d6a28714 2110 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2111 sayNO;
f33976b4
DB
2112 if (do_utf8) {
2113 locinput += PL_utf8skip[nextchr];
2114 if (locinput > PL_regeol)
2115 sayNO;
2116 nextchr = UCHARAT(locinput);
2117 }
2118 else
2119 nextchr = UCHARAT(++locinput);
2120 break;
2121 case CANY:
2122 if (!nextchr && locinput >= PL_regeol)
2123 sayNO;
b8c5462f 2124 nextchr = UCHARAT(++locinput);
a0d0e21e 2125 break;
ffc61ed2 2126 case REG_ANY:
1aa99e6b
IH
2127 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2128 sayNO;
2129 if (do_utf8) {
b8c5462f 2130 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2131 if (locinput > PL_regeol)
2132 sayNO;
a0ed51b3 2133 nextchr = UCHARAT(locinput);
a0ed51b3 2134 }
1aa99e6b
IH
2135 else
2136 nextchr = UCHARAT(++locinput);
a0ed51b3 2137 break;
d6a28714 2138 case EXACT:
cd439c50
IZ
2139 s = STRING(scan);
2140 ln = STR_LEN(scan);
1aa99e6b
IH
2141 if (do_utf8 != (UTF!=0)) {
2142 char *l = locinput;
2143 char *e = s + ln;
2144 STRLEN len;
2145 if (do_utf8)
2146 while (s < e) {
2147 if (l >= PL_regeol)
2148 sayNO;
9041c2e3 2149 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2150 sayNO;
2151 s++;
2152 l += len;
2153 }
2154 else
2155 while (s < e) {
2156 if (l >= PL_regeol)
2157 sayNO;
9041c2e3 2158 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2159 sayNO;
2160 s += len;
2161 l++;
2162 }
2163 locinput = l;
2164 nextchr = UCHARAT(locinput);
2165 break;
2166 }
d6a28714
JH
2167 /* Inline the first character, for speed. */
2168 if (UCHARAT(s) != nextchr)
2169 sayNO;
2170 if (PL_regeol - locinput < ln)
2171 sayNO;
2172 if (ln > 1 && memNE(s, locinput, ln))
2173 sayNO;
2174 locinput += ln;
2175 nextchr = UCHARAT(locinput);
2176 break;
2177 case EXACTFL:
b8c5462f
JH
2178 PL_reg_flags |= RF_tainted;
2179 /* FALL THROUGH */
d6a28714 2180 case EXACTF:
cd439c50
IZ
2181 s = STRING(scan);
2182 ln = STR_LEN(scan);
d6a28714 2183
1aa99e6b 2184 if (do_utf8) {
d6a28714 2185 char *l = locinput;
1aa99e6b
IH
2186 char *e;
2187 e = s + ln;
d6a28714
JH
2188 c1 = OP(scan) == EXACTF;
2189 while (s < e) {
1aa99e6b 2190 if (l >= PL_regeol) {
d6a28714
JH
2191 sayNO;
2192 }
2b9d42f0 2193 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2194 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2195 sayNO;
2196 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2197 l += UTF8SKIP(l);
b8c5462f 2198 }
d6a28714 2199 locinput = l;
a0ed51b3
LW
2200 nextchr = UCHARAT(locinput);
2201 break;
2202 }
d6a28714
JH
2203
2204 /* Inline the first character, for speed. */
2205 if (UCHARAT(s) != nextchr &&
2206 UCHARAT(s) != ((OP(scan) == EXACTF)
2207 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2208 sayNO;
d6a28714 2209 if (PL_regeol - locinput < ln)
b8c5462f 2210 sayNO;
d6a28714
JH
2211 if (ln > 1 && (OP(scan) == EXACTF
2212 ? ibcmp(s, locinput, ln)
2213 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2214 sayNO;
d6a28714
JH
2215 locinput += ln;
2216 nextchr = UCHARAT(locinput);
a0d0e21e 2217 break;
d6a28714 2218 case ANYOF:
ffc61ed2
JH
2219 if (do_utf8) {
2220 if (!reginclass(scan, (U8*)locinput, do_utf8))
2221 sayNO;
2222 if (locinput >= PL_regeol)
2223 sayNO;
2224 locinput += PL_utf8skip[nextchr];
b8c5462f 2225 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2226 }
2227 else {
2228 if (nextchr < 0)
2229 nextchr = UCHARAT(locinput);
2230 if (!reginclass(scan, (U8*)locinput, do_utf8))
2231 sayNO;
2232 if (!nextchr && locinput >= PL_regeol)
2233 sayNO;
2234 nextchr = UCHARAT(++locinput);
2235 }
b8c5462f 2236 break;
d6a28714 2237 case ALNUML:
b8c5462f
JH
2238 PL_reg_flags |= RF_tainted;
2239 /* FALL THROUGH */
d6a28714 2240 case ALNUM:
b8c5462f 2241 if (!nextchr)
4633a7c4 2242 sayNO;
ffc61ed2 2243 if (do_utf8) {
ad24be35 2244 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2245 if (!(OP(scan) == ALNUM
3568d838 2246 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2247 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2248 {
2249 sayNO;
a0ed51b3 2250 }
b8c5462f 2251 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2252 nextchr = UCHARAT(locinput);
2253 break;
2254 }
ffc61ed2 2255 if (!(OP(scan) == ALNUM
d6a28714 2256 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2257 sayNO;
b8c5462f 2258 nextchr = UCHARAT(++locinput);
a0d0e21e 2259 break;
d6a28714 2260 case NALNUML:
b8c5462f
JH
2261 PL_reg_flags |= RF_tainted;
2262 /* FALL THROUGH */
d6a28714
JH
2263 case NALNUM:
2264 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2265 sayNO;
ffc61ed2 2266 if (do_utf8) {
8269fa76 2267 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2268 if (OP(scan) == NALNUM
3568d838 2269 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2270 : isALNUM_LC_utf8((U8*)locinput))
2271 {
b8c5462f 2272 sayNO;
d6a28714 2273 }
b8c5462f
JH
2274 locinput += PL_utf8skip[nextchr];
2275 nextchr = UCHARAT(locinput);
2276 break;
2277 }
ffc61ed2 2278 if (OP(scan) == NALNUM
d6a28714 2279 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2280 sayNO;
76e3520e 2281 nextchr = UCHARAT(++locinput);
a0d0e21e 2282 break;
d6a28714
JH
2283 case BOUNDL:
2284 case NBOUNDL:
3280af22 2285 PL_reg_flags |= RF_tainted;
bbce6d69 2286 /* FALL THROUGH */
d6a28714
JH
2287 case BOUND:
2288 case NBOUND:
2289 /* was last char in word? */
ffc61ed2 2290 if (do_utf8) {
12d33761
HS
2291 if (locinput == PL_bostr)
2292 ln = '\n';
ffc61ed2
JH
2293 else {
2294 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2295
2b9d42f0 2296 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2297 }
2298 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2299 ln = isALNUM_uni(ln);
8269fa76 2300 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2301 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2302 }
2303 else {
9041c2e3 2304 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2305 n = isALNUM_LC_utf8((U8*)locinput);
2306 }
a0ed51b3 2307 }
d6a28714 2308 else {
12d33761
HS
2309 ln = (locinput != PL_bostr) ?
2310 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2311 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2312 ln = isALNUM(ln);
2313 n = isALNUM(nextchr);
2314 }
2315 else {
2316 ln = isALNUM_LC(ln);
2317 n = isALNUM_LC(nextchr);
2318 }
d6a28714 2319 }
ffc61ed2
JH
2320 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2321 OP(scan) == BOUNDL))
2322 sayNO;
a0ed51b3 2323 break;
d6a28714 2324 case SPACEL:
3280af22 2325 PL_reg_flags |= RF_tainted;
bbce6d69 2326 /* FALL THROUGH */
d6a28714 2327 case SPACE:
9442cb0e 2328 if (!nextchr)
4633a7c4 2329 sayNO;
1aa99e6b 2330 if (do_utf8) {
fd400ab9 2331 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2332 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2333 if (!(OP(scan) == SPACE
3568d838 2334 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2335 : isSPACE_LC_utf8((U8*)locinput)))
2336 {
2337 sayNO;
2338 }
2339 locinput += PL_utf8skip[nextchr];
2340 nextchr = UCHARAT(locinput);
2341 break;
d6a28714 2342 }
ffc61ed2
JH
2343 if (!(OP(scan) == SPACE
2344 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2345 sayNO;
2346 nextchr = UCHARAT(++locinput);
2347 }
2348 else {
2349 if (!(OP(scan) == SPACE
2350 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2351 sayNO;
2352 nextchr = UCHARAT(++locinput);
a0ed51b3 2353 }
a0ed51b3 2354 break;
d6a28714 2355 case NSPACEL:
3280af22 2356 PL_reg_flags |= RF_tainted;
bbce6d69 2357 /* FALL THROUGH */
d6a28714 2358 case NSPACE:
9442cb0e 2359 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2360 sayNO;
1aa99e6b 2361 if (do_utf8) {
8269fa76 2362 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2363 if (OP(scan) == NSPACE
3568d838 2364 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2365 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2366 {
2367 sayNO;
2368 }
2369 locinput += PL_utf8skip[nextchr];
2370 nextchr = UCHARAT(locinput);
2371 break;
a0ed51b3 2372 }
ffc61ed2 2373 if (OP(scan) == NSPACE
d6a28714 2374 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2375 sayNO;
76e3520e 2376 nextchr = UCHARAT(++locinput);
a0d0e21e 2377 break;
d6a28714 2378 case DIGITL:
a0ed51b3
LW
2379 PL_reg_flags |= RF_tainted;
2380 /* FALL THROUGH */
d6a28714 2381 case DIGIT:
9442cb0e 2382 if (!nextchr)
a0ed51b3 2383 sayNO;
1aa99e6b 2384 if (do_utf8) {
8269fa76 2385 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2386 if (!(OP(scan) == DIGIT
3568d838 2387 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2388 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2389 {
a0ed51b3 2390 sayNO;
dfe13c55 2391 }
6f06b55f 2392 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2393 nextchr = UCHARAT(locinput);
2394 break;
2395 }
ffc61ed2 2396 if (!(OP(scan) == DIGIT
9442cb0e 2397 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2398 sayNO;
2399 nextchr = UCHARAT(++locinput);
2400 break;
d6a28714 2401 case NDIGITL:
b8c5462f
JH
2402 PL_reg_flags |= RF_tainted;
2403 /* FALL THROUGH */
d6a28714 2404 case NDIGIT:
9442cb0e 2405 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2406 sayNO;
1aa99e6b 2407 if (do_utf8) {
8269fa76 2408 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2409 if (OP(scan) == NDIGIT
3568d838 2410 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2411 : isDIGIT_LC_utf8((U8*)locinput))
2412 {
a0ed51b3 2413 sayNO;
9442cb0e 2414 }
6f06b55f 2415 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2416 nextchr = UCHARAT(locinput);
2417 break;
2418 }
ffc61ed2 2419 if (OP(scan) == NDIGIT
9442cb0e 2420 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2421 sayNO;
2422 nextchr = UCHARAT(++locinput);
2423 break;
2424 case CLUMP:
8269fa76 2425 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2426 if (locinput >= PL_regeol ||
2427 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2428 sayNO;
6f06b55f 2429 locinput += PL_utf8skip[nextchr];
3568d838
JH
2430 while (locinput < PL_regeol &&
2431 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2432 locinput += UTF8SKIP(locinput);
2433 if (locinput > PL_regeol)
2434 sayNO;
2435 nextchr = UCHARAT(locinput);
2436 break;
c8756f30 2437 case REFFL:
3280af22 2438 PL_reg_flags |= RF_tainted;
c8756f30 2439 /* FALL THROUGH */
c277df42 2440 case REF:
c8756f30 2441 case REFF:
c277df42 2442 n = ARG(scan); /* which paren pair */
cf93c79d 2443 ln = PL_regstartp[n];
2c2d71f5 2444 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2445 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2446 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2447 if (ln == PL_regendp[n])
a0d0e21e 2448 break;
a0ed51b3 2449
cf93c79d 2450 s = PL_bostr + ln;
1aa99e6b 2451 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2452 char *l = locinput;
cf93c79d 2453 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2454 /*
2455 * Note that we can't do the "other character" lookup trick as
2456 * in the 8-bit case (no pun intended) because in Unicode we
2457 * have to map both upper and title case to lower case.
2458 */
2459 if (OP(scan) == REFF) {
2460 while (s < e) {
2461 if (l >= PL_regeol)
2462 sayNO;
dfe13c55 2463 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2464 sayNO;
2465 s += UTF8SKIP(s);
2466 l += UTF8SKIP(l);
2467 }
2468 }
2469 else {
2470 while (s < e) {
2471 if (l >= PL_regeol)
2472 sayNO;
dfe13c55 2473 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2474 sayNO;
2475 s += UTF8SKIP(s);
2476 l += UTF8SKIP(l);
2477 }
2478 }
2479 locinput = l;
2480 nextchr = UCHARAT(locinput);
2481 break;
2482 }
2483
a0d0e21e 2484 /* Inline the first character, for speed. */
76e3520e 2485 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2486 (OP(scan) == REF ||
2487 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2488 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2489 sayNO;
cf93c79d 2490 ln = PL_regendp[n] - ln;
3280af22 2491 if (locinput + ln > PL_regeol)
4633a7c4 2492 sayNO;
c8756f30
AK
2493 if (ln > 1 && (OP(scan) == REF
2494 ? memNE(s, locinput, ln)
2495 : (OP(scan) == REFF
2496 ? ibcmp(s, locinput, ln)
2497 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2498 sayNO;
a0d0e21e 2499 locinput += ln;
76e3520e 2500 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2501 break;
2502
2503 case NOTHING:
c277df42 2504 case TAIL:
a0d0e21e
LW
2505 break;
2506 case BACK:
2507 break;
c277df42
IZ
2508 case EVAL:
2509 {
2510 dSP;
533c011a 2511 OP_4tree *oop = PL_op;
3280af22
NIS
2512 COP *ocurcop = PL_curcop;
2513 SV **ocurpad = PL_curpad;
c277df42 2514 SV *ret;
9041c2e3 2515
c277df42 2516 n = ARG(scan);
533c011a 2517 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2518 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2519 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2520 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2521
cea2e8a9 2522 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2523 SPAGAIN;
2524 ret = POPs;
2525 PUTBACK;
9041c2e3 2526
0f5d15d6
IZ
2527 PL_op = oop;
2528 PL_curpad = ocurpad;
2529 PL_curcop = ocurcop;
c277df42 2530 if (logical) {
0f5d15d6
IZ
2531 if (logical == 2) { /* Postponed subexpression. */
2532 regexp *re;
22c35a8c 2533 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2534 re_cc_state state;
0f5d15d6
IZ
2535 CHECKPOINT cp, lastcp;
2536
2537 if(SvROK(ret) || SvRMAGICAL(ret)) {
2538 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2539
2540 if(SvMAGICAL(sv))
14befaf4 2541 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2542 }
2543 if (mg) {
2544 re = (regexp *)mg->mg_obj;
df0003d4 2545 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2546 }
2547 else {
2548 STRLEN len;
2549 char *t = SvPV(ret, len);
2550 PMOP pm;
2551 char *oprecomp = PL_regprecomp;
2552 I32 osize = PL_regsize;
2553 I32 onpar = PL_regnpar;
2554
5fcd1c1b 2555 Zero(&pm, 1, PMOP);
cea2e8a9 2556 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2557 if (!(SvFLAGS(ret)
0f5d15d6 2558 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2559 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2560 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2561 PL_regprecomp = oprecomp;
2562 PL_regsize = osize;
2563 PL_regnpar = onpar;
2564 }
2565 DEBUG_r(
9041c2e3 2566 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2567 "Entering embedded `%s%.60s%s%s'\n",
2568 PL_colors[0],
2569 re->precomp,
2570 PL_colors[1],
2571 (strlen(re->precomp) > 60 ? "..." : ""))
2572 );
2573 state.node = next;
2574 state.prev = PL_reg_call_cc;
2575 state.cc = PL_regcc;
2576 state.re = PL_reg_re;
2577
2ab05381 2578 PL_regcc = 0;
9041c2e3 2579
0f5d15d6 2580 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2581 REGCP_SET(lastcp);
0f5d15d6
IZ
2582 cache_re(re);
2583 state.ss = PL_savestack_ix;
2584 *PL_reglastparen = 0;
a01268b5 2585 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2586 PL_reg_call_cc = &state;
2587 PL_reginput = locinput;
2c2d71f5
JH
2588
2589 /* XXXX This is too dramatic a measure... */
2590 PL_reg_maxiter = 0;
2591
0f5d15d6 2592 if (regmatch(re->program + 1)) {
2c914db6
IZ
2593 /* Even though we succeeded, we need to restore
2594 global variables, since we may be wrapped inside
2595 SUSPEND, thus the match may be not finished yet. */
2596
2597 /* XXXX Do this only if SUSPENDed? */
2598 PL_reg_call_cc = state.prev;
2599 PL_regcc = state.cc;
2600 PL_reg_re = state.re;
2601 cache_re(PL_reg_re);
2602
2603 /* XXXX This is too dramatic a measure... */
2604 PL_reg_maxiter = 0;
2605
2606 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2607 ReREFCNT_dec(re);
2608 regcpblow(cp);
2609 sayYES;
2610 }
0f5d15d6 2611 ReREFCNT_dec(re);
02db2b7b 2612 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2613 regcppop();
2614 PL_reg_call_cc = state.prev;
2615 PL_regcc = state.cc;
2616 PL_reg_re = state.re;
d3790889 2617 cache_re(PL_reg_re);
2c2d71f5
JH
2618
2619 /* XXXX This is too dramatic a measure... */
2620 PL_reg_maxiter = 0;
2621
8e514ae6 2622 logical = 0;
0f5d15d6
IZ
2623 sayNO;
2624 }
c277df42 2625 sw = SvTRUE(ret);
0f5d15d6 2626 logical = 0;
a0ed51b3
LW
2627 }
2628 else
3280af22 2629 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2630 break;
2631 }
a0d0e21e 2632 case OPEN:
c277df42 2633 n = ARG(scan); /* which paren pair */
3280af22
NIS
2634 PL_reg_start_tmp[n] = locinput;
2635 if (n > PL_regsize)
2636 PL_regsize = n;
a0d0e21e
LW
2637 break;
2638 case CLOSE:
c277df42 2639 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2640 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2641 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2642 if (n > *PL_reglastparen)
2643 *PL_reglastparen = n;
a01268b5 2644 *PL_reglastcloseparen = n;
a0d0e21e 2645 break;
c277df42
IZ
2646 case GROUPP:
2647 n = ARG(scan); /* which paren pair */
cf93c79d 2648 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2649 break;
2650 case IFTHEN:
2c2d71f5 2651 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2652 if (sw)
2653 next = NEXTOPER(NEXTOPER(scan));
2654 else {
2655 next = scan + ARG(scan);
2656 if (OP(next) == IFTHEN) /* Fake one. */
2657 next = NEXTOPER(NEXTOPER(next));
2658 }
2659 break;
2660 case LOGICAL:
0f5d15d6 2661 logical = scan->flags;
c277df42 2662 break;
2ab05381
IZ
2663/*******************************************************************
2664 PL_regcc contains infoblock about the innermost (...)* loop, and
2665 a pointer to the next outer infoblock.
2666
2667 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2668
2669 1) After matching X, regnode for CURLYX is processed;
2670
9041c2e3 2671 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2672 regmatch() recursively with the starting point at WHILEM node;
2673
2674 3) Each hit of WHILEM node tries to match A and Z (in the order
2675 depending on the current iteration, min/max of {min,max} and
2676 greediness). The information about where are nodes for "A"
2677 and "Z" is read from the infoblock, as is info on how many times "A"
2678 was already matched, and greediness.
2679
2680 4) After A matches, the same WHILEM node is hit again.
2681
2682 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2683 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2684 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2685 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2686 of the external loop.
2687
2688 Currently present infoblocks form a tree with a stem formed by PL_curcc
2689 and whatever it mentions via ->next, and additional attached trees
2690 corresponding to temporarily unset infoblocks as in "5" above.
2691
9041c2e3 2692 In the following picture infoblocks for outer loop of
2ab05381
IZ
2693 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2694 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2695 infoblocks are drawn below the "reset" infoblock.
2696
2697 In fact in the picture below we do not show failed matches for Z and T
2698 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2699 more obvious *why* one needs to *temporary* unset infoblocks.]
2700
2701 Matched REx position InfoBlocks Comment
2702 (Y(A)*?Z)*?T x
2703 Y(A)*?Z)*?T x <- O
2704 Y (A)*?Z)*?T x <- O
2705 Y A)*?Z)*?T x <- O <- I
2706 YA )*?Z)*?T x <- O <- I
2707 YA A)*?Z)*?T x <- O <- I
2708 YAA )*?Z)*?T x <- O <- I
2709 YAA Z)*?T x <- O # Temporary unset I
2710 I
2711
2712 YAAZ Y(A)*?Z)*?T x <- O
2713 I
2714
2715 YAAZY (A)*?Z)*?T x <- O
2716 I
2717
2718 YAAZY A)*?Z)*?T x <- O <- I
2719 I
2720
2721 YAAZYA )*?Z)*?T x <- O <- I
2722 I
2723
2724 YAAZYA Z)*?T x <- O # Temporary unset I
2725 I,I
2726
2727 YAAZYAZ )*?T x <- O
2728 I,I
2729
2730 YAAZYAZ T x # Temporary unset O
2731 O
2732 I,I
2733
2734 YAAZYAZT x
2735 O
2736 I,I
2737 *******************************************************************/
a0d0e21e
LW
2738 case CURLYX: {
2739 CURCUR cc;
3280af22 2740 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2741 /* No need to save/restore up to this paren */
2742 I32 parenfloor = scan->flags;
c277df42
IZ
2743
2744 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2745 next += ARG(next);
3280af22
NIS
2746 cc.oldcc = PL_regcc;
2747 PL_regcc = &cc;
cb434fcc
IZ
2748 /* XXXX Probably it is better to teach regpush to support
2749 parenfloor > PL_regsize... */
2750 if (parenfloor > *PL_reglastparen)
2751 parenfloor = *PL_reglastparen; /* Pessimization... */
2752 cc.parenfloor = parenfloor;
a0d0e21e
LW
2753 cc.cur = -1;
2754 cc.min = ARG1(scan);
2755 cc.max = ARG2(scan);
c277df42 2756 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2757 cc.next = next;
2758 cc.minmod = minmod;
2759 cc.lastloc = 0;
3280af22 2760 PL_reginput = locinput;
a0d0e21e
LW
2761 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2762 regcpblow(cp);
3280af22 2763 PL_regcc = cc.oldcc;
4633a7c4 2764 saySAME(n);
a0d0e21e
LW
2765 }
2766 /* NOT REACHED */
2767 case WHILEM: {
2768 /*
2769 * This is really hard to understand, because after we match
2770 * what we're trying to match, we must make sure the rest of
2c2d71f5 2771 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2772 * to go back UP the parse tree by recursing ever deeper. And
2773 * if it fails, we have to reset our parent's current state
2774 * that we can try again after backing off.
2775 */
2776
c277df42 2777 CHECKPOINT cp, lastcp;
3280af22 2778 CURCUR* cc = PL_regcc;
c277df42
IZ
2779 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2780
4633a7c4 2781 n = cc->cur + 1; /* how many we know we matched */
3280af22 2782 PL_reginput = locinput;
a0d0e21e 2783
c277df42 2784 DEBUG_r(
9041c2e3
NIS
2785 PerlIO_printf(Perl_debug_log,
2786 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2787 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2788 (long)n, (long)cc->min,
c277df42
IZ
2789 (long)cc->max, (long)cc)
2790 );
4633a7c4 2791
a0d0e21e
LW
2792 /* If degenerate scan matches "", assume scan done. */
2793
579cf2c3 2794 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2795 PL_regcc = cc->oldcc;
2ab05381
IZ
2796 if (PL_regcc)
2797 ln = PL_regcc->cur;
c277df42 2798 DEBUG_r(
c3464db5
DD
2799 PerlIO_printf(Perl_debug_log,
2800 "%*s empty match detected, try continuation...\n",
3280af22 2801 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2802 );
a0d0e21e 2803 if (regmatch(cc->next))
4633a7c4 2804 sayYES;
2ab05381
IZ
2805 if (PL_regcc)
2806 PL_regcc->cur = ln;
3280af22 2807 PL_regcc = cc;
4633a7c4 2808 sayNO;
a0d0e21e
LW
2809 }
2810
2811 /* First just match a string of min scans. */
2812
2813 if (n < cc->min) {
2814 cc->cur = n;
2815 cc->lastloc = locinput;
4633a7c4
LW
2816 if (regmatch(cc->scan))
2817 sayYES;
2818 cc->cur = n - 1;
c277df42 2819 cc->lastloc = lastloc;
4633a7c4 2820 sayNO;
a0d0e21e
LW
2821 }
2822
2c2d71f5
JH
2823 if (scan->flags) {
2824 /* Check whether we already were at this position.
2825 Postpone detection until we know the match is not
2826 *that* much linear. */
2827 if (!PL_reg_maxiter) {
2828 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2829 PL_reg_leftiter = PL_reg_maxiter;
2830 }
2831 if (PL_reg_leftiter-- == 0) {
2832 I32 size = (PL_reg_maxiter + 7)/8;
2833 if (PL_reg_poscache) {
2834 if (PL_reg_poscache_size < size) {
2835 Renew(PL_reg_poscache, size, char);
2836 PL_reg_poscache_size = size;
2837 }
2838 Zero(PL_reg_poscache, size, char);
2839 }
2840 else {
2841 PL_reg_poscache_size = size;
2842 Newz(29, PL_reg_poscache, size, char);
2843 }
2844 DEBUG_r(
2845 PerlIO_printf(Perl_debug_log,
2846 "%sDetected a super-linear match, switching on caching%s...\n",
2847 PL_colors[4], PL_colors[5])
2848 );
2849 }
2850 if (PL_reg_leftiter < 0) {
2851 I32 o = locinput - PL_bostr, b;
2852
2853 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2854 b = o % 8;
2855 o /= 8;
2856 if (PL_reg_poscache[o] & (1<<b)) {
2857 DEBUG_r(
2858 PerlIO_printf(Perl_debug_log,
2859 "%*s already tried at this position...\n",
2860 REPORT_CODE_OFF+PL_regindent*2, "")
2861 );
7821416a 2862 sayNO_SILENT;
2c2d71f5
JH
2863 }
2864 PL_reg_poscache[o] |= (1<<b);
2865 }
2866 }
2867
a0d0e21e
LW
2868 /* Prefer next over scan for minimal matching. */
2869
2870 if (cc->minmod) {
3280af22 2871 PL_regcc = cc->oldcc;
2ab05381
IZ
2872 if (PL_regcc)
2873 ln = PL_regcc->cur;
5f05dabc 2874 cp = regcppush(cc->parenfloor);
02db2b7b 2875 REGCP_SET(lastcp);
5f05dabc 2876 if (regmatch(cc->next)) {
c277df42 2877 regcpblow(cp);
4633a7c4 2878 sayYES; /* All done. */
5f05dabc 2879 }
02db2b7b 2880 REGCP_UNWIND(lastcp);
5f05dabc 2881 regcppop();
2ab05381
IZ
2882 if (PL_regcc)
2883 PL_regcc->cur = ln;
3280af22 2884 PL_regcc = cc;
a0d0e21e 2885
c277df42 2886 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2887 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2888 && !(PL_reg_flags & RF_warned)) {
2889 PL_reg_flags |= RF_warned;
e476b1b5 2890 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2891 "Complex regular subexpression recursion",
2892 REG_INFTY - 1);
c277df42 2893 }
4633a7c4 2894 sayNO;
c277df42 2895 }
a687059c 2896
c277df42 2897 DEBUG_r(
c3464db5
DD
2898 PerlIO_printf(Perl_debug_log,
2899 "%*s trying longer...\n",
3280af22 2900 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2901 );
a0d0e21e 2902 /* Try scanning more and see if it helps. */
3280af22 2903 PL_reginput = locinput;
a0d0e21e
LW
2904 cc->cur = n;
2905 cc->lastloc = locinput;
5f05dabc 2906 cp = regcppush(cc->parenfloor);
02db2b7b 2907 REGCP_SET(lastcp);
5f05dabc 2908 if (regmatch(cc->scan)) {
c277df42 2909 regcpblow(cp);
4633a7c4 2910 sayYES;
5f05dabc 2911 }
02db2b7b 2912 REGCP_UNWIND(lastcp);
5f05dabc 2913 regcppop();
4633a7c4 2914 cc->cur = n - 1;
c277df42 2915 cc->lastloc = lastloc;
4633a7c4 2916 sayNO;
a0d0e21e
LW
2917 }
2918
2919 /* Prefer scan over next for maximal matching. */
2920
2921 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2922 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2923 cc->cur = n;
2924 cc->lastloc = locinput;
02db2b7b 2925 REGCP_SET(lastcp);
5f05dabc 2926 if (regmatch(cc->scan)) {
c277df42 2927 regcpblow(cp);
4633a7c4 2928 sayYES;
5f05dabc 2929 }
02db2b7b 2930 REGCP_UNWIND(lastcp);
a0d0e21e 2931 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2932 PL_reginput = locinput;
c277df42 2933 DEBUG_r(
c3464db5
DD
2934 PerlIO_printf(Perl_debug_log,
2935 "%*s failed, try continuation...\n",
3280af22 2936 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2937 );
2938 }
9041c2e3 2939 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2940 && !(PL_reg_flags & RF_warned)) {
3280af22 2941 PL_reg_flags |= RF_warned;
e476b1b5 2942 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2943 "Complex regular subexpression recursion",
2944 REG_INFTY - 1);
a0d0e21e
LW
2945 }
2946
2947 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2948 PL_regcc = cc->oldcc;
2ab05381
IZ
2949 if (PL_regcc)
2950 ln = PL_regcc->cur;
a0d0e21e 2951 if (regmatch(cc->next))
4633a7c4 2952 sayYES;
2ab05381
IZ
2953 if (PL_regcc)
2954 PL_regcc->cur = ln;
3280af22 2955 PL_regcc = cc;
4633a7c4 2956 cc->cur = n - 1;
c277df42 2957 cc->lastloc = lastloc;
4633a7c4 2958 sayNO;
a0d0e21e
LW
2959 }
2960 /* NOT REACHED */
9041c2e3 2961 case BRANCHJ:
c277df42
IZ
2962 next = scan + ARG(scan);
2963 if (next == scan)
2964 next = NULL;
2965 inner = NEXTOPER(NEXTOPER(scan));
2966 goto do_branch;
9041c2e3 2967 case BRANCH:
c277df42
IZ
2968 inner = NEXTOPER(scan);
2969 do_branch:
2970 {
c277df42
IZ
2971 c1 = OP(scan);
2972 if (OP(next) != c1) /* No choice. */
2973 next = inner; /* Avoid recursion. */
a0d0e21e 2974 else {
02db2b7b
IZ
2975 I32 lastparen = *PL_reglastparen;
2976 I32 unwind1;
2977 re_unwind_branch_t *uw;
2978
2979 /* Put unwinding data on stack */
2980 unwind1 = SSNEWt(1,re_unwind_branch_t);
2981 uw = SSPTRt(unwind1,re_unwind_branch_t);
2982 uw->prev = unwind;
2983 unwind = unwind1;
2984 uw->type = ((c1 == BRANCH)
2985 ? RE_UNWIND_BRANCH
2986 : RE_UNWIND_BRANCHJ);
2987 uw->lastparen = lastparen;
2988 uw->next = next;
2989 uw->locinput = locinput;
2990 uw->nextchr = nextchr;
2991#ifdef DEBUGGING
2992 uw->regindent = ++PL_regindent;
2993#endif
c277df42 2994
02db2b7b
IZ
2995 REGCP_SET(uw->lastcp);
2996
2997 /* Now go into the first branch */
2998 next = inner;
a687059c 2999 }
a0d0e21e
LW
3000 }
3001 break;
3002 case MINMOD:
3003 minmod = 1;
3004 break;
c277df42
IZ
3005 case CURLYM:
3006 {
00db4c45 3007 I32 l = 0;
c277df42 3008 CHECKPOINT lastcp;
9041c2e3 3009
c277df42
IZ
3010 /* We suppose that the next guy does not need
3011 backtracking: in particular, it is of constant length,
3012 and has no parenths to influence future backrefs. */
3013 ln = ARG1(scan); /* min to match */
3014 n = ARG2(scan); /* max to match */
c277df42
IZ
3015 paren = scan->flags;
3016 if (paren) {
3280af22
NIS
3017 if (paren > PL_regsize)
3018 PL_regsize = paren;
3019 if (paren > *PL_reglastparen)
3020 *PL_reglastparen = paren;
c277df42 3021 }
dc45a647 3022 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3023 if (paren)
3024 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3025 PL_reginput = locinput;
c277df42
IZ
3026 if (minmod) {
3027 minmod = 0;
3028 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3029 sayNO;
f31a99c8
HS
3030 /* if we matched something zero-length we don't need to
3031 backtrack - capturing parens are already defined, so
3032 the caveat in the maximal case doesn't apply
3033
3034 XXXX if ln == 0, we can redo this check first time
3035 through the following loop
3036 */
3037 if (ln && l == 0)
3038 n = ln; /* don't backtrack */
3280af22 3039 locinput = PL_reginput;
22c35a8c 3040 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3041 c1 = (U8)*STRING(next);
c277df42 3042 if (OP(next) == EXACTF)
22c35a8c 3043 c2 = PL_fold[c1];
c277df42 3044 else if (OP(next) == EXACTFL)
22c35a8c 3045 c2 = PL_fold_locale[c1];
c277df42
IZ
3046 else
3047 c2 = c1;
a0ed51b3
LW
3048 }
3049 else
c277df42 3050 c1 = c2 = -1000;
02db2b7b 3051 REGCP_SET(lastcp);
5f4b28b2 3052 /* This may be improved if l == 0. */
c277df42
IZ
3053 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3054 /* If it could work, try it. */
3055 if (c1 == -1000 ||
3280af22
NIS
3056 UCHARAT(PL_reginput) == c1 ||
3057 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3058 {
3059 if (paren) {
f31a99c8 3060 if (ln) {
cf93c79d
IZ
3061 PL_regstartp[paren] =
3062 HOPc(PL_reginput, -l) - PL_bostr;
3063 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3064 }
3065 else
cf93c79d 3066 PL_regendp[paren] = -1;
c277df42
IZ
3067 }
3068 if (regmatch(next))
3069 sayYES;
02db2b7b 3070 REGCP_UNWIND(lastcp);
c277df42
IZ
3071 }
3072 /* Couldn't or didn't -- move forward. */
3280af22 3073 PL_reginput = locinput;
c277df42
IZ
3074 if (regrepeat_hard(scan, 1, &l)) {
3075 ln++;
3280af22 3076 locinput = PL_reginput;
c277df42
IZ
3077 }
3078 else
3079 sayNO;
3080 }
a0ed51b3
LW
3081 }
3082 else {
c277df42 3083 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3084 /* if we matched something zero-length we don't need to
3085 backtrack, unless the minimum count is zero and we
3086 are capturing the result - in that case the capture
3087 being defined or not may affect later execution
3088 */
3089 if (n != 0 && l == 0 && !(paren && ln == 0))
3090 ln = n; /* don't backtrack */
3280af22 3091 locinput = PL_reginput;
c277df42 3092 DEBUG_r(
5c0ca799 3093 PerlIO_printf(Perl_debug_log,
faccc32b 3094 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3095 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3096 (IV) n, (IV)l)
c277df42
IZ
3097 );
3098 if (n >= ln) {
22c35a8c 3099 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3100 c1 = (U8)*STRING(next);
c277df42 3101 if (OP(next) == EXACTF)
22c35a8c 3102 c2 = PL_fold[c1];
c277df42 3103 else if (OP(next) == EXACTFL)
22c35a8c 3104 c2 = PL_fold_locale[c1];
c277df42
IZ
3105 else
3106 c2 = c1;
a0ed51b3
LW
3107 }
3108 else
c277df42
IZ
3109 c1 = c2 = -1000;
3110 }
02db2b7b 3111 REGCP_SET(lastcp);
c277df42
IZ
3112 while (n >= ln) {
3113 /* If it could work, try it. */
3114 if (c1 == -1000 ||
3280af22
NIS
3115 UCHARAT(PL_reginput) == c1 ||
3116 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3117 {
3118 DEBUG_r(
c3464db5 3119 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3120 "%*s trying tail with n=%"IVdf"...\n",
3121 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3122 );
3123 if (paren) {
3124 if (n) {
cf93c79d
IZ
3125 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3126 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3127 }
a0ed51b3 3128 else
cf93c79d 3129 PL_regendp[paren] = -1;
c277df42 3130 }
a0ed51b3
LW
3131 if (regmatch(next))
3132 sayYES;
02db2b7b 3133 REGCP_UNWIND(lastcp);
a0ed51b3 3134 }
c277df42
IZ
3135 /* Couldn't or didn't -- back up. */
3136 n--;
dfe13c55 3137 locinput = HOPc(locinput, -l);
3280af22 3138 PL_reginput = locinput;
c277df42
IZ
3139 }
3140 }
3141 sayNO;
3142 break;
3143 }
3144 case CURLYN:
3145 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3146 if (paren > PL_regsize)
3147 PL_regsize = paren;
3148 if (paren > *PL_reglastparen)
3149 *PL_reglastparen = paren;
c277df42
IZ
3150 ln = ARG1(scan); /* min to match */
3151 n = ARG2(scan); /* max to match */
dc45a647 3152 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3153 goto repeat;
a0d0e21e 3154 case CURLY:
c277df42 3155 paren = 0;
a0d0e21e
LW
3156 ln = ARG1(scan); /* min to match */
3157 n = ARG2(scan); /* max to match */
dc45a647 3158 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3159 goto repeat;
3160 case STAR:
3161 ln = 0;
c277df42 3162 n = REG_INFTY;
a0d0e21e 3163 scan = NEXTOPER(scan);
c277df42 3164 paren = 0;
a0d0e21e
LW
3165 goto repeat;
3166 case PLUS:
c277df42
IZ
3167 ln = 1;
3168 n = REG_INFTY;
3169 scan = NEXTOPER(scan);
3170 paren = 0;
3171 repeat:
a0d0e21e
LW
3172 /*
3173 * Lookahead to avoid useless match attempts
3174 * when we know what character comes next.
3175 */
22c35a8c 3176 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3177 U8 *s = (U8*)STRING(next);
3178 if (!UTF) {
3179 c2 = c1 = *s;
3180 if (OP(next) == EXACTF)
3181 c2 = PL_fold[c1];
3182 else if (OP(next) == EXACTFL)
3183 c2 = PL_fold_locale[c1];
3184 }
3185 else { /* UTF */
3186 if (OP(next) == EXACTF) {
3187 c1 = to_utf8_lower(s);
3188 c2 = to_utf8_upper(s);
3189 }
3190 else {
9041c2e3 3191 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3192 }
3193 }
bbce6d69 3194 }
a0d0e21e 3195 else
bbce6d69 3196 c1 = c2 = -1000;
3280af22 3197 PL_reginput = locinput;
a0d0e21e 3198 if (minmod) {
c277df42 3199 CHECKPOINT lastcp;
a0d0e21e
LW
3200 minmod = 0;
3201 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3202 sayNO;
a0ed51b3 3203 locinput = PL_reginput;
02db2b7b 3204 REGCP_SET(lastcp);
0fe9bf95 3205 if (c1 != -1000) {
1aa99e6b 3206 char *e; /* Should not check after this */
0fe9bf95
IZ
3207 char *old = locinput;
3208
1aa99e6b 3209 if (n == REG_INFTY) {
0fe9bf95 3210 e = PL_regeol - 1;
1aa99e6b
IH
3211 if (do_utf8)
3212 while (UTF8_IS_CONTINUATION(*(U8*)e))
3213 e--;
3214 }
3215 else if (do_utf8) {
3216 int m = n - ln;
3217 for (e = locinput;
3218 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3219 e += UTF8SKIP(e);
3220 }
3221 else {
3222 e = locinput + n - ln;
3223 if (e >= PL_regeol)
3224 e = PL_regeol - 1;
3225 }
0fe9bf95 3226 while (1) {
1aa99e6b 3227 int count;
0fe9bf95 3228 /* Find place 'next' could work */
1aa99e6b
IH
3229 if (!do_utf8) {
3230 if (c1 == c2) {
3231 while (locinput <= e && *locinput != c1)
3232 locinput++;
3233 } else {
9041c2e3 3234 while (locinput <= e
1aa99e6b
IH
3235 && *locinput != c1
3236 && *locinput != c2)
3237 locinput++;
3238 }
3239 count = locinput - old;
3240 }
3241 else {
3242 STRLEN len;
3243 if (c1 == c2) {
3244 for (count = 0;
3245 locinput <= e &&
9041c2e3 3246 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3247 count++)
3248 locinput += len;
3249
3250 } else {
3251 for (count = 0; locinput <= e; count++) {
9041c2e3 3252 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3253 if (c == c1 || c == c2)
3254 break;
9041c2e3 3255 locinput += len;
1aa99e6b
IH
3256 }
3257 }
0fe9bf95 3258 }
9041c2e3 3259 if (locinput > e)
0fe9bf95
IZ
3260 sayNO;
3261 /* PL_reginput == old now */
3262 if (locinput != old) {
3263 ln = 1; /* Did some */
1aa99e6b 3264 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3265 sayNO;
3266 }
3267 /* PL_reginput == locinput now */
29d1e993 3268 TRYPAREN(paren, ln, locinput);
0fe9bf95 3269 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3270 REGCP_UNWIND(lastcp);
0fe9bf95 3271 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3272 old = locinput;
3273 if (do_utf8)
3274 locinput += UTF8SKIP(locinput);
3275 else
3276 locinput++;
0fe9bf95
IZ
3277 }
3278 }
3279 else
c277df42 3280 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3281 UV c;
3282 if (c1 != -1000) {
3283 if (do_utf8)
9041c2e3 3284 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3285 else
9041c2e3 3286 c = UCHARAT(PL_reginput);
2390ecbc
PP
3287 /* If it could work, try it. */
3288 if (c == c1 || c == c2)
3289 {
3290 TRYPAREN(paren, n, PL_reginput);
3291 REGCP_UNWIND(lastcp);
3292 }
1aa99e6b 3293 }
a0d0e21e 3294 /* If it could work, try it. */
2390ecbc 3295 else if (c1 == -1000)
bbce6d69 3296 {
29d1e993 3297 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3298 REGCP_UNWIND(lastcp);
bbce6d69 3299 }
c277df42 3300 /* Couldn't or didn't -- move forward. */
a0ed51b3 3301 PL_reginput = locinput;
a0d0e21e
LW
3302 if (regrepeat(scan, 1)) {
3303 ln++;
a0ed51b3
LW
3304 locinput = PL_reginput;
3305 }
3306 else
4633a7c4 3307 sayNO;
a0d0e21e
LW
3308 }
3309 }
3310 else {
c277df42 3311 CHECKPOINT lastcp;
a0d0e21e 3312 n = regrepeat(scan, n);
a0ed51b3 3313 locinput = PL_reginput;
22c35a8c 3314 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3315 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3316 ln = n; /* why back off? */
1aeab75a
GS
3317 /* ...because $ and \Z can match before *and* after
3318 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3319 We should back off by one in this case. */
3320 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3321 ln--;
3322 }
02db2b7b 3323 REGCP_SET(lastcp);
c277df42 3324 if (paren) {
8fa7f367 3325 UV c = 0;
c277df42 3326 while (n >= ln) {
1aa99e6b
IH
3327 if (c1 != -1000) {
3328 if (do_utf8)
9041c2e3 3329 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3330 else
9041c2e3 3331 c = UCHARAT(PL_reginput);
1aa99e6b 3332 }
c277df42 3333 /* If it could work, try it. */
1aa99e6b 3334 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3335 {
29d1e993 3336 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3337 REGCP_UNWIND(lastcp);
c277df42
IZ
3338 }
3339 /* Couldn't or didn't -- back up. */
3340 n--;
dfe13c55 3341 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3342 }
a0ed51b3
LW
3343 }
3344 else {
8fa7f367 3345 UV c = 0;
c277df42 3346 while (n >= ln) {
1aa99e6b
IH
3347 if (c1 != -1000) {
3348 if (do_utf8)
9041c2e3 3349 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3350 else
9041c2e3 3351 c = UCHARAT(PL_reginput);
1aa99e6b 3352 }
c277df42 3353 /* If it could work, try it. */
1aa99e6b 3354 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3355 {
29d1e993 3356 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3357 REGCP_UNWIND(lastcp);
c277df42
IZ
3358 }
3359 /* Couldn't or didn't -- back up. */
3360 n--;
dfe13c55 3361 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3362 }
a0d0e21e
LW
3363 }
3364 }
4633a7c4 3365 sayNO;
c277df42 3366 break;
a0d0e21e 3367 case END:
0f5d15d6
IZ
3368 if (PL_reg_call_cc) {
3369 re_cc_state *cur_call_cc = PL_reg_call_cc;
3370 CURCUR *cctmp = PL_regcc;
3371 regexp *re = PL_reg_re;
3372 CHECKPOINT cp, lastcp;
3373
3374 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3375 REGCP_SET(lastcp);
0f5d15d6
IZ
3376 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3377 the caller. */
3378 PL_reginput = locinput; /* Make position available to
3379 the callcc. */
3380 cache_re(PL_reg_call_cc->re);
3381 PL_regcc = PL_reg_call_cc->cc;
3382 PL_reg_call_cc = PL_reg_call_cc->prev;
3383 if (regmatch(cur_call_cc->node)) {
3384 PL_reg_call_cc = cur_call_cc;
3385 regcpblow(cp);
3386 sayYES;
3387 }
02db2b7b 3388 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3389 regcppop();
3390 PL_reg_call_cc = cur_call_cc;
3391 PL_regcc = cctmp;
3392 PL_reg_re = re;
3393 cache_re(re);
3394
3395 DEBUG_r(
3396 PerlIO_printf(Perl_debug_log,
3397 "%*s continuation failed...\n",
3398 REPORT_CODE_OFF+PL_regindent*2, "")
3399 );
7821416a 3400 sayNO_SILENT;
0f5d15d6 3401 }
7821416a
IZ
3402 if (locinput < PL_regtill) {
3403 DEBUG_r(PerlIO_printf(Perl_debug_log,
3404 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3405 PL_colors[4],
3406 (long)(locinput - PL_reg_starttry),
3407 (long)(PL_regtill - PL_reg_starttry),
3408 PL_colors[5]));
3409 sayNO_FINAL; /* Cannot match: too short. */
3410 }
3411 PL_reginput = locinput; /* put where regtry can find it */
3412 sayYES_FINAL; /* Success! */
7e5428c5 3413 case SUCCEED:
3280af22 3414 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3415 sayYES_LOUD; /* Success! */
c277df42
IZ
3416 case SUSPEND:
3417 n = 1;
9fe1d20c 3418 PL_reginput = locinput;
9041c2e3 3419 goto do_ifmatch;
a0d0e21e 3420 case UNLESSM:
c277df42 3421 n = 0;
a0ed51b3 3422 if (scan->flags) {
efb30f32
HS
3423 s = HOPBACKc(locinput, scan->flags);
3424 if (!s)
3425 goto say_yes;
3426 PL_reginput = s;
a0ed51b3
LW
3427 }
3428 else
3429 PL_reginput = locinput;
c277df42
IZ
3430 goto do_ifmatch;
3431 case IFMATCH:
3432 n = 1;
a0ed51b3 3433 if (scan->flags) {
efb30f32
HS
3434 s = HOPBACKc(locinput, scan->flags);
3435 if (!s)
3436 goto say_no;
3437 PL_reginput = s;
a0ed51b3
LW
3438 }
3439 else
3440 PL_reginput = locinput;
3441
c277df42 3442 do_ifmatch:
c277df42
IZ
3443 inner = NEXTOPER(NEXTOPER(scan));
3444 if (regmatch(inner) != n) {
3445 say_no:
3446 if (logical) {
3447 logical = 0;
3448 sw = 0;
3449 goto do_longjump;
a0ed51b3
LW
3450 }
3451 else
c277df42
IZ
3452 sayNO;
3453 }
3454 say_yes:
3455 if (logical) {
3456 logical = 0;
3457 sw = 1;
3458 }
fe44a5e8 3459 if (OP(scan) == SUSPEND) {
3280af22 3460 locinput = PL_reginput;
565764a8 3461 nextchr = UCHARAT(locinput);
fe44a5e8 3462 }
c277df42
IZ
3463 /* FALL THROUGH. */
3464 case LONGJMP:
3465 do_longjump:
3466 next = scan + ARG(scan);
3467 if (next == scan)
3468 next = NULL;
a0d0e21e
LW
3469 break;
3470 default:
b900a521 3471 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3472 PTR2UV(scan), OP(scan));
cea2e8a9 3473 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3474 }
02db2b7b 3475 reenter:
a0d0e21e
LW
3476 scan = next;
3477 }
a687059c 3478
a0d0e21e
LW
3479 /*
3480 * We get here only if there's trouble -- normally "case END" is
3481 * the terminating point.
3482 */
cea2e8a9 3483 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3484 /*NOTREACHED*/
4633a7c4
LW
3485 sayNO;
3486
7821416a
IZ
3487yes_loud:
3488 DEBUG_r(
3489 PerlIO_printf(Perl_debug_log,
3490 "%*s %scould match...%s\n",
3491 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3492 );
3493 goto yes;
3494yes_final:
3495 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3496 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3497yes:
3498#ifdef DEBUGGING
3280af22 3499 PL_regindent--;
4633a7c4 3500#endif
02db2b7b
IZ
3501
3502#if 0 /* Breaks $^R */
3503 if (unwind)
3504 regcpblow(firstcp);
3505#endif
4633a7c4
LW
3506 return 1;
3507
3508no:
7821416a
IZ
3509 DEBUG_r(
3510 PerlIO_printf(Perl_debug_log,
3511 "%*s %sfailed...%s\n",
3512 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3513 );
3514 goto do_no;
3515no_final:
3516do_no:
02db2b7b
IZ
3517 if (unwind) {
3518 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3519
3520 switch (uw->type) {
3521 case RE_UNWIND_BRANCH:
3522 case RE_UNWIND_BRANCHJ:
3523 {
3524 re_unwind_branch_t *uwb = &(uw->branch);
3525 I32 lastparen = uwb->lastparen;
9041c2e3 3526
02db2b7b
IZ
3527 REGCP_UNWIND(uwb->lastcp);
3528 for (n = *PL_reglastparen; n > lastparen; n--)
3529 PL_regendp[n] = -1;
3530 *PL_reglastparen = n;
3531 scan = next = uwb->next;
9041c2e3
NIS
3532 if ( !scan ||
3533 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3534 ? BRANCH : BRANCHJ) ) { /* Failure */
3535 unwind = uwb->prev;
3536#ifdef DEBUGGING
3537 PL_regindent--;
3538#endif
3539 goto do_no;
3540 }
3541 /* Have more choice yet. Reuse the same uwb. */
3542 /*SUPPRESS 560*/
3543 if ((n = (uwb->type == RE_UNWIND_BRANCH
3544 ? NEXT_OFF(next) : ARG(next))))
3545 next += n;
3546 else
3547 next = NULL; /* XXXX Needn't unwinding in this case... */
3548 uwb->next = next;
3549 next = NEXTOPER(scan);
3550 if (uwb->type == RE_UNWIND_BRANCHJ)
3551 next = NEXTOPER(next);
3552 locinput = uwb->locinput;
3553 nextchr = uwb->nextchr;
3554#ifdef DEBUGGING
3555 PL_regindent = uwb->regindent;
3556#endif
3557
3558 goto reenter;
3559 }
3560 /* NOT REACHED */
3561 default:
3562 Perl_croak(aTHX_ "regexp unwind memory corruption");
3563 }
3564 /* NOT REACHED */
3565 }
4633a7c4 3566#ifdef DEBUGGING
3280af22 3567 PL_regindent--;
4633a7c4 3568#endif
a0d0e21e 3569 return 0;
a687059c
LW
3570}
3571
3572/*
3573 - regrepeat - repeatedly match something simple, report how many
3574 */
3575/*
3576 * [This routine now assumes that it will only match on things of length 1.
3577 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3578 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3579 */
76e3520e 3580STATIC I32
cea2e8a9 3581S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3582{
a0d0e21e 3583 register char *scan;
a0d0e21e 3584 register I32 c;
3280af22 3585 register char *loceol = PL_regeol;
a0ed51b3 3586 register I32 hardcount = 0;
d9f424b2 3587 register bool do_utf8 = PL_reg_sv_utf8;
a0d0e21e 3588
3280af22 3589 scan = PL_reginput;
c277df42 3590 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3591 loceol = scan + max;
a0d0e21e 3592 switch (OP(p)) {
22c35a8c 3593 case REG_ANY:
1aa99e6b 3594 if (do_utf8) {
ffc61ed2 3595 loceol = PL_regeol;
1aa99e6b 3596 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3597 scan += UTF8SKIP(scan);
3598 hardcount++;
3599 }
3600 } else {
3601 while (scan < loceol && *scan != '\n')
3602 scan++;
a0ed51b3
LW
3603 }
3604 break;
ffc61ed2 3605 case SANY:
3baa4c62 3606 scan = loceol;
a0ed51b3 3607 break;
f33976b4
DB
3608 case CANY:
3609 scan = loceol;
3610 break;
bbce6d69 3611 case EXACT: /* length of string is 1 */
cd439c50 3612 c = (U8)*STRING(p);
bbce6d69 3613 while (scan < loceol && UCHARAT(scan) == c)
3614 scan++;
3615 break;
3616 case EXACTF: /* length of string is 1 */
cd439c50 3617 c = (U8)*STRING(p);
bbce6d69 3618 while (scan < loceol &&
22c35a8c 3619 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3620 scan++;
3621 break;
3622 case EXACTFL: /* length of string is 1 */
3280af22 3623 PL_reg_flags |= RF_tainted;
cd439c50 3624 c = (U8)*STRING(p);
bbce6d69 3625 while (scan < loceol &&
22c35a8c 3626 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3627 scan++;
3628 break;
3629 case ANYOF:
ffc61ed2
JH
3630 if (do_utf8) {
3631 loceol = PL_regeol;
1aa99e6b
IH
3632 while (hardcount < max && scan < loceol &&
3633 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3634 scan += UTF8SKIP(scan);
3635 hardcount++;
3636 }
3637 } else {
3638 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3639 scan++;
3640 }
a0d0e21e
LW
3641 break;
3642 case ALNUM:
1aa99e6b 3643 if (do_utf8) {
ffc61ed2 3644 loceol = PL_regeol;
8269fa76 3645 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3646 while (hardcount < max && scan < loceol &&
3568d838 3647 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3648 scan += UTF8SKIP(scan);
3649 hardcount++;
3650 }
3651 } else {
3652 while (scan < loceol && isALNUM(*scan))
3653 scan++;
a0ed51b3
LW
3654 }
3655 break;
bbce6d69 3656 case ALNUML:
3280af22 3657 PL_reg_flags |= RF_tainted;
1aa99e6b 3658 if (do_utf8) {
ffc61ed2 3659 loceol = PL_regeol;
1aa99e6b
IH
3660 while (hardcount < max && scan < loceol &&
3661 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3662 scan += UTF8SKIP(scan);
3663 hardcount++;
3664 }
3665 } else {
3666 while (scan < loceol && isALNUM_LC(*scan))
3667 scan++;
a0ed51b3
LW
3668 }
3669 break;
a0d0e21e 3670 case NALNUM:
1aa99e6b 3671 if (do_utf8) {
ffc61ed2 3672 loceol = PL_regeol;
8269fa76 3673 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3674 while (hardcount < max && scan < loceol &&
3568d838 3675 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3676 scan += UTF8SKIP(scan);
3677 hardcount++;
3678 }
3679 } else {
3680 while (scan < loceol && !isALNUM(*scan))
3681 scan++;
a0ed51b3
LW
3682 }
3683 break;
bbce6d69 3684 case NALNUML:
3280af22 3685 PL_reg_flags |= RF_tainted;
1aa99e6b 3686 if (do_utf8) {
ffc61ed2 3687 loceol = PL_regeol;
1aa99e6b
IH
3688 while (hardcount < max && scan < loceol &&
3689 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3690 scan += UTF8SKIP(scan);
3691 hardcount++;
3692 }
3693 } else {
3694 while (scan < loceol && !isALNUM_LC(*scan))
3695 scan++;
a0ed51b3
LW
3696 }
3697 break;
a0d0e21e 3698 case SPACE:
1aa99e6b 3699 if (do_utf8) {
ffc61ed2 3700 loceol = PL_regeol;
8269fa76 3701 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3702 while (hardcount < max && scan < loceol &&
3568d838
JH
3703 (*scan == ' ' ||
3704 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3705 scan += UTF8SKIP(scan);
3706 hardcount++;
3707 }
3708 } else {
3709 while (scan < loceol && isSPACE(*scan))
3710 scan++;
a0ed51b3
LW
3711 }
3712 break;
bbce6d69 3713 case SPACEL:
3280af22 3714 PL_reg_flags |= RF_tainted;
1aa99e6b 3715 if (do_utf8) {
ffc61ed2 3716 loceol = PL_regeol;
1aa99e6b 3717 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3718 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3719 scan += UTF8SKIP(scan);
3720 hardcount++;
3721 }
3722 } else {
3723 while (scan < loceol && isSPACE_LC(*scan))
3724 scan++;
a0ed51b3
LW
3725 }
3726 break;
a0d0e21e 3727 case NSPACE:
1aa99e6b 3728 if (do_utf8) {
ffc61ed2 3729 loceol = PL_regeol;
8269fa76 3730 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3731 while (hardcount < max && scan < loceol &&
3568d838
JH
3732 !(*scan == ' ' ||
3733 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3734 scan += UTF8SKIP(scan);
3735 hardcount++;
3736 }
3737 } else {
3738 while (scan < loceol && !isSPACE(*scan))
3739 scan++;
3740 break;
a0ed51b3 3741 }
bbce6d69 3742 case NSPACEL:
3280af22 3743 PL_reg_flags |= RF_tainted;
1aa99e6b 3744 if (do_utf8) {
ffc61ed2 3745 loceol = PL_regeol;
1aa99e6b 3746 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3747 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3748 scan += UTF8SKIP(scan);
3749 hardcount++;
3750 }
3751 } else {
3752 while (scan < loceol && !isSPACE_LC(*scan))
3753 scan++;
a0ed51b3
LW
3754 }
3755 break;
a0d0e21e 3756 case DIGIT:
1aa99e6b 3757 if (do_utf8) {
ffc61ed2 3758 loceol = PL_regeol;
8269fa76 3759 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3760 while (hardcount < max && scan < loceol &&
3568d838 3761 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3762 scan += UTF8SKIP(scan);
3763 hardcount++;
3764 }
3765 } else {
3766 while (scan < loceol && isDIGIT(*scan))
3767 scan++;
a0ed51b3
LW
3768 }
3769 break;
a0d0e21e 3770 case NDIGIT:
1aa99e6b 3771 if (do_utf8) {
ffc61ed2 3772 loceol = PL_regeol;
8269fa76 3773 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3774 while (hardcount < max && scan < loceol &&
3568d838 3775 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3776 scan += UTF8SKIP(scan);
3777 hardcount++;
3778 }
3779 } else {
3780 while (scan < loceol && !isDIGIT(*scan))
3781 scan++;
a0ed51b3
LW
3782 }
3783 break;
a0d0e21e
LW
3784 default: /* Called on something of 0 width. */
3785 break; /* So match right here or not at all. */
3786 }
a687059c 3787
a0ed51b3
LW
3788 if (hardcount)
3789 c = hardcount;
3790 else
3791 c = scan - PL_reginput;
3280af22 3792 PL_reginput = scan;
a687059c 3793
9041c2e3 3794 DEBUG_r(
c277df42
IZ
3795 {
3796 SV *prop = sv_newmortal();
3797
3798 regprop(prop, p);
9041c2e3
NIS
3799 PerlIO_printf(Perl_debug_log,
3800 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3801 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3802 });
9041c2e3 3803
a0d0e21e 3804 return(c);
a687059c
LW
3805}
3806
3807/*
c277df42 3808 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3809 *
c277df42
IZ
3810 * The repeater is supposed to have constant length.
3811 */
3812
76e3520e 3813STATIC I32
cea2e8a9 3814S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3815{
b7953727 3816 register char *scan = Nullch;
c277df42 3817 register char *start;
3280af22 3818 register char *loceol = PL_regeol;
a0ed51b3 3819 I32 l = 0;
708e3b05 3820 I32 count = 0, res = 1;
a0ed51b3
LW
3821
3822 if (!max)
3823 return 0;
c277df42 3824
3280af22 3825 start = PL_reginput;
d9f424b2 3826 if (PL_reg_sv_utf8) {
708e3b05 3827 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3828 if (!count++) {
3829 l = 0;
3830 while (start < PL_reginput) {
3831 l++;
3832 start += UTF8SKIP(start);
3833 }
3834 *lp = l;
3835 if (l == 0)
3836 return max;
3837 }
3838 if (count == max)
3839 return count;
3840 }
3841 }
3842 else {
708e3b05 3843 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3844 if (!count++) {
3845 *lp = l = PL_reginput - start;
3846 if (max != REG_INFTY && l*max < loceol - scan)
3847 loceol = scan + l*max;
3848 if (l == 0)
3849 return max;
c277df42
IZ
3850 }
3851 }
3852 }
708e3b05 3853 if (!res)
3280af22 3854 PL_reginput = scan;
9041c2e3 3855
a0ed51b3 3856 return count;
c277df42
IZ
3857}
3858
3859/*
ffc61ed2
JH
3860- regclass_swash - prepare the utf8 swash
3861*/
3862
3863SV *
3864Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3865{
3866 SV *sw = NULL;
3867 SV *si = NULL;
3868
3869 if (PL_regdata && PL_regdata->count) {
3870 U32 n = ARG(node);
3871
3872 if (PL_regdata->what[n] == 's') {
3873 SV *rv = (SV*)PL_regdata->data[n];
3874 AV *av = (AV*)SvRV((SV*)rv);
3875 SV **a;
9041c2e3 3876
ffc61ed2
JH
3877 si = *av_fetch(av, 0, FALSE);
3878 a = av_fetch(av, 1, FALSE);
9041c2e3 3879
ffc61ed2
JH
3880 if (a)
3881 sw = *a;
3882 else if (si && doinit) {
3883 sw = swash_init("utf8", "", si, 1, 0);
3884 (void)av_store(av, 1, sw);
3885 }
3886 }
3887 }
3888
3889 if (initsvp)
3890 *initsvp = si;
3891
3892 return sw;
3893}
3894
3895/*
cb8d8820 3896 - reginclass - determine if a character falls into a character class
bbce6d69 3897 */
3898
76e3520e 3899STATIC bool
ffc61ed2 3900S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3901{
ffc61ed2 3902 char flags = ANYOF_FLAGS(n);
bbce6d69 3903 bool match = FALSE;
1aa99e6b 3904 UV c;
3568d838 3905 STRLEN len = 0;
1aa99e6b 3906
3568d838 3907 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3908
ffc61ed2
JH
3909 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3910 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3911 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3912 match = TRUE;
bbce6d69 3913 }
3568d838 3914 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3915 match = TRUE;
ffc61ed2
JH
3916 if (!match) {
3917 SV *sw = regclass_swash(n, TRUE, 0);
3918
3919 if (sw) {
3568d838 3920 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3921 match = TRUE;
3922 else if (flags & ANYOF_FOLD) {
3923 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3924
ffc61ed2
JH
3925 if (flags & ANYOF_LOCALE) {
3926 PL_reg_flags |= RF_tainted;
9041c2e3 3927 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3928 }
1aa99e6b 3929 else
9041c2e3 3930 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3931 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3932 match = TRUE;
3933 }
3934 }
bbce6d69 3935 }
3936 }
1aa99e6b 3937 if (!match && c < 256) {
ffc61ed2
JH
3938 if (ANYOF_BITMAP_TEST(n, c))
3939 match = TRUE;
3940 else if (flags & ANYOF_FOLD) {
3568d838 3941 I32 f;
a0ed51b3 3942
ffc61ed2
JH
3943 if (flags & ANYOF_LOCALE) {
3944 PL_reg_flags |= RF_tainted;
3945 f = PL_fold_locale[c];
3946 }
3947 else
3948 f = PL_fold[c];
3949 if (f != c && ANYOF_BITMAP_TEST(n, f))
3950 match = TRUE;
3951 }
3952
3953 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3954 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3955 if (
3956 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3957 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3961 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3962 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3963 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3964 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3965 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3966 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3967 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3968 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3969 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3970 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3971 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3972 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3973 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3974 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3975 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3976 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3977 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3978 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3979 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3980 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3981 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3982 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3983 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3984 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3985 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3986 ) /* How's that for a conditional? */
3987 {
3988 match = TRUE;
3989 }
a0ed51b3 3990 }
a0ed51b3
LW
3991 }
3992
a0ed51b3
LW
3993 return (flags & ANYOF_INVERT) ? !match : match;
3994}
161b471a 3995
dfe13c55 3996STATIC U8 *
cea2e8a9 3997S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3998{
1aa99e6b
IH
3999 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4000}
4001
4002STATIC U8 *
4003S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4004{
a0ed51b3 4005 if (off >= 0) {
1aa99e6b 4006 while (off-- && s < lim) {
ffc61ed2 4007 /* XXX could check well-formedness here */
a0ed51b3 4008 s += UTF8SKIP(s);
ffc61ed2 4009 }
a0ed51b3
LW
4010 }
4011 else {
4012 while (off++) {
1aa99e6b 4013 if (s > lim) {
a0ed51b3 4014 s--;
ffc61ed2 4015 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4016 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4017 s--;
ffc61ed2
JH
4018 }
4019 /* XXX could check well-formedness here */
a0ed51b3
LW
4020 }
4021 }
4022 }
4023 return s;
4024}
161b471a 4025
dfe13c55 4026STATIC U8 *
1aa99e6b 4027S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4028{
1aa99e6b
IH
4029 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4030}
4031
4032STATIC U8 *
4033S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4034{
4035 if (off >= 0) {
1aa99e6b 4036 while (off-- && s < lim) {
ffc61ed2 4037 /* XXX could check well-formedness here */
a0ed51b3 4038 s += UTF8SKIP(s);
ffc61ed2 4039 }
a0ed51b3
LW
4040 if (off >= 0)
4041 return 0;
4042 }
4043 else {
4044 while (off++) {
1aa99e6b 4045 if (s > lim) {
a0ed51b3 4046 s--;
ffc61ed2 4047 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4048 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4049 s--;
ffc61ed2
JH
4050 }
4051 /* XXX could check well-formedness here */
a0ed51b3
LW
4052 }
4053 else
4054 break;
4055 }
4056 if (off <= 0)
4057 return 0;
4058 }
4059 return s;
4060}
51371543 4061
51371543 4062static void
acfe0abc 4063restore_pos(pTHX_ void *arg)
51371543 4064{
51371543
GS
4065 if (PL_reg_eval_set) {
4066 if (PL_reg_oldsaved) {
4067 PL_reg_re->subbeg = PL_reg_oldsaved;
4068 PL_reg_re->sublen = PL_reg_oldsavedlen;
4069 RX_MATCH_COPIED_on(PL_reg_re);
4070 }
4071 PL_reg_magic->mg_len = PL_reg_oldpos;
4072 PL_reg_eval_set = 0;
4073 PL_curpm = PL_reg_oldcurpm;
4074 }
4075}