This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Dispell the "use utf8" superstition.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
a01268b5 150#define REGCP_OTHER_ELEMS 6
b1ce53c5 151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
a01268b5 162 SSPUSHINT(*PL_reglastcloseparen);
3280af22 163 SSPUSHPTR(PL_reginput);
41123dfd
JH
164#define REGCP_FRAME_ELEMS 2
165/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
166 * are needed for the regexp context stack bookkeeping. */
167 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 168 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 169
a0d0e21e
LW
170 return retval;
171}
172
c277df42 173/* These are needed since we do not localize EVAL nodes: */
02db2b7b 174# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 175 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 176 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 177
02db2b7b 178# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 179 PerlIO_printf(Perl_debug_log, \
faccc32b 180 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 181 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 182
76e3520e 183STATIC char *
cea2e8a9 184S_regcppop(pTHX)
a0d0e21e 185{
b1ce53c5 186 I32 i;
a0d0e21e
LW
187 U32 paren = 0;
188 char *input;
cf93c79d 189 I32 tmps;
b1ce53c5
JH
190
191 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 192 i = SSPOPINT;
b1ce53c5
JH
193 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
194 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 195 input = (char *) SSPOPPTR;
a01268b5 196 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
197 *PL_reglastparen = SSPOPINT;
198 PL_regsize = SSPOPINT;
b1ce53c5
JH
199
200 /* Now restore the parentheses context. */
41123dfd
JH
201 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
202 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 203 paren = (U32)SSPOPINT;
3280af22 204 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
205 PL_regstartp[paren] = SSPOPINT;
206 tmps = SSPOPINT;
3280af22
NIS
207 if (paren <= *PL_reglastparen)
208 PL_regendp[paren] = tmps;
c277df42 209 DEBUG_r(
c3464db5 210 PerlIO_printf(Perl_debug_log,
b900a521 211 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 212 (UV)paren, (IV)PL_regstartp[paren],
b900a521 213 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 214 (IV)PL_regendp[paren],
3280af22 215 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 216 );
a0d0e21e 217 }
c277df42 218 DEBUG_r(
3280af22 219 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 220 PerlIO_printf(Perl_debug_log,
faccc32b
JH
221 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
222 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
223 }
224 );
daf18116 225#if 1
dafc8851
JH
226 /* It would seem that the similar code in regtry()
227 * already takes care of this, and in fact it is in
228 * a better location to since this code can #if 0-ed out
229 * but the code in regtry() is needed or otherwise tests
230 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
231 * (as of patchlevel 7877) will fail. Then again,
232 * this code seems to be necessary or otherwise
233 * building DynaLoader will fail:
234 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
235 * --jhi */
3280af22
NIS
236 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
237 if (paren > PL_regsize)
cf93c79d
IZ
238 PL_regstartp[paren] = -1;
239 PL_regendp[paren] = -1;
a0d0e21e 240 }
dafc8851 241#endif
a0d0e21e
LW
242 return input;
243}
244
0f5d15d6 245STATIC char *
cea2e8a9 246S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
247{
248 I32 tmp = PL_savestack_ix;
249
250 PL_savestack_ix = ss;
251 regcppop();
252 PL_savestack_ix = tmp;
942e002e 253 return Nullch;
0f5d15d6
IZ
254}
255
256typedef struct re_cc_state
257{
258 I32 ss;
259 regnode *node;
260 struct re_cc_state *prev;
261 CURCUR *cc;
262 regexp *re;
263} re_cc_state;
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
29d1e993
HS
267#define TRYPAREN(paren, n, input) { \
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
276 if (regmatch(next)) \
277 sayYES; \
278 if (paren && n) \
279 PL_regendp[paren] = -1; \
280}
281
282
a687059c 283/*
e50aee73 284 * pregexec and friends
a687059c
LW
285 */
286
287/*
c277df42 288 - pregexec - match a regexp against a string
a687059c 289 */
c277df42 290I32
864dbfa3 291Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 292 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
293/* strend: pointer to null at end of string */
294/* strbeg: real beginning of string */
295/* minend: end of match must be >=minend after stringarg. */
296/* nosave: For optimizations. */
297{
298 return
9041c2e3 299 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
300 nosave ? 0 : REXEC_COPY_STR);
301}
0f5d15d6
IZ
302
303STATIC void
cea2e8a9 304S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
305{
306 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
307#ifdef DEBUGGING
308 PL_regprogram = prog->program;
309#endif
310 PL_regnpar = prog->nparens;
9041c2e3
NIS
311 PL_regdata = prog->data;
312 PL_reg_re = prog;
0f5d15d6 313}
22e551b9 314
9041c2e3 315/*
cad2e5aa
JH
316 * Need to implement the following flags for reg_anch:
317 *
318 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
319 * USE_INTUIT_ML
320 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
321 * INTUIT_AUTORITATIVE_ML
322 * INTUIT_ONCE_NOML - Intuit can match in one location only.
323 * INTUIT_ONCE_ML
324 *
325 * Another flag for this function: SECOND_TIME (so that float substrs
326 * with giant delta may be not rechecked).
327 */
328
329/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
330
2c2d71f5 331/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
332 Otherwise, only SvCUR(sv) is used to get strbeg. */
333
334/* XXXX We assume that strpos is strbeg unless sv. */
335
6eb5f6b9
JH
336/* XXXX Some places assume that there is a fixed substring.
337 An update may be needed if optimizer marks as "INTUITable"
338 RExen without fixed substrings. Similarly, it is assumed that
339 lengths of all the strings are no more than minlen, thus they
340 cannot come from lookahead.
341 (Or minlen should take into account lookahead.) */
342
2c2d71f5
JH
343/* A failure to find a constant substring means that there is no need to make
344 an expensive call to REx engine, thus we celebrate a failure. Similarly,
345 finding a substring too deep into the string means that less calls to
30944b6d
IZ
346 regtry() should be needed.
347
348 REx compiler's optimizer found 4 possible hints:
349 a) Anchored substring;
350 b) Fixed substring;
351 c) Whether we are anchored (beginning-of-line or \G);
352 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 353 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
354 string which does not contradict any of them.
355 */
2c2d71f5 356
6eb5f6b9
JH
357/* Most of decisions we do here should have been done at compile time.
358 The nodes of the REx which we used for the search should have been
359 deleted from the finite automaton. */
360
cad2e5aa
JH
361char *
362Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
363 char *strend, U32 flags, re_scream_pos_data *data)
364{
b7953727 365 register I32 start_shift = 0;
cad2e5aa 366 /* Should be nonnegative! */
b7953727 367 register I32 end_shift = 0;
2c2d71f5
JH
368 register char *s;
369 register SV *check;
a1933d95 370 char *strbeg;
cad2e5aa
JH
371 char *t;
372 I32 ml_anch;
6eb5f6b9 373 register char *other_last = Nullch; /* other substr checked before this */
b7953727 374 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
375#ifdef DEBUGGING
376 char *i_strpos = strpos;
377#endif
cad2e5aa
JH
378
379 DEBUG_r( if (!PL_colorset) reginitcolors() );
380 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 381 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
382 PL_colors[4],PL_colors[5],PL_colors[0],
383 prog->precomp,
384 PL_colors[1],
385 (strlen(prog->precomp) > 60 ? "..." : ""),
386 PL_colors[0],
b900a521 387 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
388 strpos, PL_colors[1],
389 (strend - strpos > 60 ? "..." : ""))
390 );
391
1aa99e6b
IH
392 if (prog->reganch & ROPT_UTF8)
393 PL_reg_flags |= RF_utf8;
394
395 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
653099ff 401 check = prog->check_substr;
2c2d71f5 402 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
403 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
404 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 405 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 406
7e25d62c
JH
407 if (!ml_anch) {
408 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
409 | ROPT_IMPLICIT)) /* not a real BOL */
410 /* SvCUR is not set on references: SvRV and SvPVX overlap */
411 && sv && !SvROK(sv)
412 && (strpos != strbeg)) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
414 goto fail;
415 }
416 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 417 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 418 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
419 I32 slen;
420
1aa99e6b 421 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
422 if (SvTAIL(check)) {
423 slen = SvCUR(check); /* >= 1 */
cad2e5aa 424
9041c2e3 425 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
426 || (strend - s == slen && strend[-1] != '\n')) {
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
428 goto fail_finish;
cad2e5aa
JH
429 }
430 /* Now should match s[0..slen-2] */
431 slen--;
653099ff 432 if (slen && (*SvPVX(check) != *s
cad2e5aa 433 || (slen > 1
653099ff 434 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
435 report_neq:
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
437 goto fail_finish;
438 }
cad2e5aa 439 }
653099ff
GS
440 else if (*SvPVX(check) != *s
441 || ((slen = SvCUR(check)) > 1
442 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
443 goto report_neq;
444 goto success_at_start;
7e25d62c 445 }
cad2e5aa 446 }
2c2d71f5 447 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 448 s = strpos;
2c2d71f5 449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 452 if (!ml_anch) {
653099ff
GS
453 I32 end = prog->check_offset_max + CHR_SVLEN(check)
454 - (SvTAIL(check) != 0);
1aa99e6b 455 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
456
457 if (end_shift < eshift)
458 end_shift = eshift;
459 }
cad2e5aa 460 }
2c2d71f5 461 else { /* Can match at random position */
cad2e5aa
JH
462 ml_anch = 0;
463 s = strpos;
2c2d71f5
JH
464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
465 /* Should be nonnegative! */
466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
468 }
469
2c2d71f5 470#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 471 if (end_shift < 0)
6bbae5e6 472 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
473#endif
474
2c2d71f5
JH
475 restart:
476 /* Find a possible match in the region s..strend by looking for
477 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 478 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
479 I32 p = -1; /* Internal iterator of scream. */
480 I32 *pp = data ? data->scream_pos : &p;
481
2c2d71f5
JH
482 if (PL_screamfirst[BmRARE(check)] >= 0
483 || ( BmRARE(check) == '\n'
484 && (BmPREVIOUS(check) == SvCUR(check) - 1)
485 && SvTAIL(check) ))
9041c2e3 486 s = screaminstr(sv, check,
2c2d71f5 487 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 488 else
2c2d71f5 489 goto fail_finish;
cad2e5aa
JH
490 if (data)
491 *data->scream_olds = s;
492 }
f33976b4 493 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
494 s = fbm_instr((U8*)(s + start_shift),
495 (U8*)(strend - end_shift),
496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 497 else
1aa99e6b
IH
498 s = fbm_instr(HOP3(s, start_shift, strend),
499 HOP3(strend, -end_shift, strbeg),
2c2d71f5 500 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
501
502 /* Update the count-of-usability, remove useless subpatterns,
503 unshift s. */
2c2d71f5
JH
504
505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
506 (s ? "Found" : "Did not find"),
507 ((check == prog->anchored_substr) ? "anchored" : "floating"),
508 PL_colors[0],
7b0972df
JH
509 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
510 SvPVX(check),
2c2d71f5
JH
511 PL_colors[1], (SvTAIL(check) ? "$" : ""),
512 (s ? " at offset " : "...\n") ) );
513
514 if (!s)
515 goto fail_finish;
516
6eb5f6b9
JH
517 check_at = s;
518
2c2d71f5 519 /* Finish the diagnostic message */
30944b6d 520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
521
522 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
523 Start with the other substr.
524 XXXX no SCREAM optimization yet - and a very coarse implementation
525 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
526 *always* match. Probably should be marked during compile...
527 Probably it is right to do no SCREAM here...
528 */
529
530 if (prog->float_substr && prog->anchored_substr) {
30944b6d 531 /* Take into account the "other" substring. */
2c2d71f5
JH
532 /* XXXX May be hopelessly wrong for UTF... */
533 if (!other_last)
6eb5f6b9 534 other_last = strpos;
2c2d71f5 535 if (check == prog->float_substr) {
30944b6d
IZ
536 do_other_anchored:
537 {
1aa99e6b 538 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
539 char *s1 = s;
540
2c2d71f5
JH
541 t = s - prog->check_offset_max;
542 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
543 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 544 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 545 && t > strpos)))
30944b6d 546 /* EMPTY */;
2c2d71f5
JH
547 else
548 t = strpos;
1aa99e6b 549 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
550 if (t < other_last) /* These positions already checked */
551 t = other_last;
1aa99e6b 552 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
553 if (last < last1)
554 last1 = last;
555 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
556 /* On end-of-str: see comment below. */
557 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
558 HOP3(HOP3(last1, prog->anchored_offset, strend)
559 + SvCUR(prog->anchored_substr),
560 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
561 prog->anchored_substr,
562 PL_multiline ? FBMrf_MULTILINE : 0);
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->anchored_substr)
568 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->anchored_substr),
570 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 >= last2) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying floating at offset %ld...\n",
1aa99e6b
IH
579 (long)(HOP3c(s1, 1, strend) - i_strpos)));
580 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
581 s = HOP3c(last, 1, strend);
2c2d71f5
JH
582 goto restart;
583 }
584 else {
585 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 586 (long)(s - i_strpos)));
1aa99e6b
IH
587 t = HOP3c(s, -prog->anchored_offset, strbeg);
588 other_last = HOP3c(s, 1, strend);
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
30944b6d 594 }
2c2d71f5
JH
595 }
596 else { /* Take into account the floating substring. */
597 char *last, *last1;
598 char *s1 = s;
599
1aa99e6b
IH
600 t = HOP3c(s, -start_shift, strbeg);
601 last1 = last =
602 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
603 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
604 last = HOP3c(t, prog->float_max_offset, strend);
605 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
606 if (s < other_last)
607 s = other_last;
2c2d71f5
JH
608 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
609 /* fbm_instr() takes into account exact value of end-of-str
610 if the check is SvTAIL(ed). Since false positives are OK,
611 and end-of-str is not later than strend we are OK. */
612 s = fbm_instr((unsigned char*)s,
613 (unsigned char*)last + SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0),
615 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
616 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
617 (s ? "Found" : "Contradicts"),
618 PL_colors[0],
7b0972df
JH
619 (int)(SvCUR(prog->float_substr)
620 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
621 SvPVX(prog->float_substr),
622 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
623 if (!s) {
624 if (last1 == last) {
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", giving up...\n"));
627 goto fail_finish;
628 }
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 ", trying anchored starting at offset %ld...\n",
30944b6d 631 (long)(s1 + 1 - i_strpos)));
803ff556 632 other_last = last;
1aa99e6b 633 s = HOP3c(t, 1, strend);
2c2d71f5
JH
634 goto restart;
635 }
636 else {
637 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 638 (long)(s - i_strpos)));
803ff556 639 other_last = s; /* Fix this later. --Hugo */
30944b6d 640 s = s1;
2c2d71f5
JH
641 if (t == strpos)
642 goto try_at_start;
2c2d71f5
JH
643 goto try_at_offset;
644 }
645 }
cad2e5aa 646 }
2c2d71f5
JH
647
648 t = s - prog->check_offset_max;
2c2d71f5
JH
649 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
650 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
651 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
652 && t > strpos))) {
2c2d71f5
JH
653 /* Fixed substring is found far enough so that the match
654 cannot start at strpos. */
655 try_at_offset:
cad2e5aa 656 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
657 /* Eventually fbm_*() should handle this, but often
658 anchored_offset is not 0, so this check will not be wasted. */
659 /* XXXX In the code below we prefer to look for "^" even in
660 presence of anchored substrings. And we search even
661 beyond the found float position. These pessimizations
662 are historical artefacts only. */
663 find_anchor:
2c2d71f5 664 while (t < strend - prog->minlen) {
cad2e5aa 665 if (*t == '\n') {
4ee3650e 666 if (t < check_at - prog->check_offset_min) {
30944b6d 667 if (prog->anchored_substr) {
4ee3650e
GS
668 /* Since we moved from the found position,
669 we definitely contradict the found anchored
30944b6d
IZ
670 substr. Due to the above check we do not
671 contradict "check" substr.
672 Thus we can arrive here only if check substr
673 is float. Redo checking for "other"=="fixed".
674 */
9041c2e3 675 strpos = t + 1;
30944b6d
IZ
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
677 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
678 goto do_other_anchored;
679 }
4ee3650e
GS
680 /* We don't contradict the found floating substring. */
681 /* XXXX Why not check for STCLASS? */
cad2e5aa 682 s = t + 1;
2c2d71f5 683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 684 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
685 goto set_useful;
686 }
4ee3650e
GS
687 /* Position contradicts check-string */
688 /* XXXX probably better to look for check-string
689 than for "\n", so one should lower the limit for t? */
690 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 691 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 692 other_last = strpos = s = t + 1;
cad2e5aa
JH
693 goto restart;
694 }
695 t++;
696 }
2c2d71f5
JH
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
698 PL_colors[0],PL_colors[1]));
699 goto fail_finish;
cad2e5aa 700 }
f5952150
GS
701 else {
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
703 PL_colors[0],PL_colors[1]));
704 }
cad2e5aa
JH
705 s = t;
706 set_useful:
2c2d71f5 707 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
708 }
709 else {
f5952150 710 /* The found string does not prohibit matching at strpos,
2c2d71f5 711 - no optimization of calling REx engine can be performed,
f5952150
GS
712 unless it was an MBOL and we are not after MBOL,
713 or a future STCLASS check will fail this. */
2c2d71f5
JH
714 try_at_start:
715 /* Even in this situation we may use MBOL flag if strpos is offset
716 wrt the start of the string. */
05b4157f 717 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 718 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
719 /* May be due to an implicit anchor of m{.*foo} */
720 && !(prog->reganch & ROPT_IMPLICIT))
721 {
cad2e5aa
JH
722 t = strpos;
723 goto find_anchor;
724 }
30944b6d 725 DEBUG_r( if (ml_anch)
f5952150
GS
726 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
727 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 728 );
2c2d71f5 729 success_at_start:
30944b6d 730 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 731 && prog->check_substr /* Could be deleted already */
cad2e5aa 732 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
733 && prog->check_substr == prog->float_substr)
734 {
cad2e5aa 735 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 736 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
737 SvREFCNT_dec(prog->check_substr);
738 prog->check_substr = Nullsv; /* disable */
739 prog->float_substr = Nullsv; /* clear */
5e39e1e5 740 check = Nullsv; /* abort */
cad2e5aa 741 s = strpos;
3cf5c195
IZ
742 /* XXXX This is a remnant of the old implementation. It
743 looks wasteful, since now INTUIT can use many
6eb5f6b9 744 other heuristics. */
cad2e5aa
JH
745 prog->reganch &= ~RE_USE_INTUIT;
746 }
747 else
748 s = strpos;
749 }
750
6eb5f6b9
JH
751 /* Last resort... */
752 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
753 if (prog->regstclass) {
754 /* minlen == 0 is possible if regstclass is \b or \B,
755 and the fixed substr is ''$.
756 Since minlen is already taken into account, s+1 is before strend;
757 accidentally, minlen >= 1 guaranties no false positives at s + 1
758 even for \b or \B. But (minlen? 1 : 0) below assumes that
759 regstclass does not come from lookahead... */
760 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
761 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 762 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 763 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 764 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 765 : 1);
6eb5f6b9 766 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
767 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
768 : (prog->float_substr
769 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
770 cl_l, strend)
771 : strend);
a1933d95 772 char *startpos = strbeg;
6eb5f6b9
JH
773
774 t = s;
76384e4a 775 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 776 PL_regdata = prog->data;
76384e4a
GS
777 PL_bostr = startpos;
778 }
f33976b4 779 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
780 if (!s) {
781#ifdef DEBUGGING
b7953727 782 char *what = 0;
6eb5f6b9
JH
783#endif
784 if (endpos == strend) {
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
66e933ab
GS
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "This position contradicts STCLASS...\n") );
653099ff
GS
791 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
792 goto fail;
6eb5f6b9
JH
793 /* Contradict one of substrings */
794 if (prog->anchored_substr) {
6eb5f6b9
JH
795 if (prog->anchored_substr == check) {
796 DEBUG_r( what = "anchored" );
797 hop_and_restart:
1aa99e6b 798 s = HOP3c(t, 1, strend);
66e933ab
GS
799 if (s + start_shift + end_shift > strend) {
800 /* XXXX Should be taken into account earlier? */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
5e39e1e5
HS
805 if (!check)
806 goto giveup;
6eb5f6b9 807 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 808 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
809 what, (long)(s + start_shift - i_strpos)) );
810 goto restart;
811 }
66e933ab 812 /* Have both, check_string is floating */
6eb5f6b9
JH
813 if (t + start_shift >= check_at) /* Contradicts floating=check */
814 goto retry_floating_check;
815 /* Recheck anchored substring, but not floating... */
9041c2e3 816 s = check_at;
5e39e1e5
HS
817 if (!check)
818 goto giveup;
6eb5f6b9 819 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 820 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
821 (long)(other_last - i_strpos)) );
822 goto do_other_anchored;
823 }
60e71179
GS
824 /* Another way we could have checked stclass at the
825 current position only: */
826 if (ml_anch) {
827 s = t = t + 1;
5e39e1e5
HS
828 if (!check)
829 goto giveup;
60e71179 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
831 "Looking for /%s^%s/m starting at offset %ld...\n",
832 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 833 goto try_at_offset;
66e933ab 834 }
60e71179
GS
835 if (!prog->float_substr) /* Could have been deleted */
836 goto fail;
6eb5f6b9
JH
837 /* Check is floating subtring. */
838 retry_floating_check:
839 t = check_at - start_shift;
840 DEBUG_r( what = "floating" );
841 goto hop_and_restart;
842 }
b7953727
JH
843 if (t != s) {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 845 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
846 (long)(t - i_strpos), (long)(s - i_strpos))
847 );
848 }
849 else {
850 DEBUG_r(PerlIO_printf(Perl_debug_log,
851 "Does not contradict STCLASS...\n");
852 );
853 }
6eb5f6b9 854 }
5e39e1e5
HS
855 giveup:
856 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
857 PL_colors[4], (check ? "Guessed" : "Giving up"),
858 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 859 return s;
2c2d71f5
JH
860
861 fail_finish: /* Substring not found */
66e933ab
GS
862 if (prog->check_substr) /* could be removed already */
863 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 864 fail:
2c2d71f5 865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
866 PL_colors[4],PL_colors[5]));
867 return Nullch;
868}
9661b544 869
6eb5f6b9 870/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
871STATIC char *
872S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 873{
6eb5f6b9
JH
874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
875 char *m;
d8093b23
G
876 STRLEN ln;
877 unsigned int c1;
878 unsigned int c2;
6eb5f6b9
JH
879 char *e;
880 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 881 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 882
6eb5f6b9
JH
883 /* We know what class it must start with. */
884 switch (OP(c)) {
6eb5f6b9 885 case ANYOF:
a0ed51b3 886 while (s < strend) {
ffc61ed2 887 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
888 if (tmp && (norun || regtry(prog, s)))
889 goto got_it;
890 else
891 tmp = doevery;
a0ed51b3 892 }
6eb5f6b9
JH
893 else
894 tmp = 1;
ffc61ed2 895 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 896 }
6eb5f6b9 897 break;
f33976b4
DB
898 case CANY:
899 while (s < strend) {
900 if (tmp && (norun || regtry(prog, s)))
901 goto got_it;
902 else
903 tmp = doevery;
904 s++;
905 }
906 break;
6eb5f6b9
JH
907 case EXACTF:
908 m = STRING(c);
909 ln = STR_LEN(c);
1aa99e6b
IH
910 if (UTF) {
911 c1 = to_utf8_lower((U8*)m);
912 c2 = to_utf8_upper((U8*)m);
913 }
914 else {
915 c1 = *(U8*)m;
916 c2 = PL_fold[c1];
917 }
6eb5f6b9
JH
918 goto do_exactf;
919 case EXACTFL:
920 m = STRING(c);
921 ln = STR_LEN(c);
d8093b23 922 c1 = *(U8*)m;
6eb5f6b9
JH
923 c2 = PL_fold_locale[c1];
924 do_exactf:
925 e = strend - ln;
b3c9acc1 926
6eb5f6b9
JH
927 if (norun && e < s)
928 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
929
930 if (do_utf8) {
931 STRLEN len;
932 if (c1 == c2)
933 while (s <= e) {
9041c2e3 934 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
935 && regtry(prog, s) )
936 goto got_it;
937 s += len;
938 }
939 else
940 while (s <= e) {
9041c2e3 941 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
942 if ( (c == c1 || c == c2) && regtry(prog, s) )
943 goto got_it;
944 s += len;
945 }
946 }
947 else {
948 if (c1 == c2)
949 while (s <= e) {
950 if ( *(U8*)s == c1
951 && (ln == 1 || !(OP(c) == EXACTF
952 ? ibcmp(s, m, ln)
953 : ibcmp_locale(s, m, ln)))
954 && (norun || regtry(prog, s)) )
955 goto got_it;
956 s++;
957 }
958 else
959 while (s <= e) {
960 if ( (*(U8*)s == c1 || *(U8*)s == c2)
961 && (ln == 1 || !(OP(c) == EXACTF
962 ? ibcmp(s, m, ln)
963 : ibcmp_locale(s, m, ln)))
964 && (norun || regtry(prog, s)) )
965 goto got_it;
966 s++;
967 }
b3c9acc1
IZ
968 }
969 break;
bbce6d69 970 case BOUNDL:
3280af22 971 PL_reg_flags |= RF_tainted;
bbce6d69 972 /* FALL THROUGH */
a0d0e21e 973 case BOUND:
ffc61ed2 974 if (do_utf8) {
12d33761 975 if (s == PL_bostr)
ffc61ed2
JH
976 tmp = '\n';
977 else {
1aa99e6b 978 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 979
0064a8a9
JH
980 if (s > (char*)r)
981 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
982 }
983 tmp = ((OP(c) == BOUND ?
9041c2e3 984 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 985 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
986 while (s < strend) {
987 if (tmp == !(OP(c) == BOUND ?
3568d838 988 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
989 isALNUM_LC_utf8((U8*)s)))
990 {
991 tmp = !tmp;
992 if ((norun || regtry(prog, s)))
993 goto got_it;
994 }
995 s += UTF8SKIP(s);
a687059c 996 }
a0d0e21e 997 }
667bb95a 998 else {
12d33761 999 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1000 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1001 while (s < strend) {
1002 if (tmp ==
1003 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1004 tmp = !tmp;
1005 if ((norun || regtry(prog, s)))
1006 goto got_it;
1007 }
1008 s++;
a0ed51b3 1009 }
a0ed51b3 1010 }
6eb5f6b9 1011 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1012 goto got_it;
1013 break;
bbce6d69 1014 case NBOUNDL:
3280af22 1015 PL_reg_flags |= RF_tainted;
bbce6d69 1016 /* FALL THROUGH */
a0d0e21e 1017 case NBOUND:
ffc61ed2 1018 if (do_utf8) {
12d33761 1019 if (s == PL_bostr)
ffc61ed2
JH
1020 tmp = '\n';
1021 else {
1aa99e6b 1022 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1023
0064a8a9
JH
1024 if (s > (char*)r)
1025 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1026 }
1027 tmp = ((OP(c) == NBOUND ?
9041c2e3 1028 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1029 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1030 while (s < strend) {
1031 if (tmp == !(OP(c) == NBOUND ?
3568d838 1032 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1033 isALNUM_LC_utf8((U8*)s)))
1034 tmp = !tmp;
1035 else if ((norun || regtry(prog, s)))
1036 goto got_it;
1037 s += UTF8SKIP(s);
1038 }
a0d0e21e 1039 }
667bb95a 1040 else {
12d33761 1041 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1042 tmp = ((OP(c) == NBOUND ?
1043 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1044 while (s < strend) {
1045 if (tmp ==
1046 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1047 tmp = !tmp;
1048 else if ((norun || regtry(prog, s)))
1049 goto got_it;
1050 s++;
1051 }
a0ed51b3 1052 }
6eb5f6b9 1053 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1054 goto got_it;
1055 break;
a0d0e21e 1056 case ALNUM:
ffc61ed2 1057 if (do_utf8) {
8269fa76 1058 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1059 while (s < strend) {
3568d838 1060 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1061 if (tmp && (norun || regtry(prog, s)))
1062 goto got_it;
1063 else
1064 tmp = doevery;
1065 }
bbce6d69 1066 else
ffc61ed2
JH
1067 tmp = 1;
1068 s += UTF8SKIP(s);
bbce6d69 1069 }
bbce6d69 1070 }
ffc61ed2
JH
1071 else {
1072 while (s < strend) {
1073 if (isALNUM(*s)) {
1074 if (tmp && (norun || regtry(prog, s)))
1075 goto got_it;
1076 else
1077 tmp = doevery;
1078 }
a0ed51b3 1079 else
ffc61ed2
JH
1080 tmp = 1;
1081 s++;
a0ed51b3 1082 }
a0ed51b3
LW
1083 }
1084 break;
bbce6d69 1085 case ALNUML:
3280af22 1086 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1087 if (do_utf8) {
1088 while (s < strend) {
1089 if (isALNUM_LC_utf8((U8*)s)) {
1090 if (tmp && (norun || regtry(prog, s)))
1091 goto got_it;
1092 else
1093 tmp = doevery;
1094 }
a687059c 1095 else
ffc61ed2
JH
1096 tmp = 1;
1097 s += UTF8SKIP(s);
a0d0e21e 1098 }
a0d0e21e 1099 }
ffc61ed2
JH
1100 else {
1101 while (s < strend) {
1102 if (isALNUM_LC(*s)) {
1103 if (tmp && (norun || regtry(prog, s)))
1104 goto got_it;
1105 else
1106 tmp = doevery;
1107 }
a0ed51b3 1108 else
ffc61ed2
JH
1109 tmp = 1;
1110 s++;
a0ed51b3 1111 }
a0ed51b3
LW
1112 }
1113 break;
a0d0e21e 1114 case NALNUM:
ffc61ed2 1115 if (do_utf8) {
8269fa76 1116 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1117 while (s < strend) {
3568d838 1118 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
bbce6d69 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s += UTF8SKIP(s);
bbce6d69 1127 }
bbce6d69 1128 }
ffc61ed2
JH
1129 else {
1130 while (s < strend) {
1131 if (!isALNUM(*s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a0ed51b3 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s++;
a0ed51b3 1140 }
a0ed51b3
LW
1141 }
1142 break;
bbce6d69 1143 case NALNUML:
3280af22 1144 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1145 if (do_utf8) {
1146 while (s < strend) {
1147 if (!isALNUM_LC_utf8((U8*)s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a687059c 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s += UTF8SKIP(s);
a687059c 1156 }
a0d0e21e 1157 }
ffc61ed2
JH
1158 else {
1159 while (s < strend) {
1160 if (!isALNUM_LC(*s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a0ed51b3 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s++;
a0ed51b3 1169 }
a0ed51b3
LW
1170 }
1171 break;
a0d0e21e 1172 case SPACE:
ffc61ed2 1173 if (do_utf8) {
8269fa76 1174 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1175 while (s < strend) {
3568d838 1176 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
a0d0e21e 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s += UTF8SKIP(s);
2304df62 1185 }
a0d0e21e 1186 }
ffc61ed2
JH
1187 else {
1188 while (s < strend) {
1189 if (isSPACE(*s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
a0ed51b3 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s++;
a0ed51b3 1198 }
a0ed51b3
LW
1199 }
1200 break;
bbce6d69 1201 case SPACEL:
3280af22 1202 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1203 if (do_utf8) {
1204 while (s < strend) {
1205 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
bbce6d69 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s += UTF8SKIP(s);
bbce6d69 1214 }
bbce6d69 1215 }
ffc61ed2
JH
1216 else {
1217 while (s < strend) {
1218 if (isSPACE_LC(*s)) {
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a0ed51b3 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s++;
a0ed51b3 1227 }
a0ed51b3
LW
1228 }
1229 break;
a0d0e21e 1230 case NSPACE:
ffc61ed2 1231 if (do_utf8) {
8269fa76 1232 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1233 while (s < strend) {
3568d838 1234 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0d0e21e 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s += UTF8SKIP(s);
a687059c 1243 }
a0d0e21e 1244 }
ffc61ed2
JH
1245 else {
1246 while (s < strend) {
1247 if (!isSPACE(*s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a0ed51b3 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s++;
a0ed51b3 1256 }
a0ed51b3
LW
1257 }
1258 break;
bbce6d69 1259 case NSPACEL:
3280af22 1260 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1261 if (do_utf8) {
1262 while (s < strend) {
1263 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
bbce6d69 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s += UTF8SKIP(s);
bbce6d69 1272 }
bbce6d69 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (!isSPACE_LC(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
a0d0e21e 1288 case DIGIT:
ffc61ed2 1289 if (do_utf8) {
8269fa76 1290 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1291 while (s < strend) {
3568d838 1292 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0d0e21e 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s += UTF8SKIP(s);
2b69d0c2 1301 }
a0d0e21e 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (isDIGIT(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a0ed51b3 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
a0ed51b3 1314 }
a0ed51b3
LW
1315 }
1316 break;
b8c5462f
JH
1317 case DIGITL:
1318 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1319 if (do_utf8) {
1320 while (s < strend) {
1321 if (isDIGIT_LC_utf8((U8*)s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
b8c5462f 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s += UTF8SKIP(s);
b8c5462f 1330 }
b8c5462f 1331 }
ffc61ed2
JH
1332 else {
1333 while (s < strend) {
1334 if (isDIGIT_LC(*s)) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
b8c5462f 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s++;
b8c5462f 1343 }
b8c5462f
JH
1344 }
1345 break;
a0d0e21e 1346 case NDIGIT:
ffc61ed2 1347 if (do_utf8) {
8269fa76 1348 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1349 while (s < strend) {
3568d838 1350 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a0d0e21e 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s += UTF8SKIP(s);
a687059c 1359 }
a0d0e21e 1360 }
ffc61ed2
JH
1361 else {
1362 while (s < strend) {
1363 if (!isDIGIT(*s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a0ed51b3 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s++;
a0ed51b3 1372 }
a0ed51b3
LW
1373 }
1374 break;
b8c5462f
JH
1375 case NDIGITL:
1376 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1377 if (do_utf8) {
1378 while (s < strend) {
1379 if (!isDIGIT_LC_utf8((U8*)s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
b8c5462f 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s += UTF8SKIP(s);
b8c5462f 1388 }
a0ed51b3 1389 }
ffc61ed2
JH
1390 else {
1391 while (s < strend) {
1392 if (!isDIGIT_LC(*s)) {
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
cf93c79d 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s++;
b8c5462f 1401 }
b8c5462f
JH
1402 }
1403 break;
b3c9acc1 1404 default:
3c3eec57
GS
1405 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1406 break;
d6a28714 1407 }
6eb5f6b9
JH
1408 return 0;
1409 got_it:
1410 return s;
1411}
1412
1413/*
1414 - regexec_flags - match a regexp against a string
1415 */
1416I32
1417Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1418 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1419/* strend: pointer to null at end of string */
1420/* strbeg: real beginning of string */
1421/* minend: end of match must be >=minend after stringarg. */
1422/* data: May be used for some additional optimizations. */
1423/* nosave: For optimizations. */
1424{
6eb5f6b9
JH
1425 register char *s;
1426 register regnode *c;
1427 register char *startpos = stringarg;
6eb5f6b9
JH
1428 I32 minlen; /* must match at least this many chars */
1429 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1430 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1431 constant substr. */ /* CC */
1432 I32 end_shift = 0; /* Same for the end. */ /* CC */
1433 I32 scream_pos = -1; /* Internal iterator of scream. */
1434 char *scream_olds;
1435 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1436 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1437
1438 PL_regcc = 0;
1439
1440 cache_re(prog);
1441#ifdef DEBUGGING
aea4f609 1442 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1443#endif
1444
1445 /* Be paranoid... */
1446 if (prog == NULL || startpos == NULL) {
1447 Perl_croak(aTHX_ "NULL regexp parameter");
1448 return 0;
1449 }
1450
1451 minlen = prog->minlen;
f33976b4 1452 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1453 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1454 }
1455 else {
f33976b4 1456 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1457 }
6eb5f6b9 1458
6eb5f6b9
JH
1459 /* Check validity of program. */
1460 if (UCHARAT(prog->program) != REG_MAGIC) {
1461 Perl_croak(aTHX_ "corrupted regexp program");
1462 }
1463
1464 PL_reg_flags = 0;
1465 PL_reg_eval_set = 0;
1466 PL_reg_maxiter = 0;
1467
1468 if (prog->reganch & ROPT_UTF8)
1469 PL_reg_flags |= RF_utf8;
1470
1471 /* Mark beginning of line for ^ and lookbehind. */
1472 PL_regbol = startpos;
1473 PL_bostr = strbeg;
1474 PL_reg_sv = sv;
1475
1476 /* Mark end of line for $ (and such) */
1477 PL_regeol = strend;
1478
1479 /* see how far we have to get to not match where we matched before */
1480 PL_regtill = startpos+minend;
1481
1482 /* We start without call_cc context. */
1483 PL_reg_call_cc = 0;
1484
1485 /* If there is a "must appear" string, look for it. */
1486 s = startpos;
1487
1488 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1489 MAGIC *mg;
1490
1491 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1492 PL_reg_ganch = startpos;
1493 else if (sv && SvTYPE(sv) >= SVt_PVMG
1494 && SvMAGIC(sv)
14befaf4
DM
1495 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1496 && mg->mg_len >= 0) {
6eb5f6b9
JH
1497 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1498 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1499 if (s > PL_reg_ganch)
6eb5f6b9
JH
1500 goto phooey;
1501 s = PL_reg_ganch;
1502 }
1503 }
1504 else /* pos() not defined */
1505 PL_reg_ganch = strbeg;
1506 }
1507
699c3c34
JH
1508 if (do_utf8 == (UTF!=0) &&
1509 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1510 re_scream_pos_data d;
1511
1512 d.scream_olds = &scream_olds;
1513 d.scream_pos = &scream_pos;
1514 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1515 if (!s)
1516 goto phooey; /* not present */
1517 }
1518
1519 DEBUG_r( if (!PL_colorset) reginitcolors() );
1520 DEBUG_r(PerlIO_printf(Perl_debug_log,
1521 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1522 PL_colors[4],PL_colors[5],PL_colors[0],
1523 prog->precomp,
1524 PL_colors[1],
1525 (strlen(prog->precomp) > 60 ? "..." : ""),
1526 PL_colors[0],
1527 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1528 startpos, PL_colors[1],
1529 (strend - startpos > 60 ? "..." : ""))
1530 );
1531
1532 /* Simplest case: anchored match need be tried only once. */
1533 /* [unless only anchor is BOL and multiline is set] */
1534 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1535 if (s == startpos && regtry(prog, startpos))
1536 goto got_it;
1537 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1538 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1539 {
1540 char *end;
1541
1542 if (minlen)
1543 dontbother = minlen - 1;
1aa99e6b 1544 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1545 /* for multiline we only have to try after newlines */
1546 if (prog->check_substr) {
1547 if (s == startpos)
1548 goto after_try;
1549 while (1) {
1550 if (regtry(prog, s))
1551 goto got_it;
1552 after_try:
1553 if (s >= end)
1554 goto phooey;
1555 if (prog->reganch & RE_USE_INTUIT) {
1556 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1557 if (!s)
1558 goto phooey;
1559 }
1560 else
1561 s++;
1562 }
1563 } else {
1564 if (s > startpos)
1565 s--;
1566 while (s < end) {
1567 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1568 if (regtry(prog, s))
1569 goto got_it;
1570 }
1571 }
1572 }
1573 }
1574 goto phooey;
1575 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1576 if (regtry(prog, PL_reg_ganch))
1577 goto got_it;
1578 goto phooey;
1579 }
1580
1581 /* Messy cases: unanchored match. */
9041c2e3 1582 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1583 /* we have /x+whatever/ */
1584 /* it must be a one character string (XXXX Except UTF?) */
1585 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1586#ifdef DEBUGGING
1587 int did_match = 0;
1588#endif
1589
1aa99e6b 1590 if (do_utf8) {
6eb5f6b9
JH
1591 while (s < strend) {
1592 if (*s == ch) {
bf93d4cc 1593 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1594 if (regtry(prog, s)) goto got_it;
1595 s += UTF8SKIP(s);
1596 while (s < strend && *s == ch)
1597 s += UTF8SKIP(s);
1598 }
1599 s += UTF8SKIP(s);
1600 }
1601 }
1602 else {
1603 while (s < strend) {
1604 if (*s == ch) {
bf93d4cc 1605 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1606 if (regtry(prog, s)) goto got_it;
1607 s++;
1608 while (s < strend && *s == ch)
1609 s++;
1610 }
1611 s++;
1612 }
1613 }
b7953727 1614 DEBUG_r(if (!did_match)
bf93d4cc 1615 PerlIO_printf(Perl_debug_log,
b7953727
JH
1616 "Did not find anchored character...\n")
1617 );
6eb5f6b9
JH
1618 }
1619 /*SUPPRESS 560*/
1aa99e6b
IH
1620 else if (do_utf8 == (UTF!=0) &&
1621 (prog->anchored_substr != Nullsv
9041c2e3 1622 || (prog->float_substr != Nullsv
1aa99e6b 1623 && prog->float_max_offset < strend - s))) {
9041c2e3 1624 SV *must = prog->anchored_substr
6eb5f6b9 1625 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1626 I32 back_max =
6eb5f6b9 1627 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1628 I32 back_min =
6eb5f6b9 1629 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1630 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1631 -(I32)(CHR_SVLEN(must)
1aa99e6b 1632 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1633 char *last1; /* Last position checked before */
bf93d4cc
GS
1634#ifdef DEBUGGING
1635 int did_match = 0;
1636#endif
6eb5f6b9
JH
1637
1638 if (s > PL_bostr)
1639 last1 = HOPc(s, -1);
1640 else
1641 last1 = s - 1; /* bogus */
1642
1643 /* XXXX check_substr already used to find `s', can optimize if
1644 check_substr==must. */
1645 scream_pos = -1;
1646 dontbother = end_shift;
1647 strend = HOPc(strend, -dontbother);
1648 while ( (s <= last) &&
9041c2e3 1649 ((flags & REXEC_SCREAM)
1aa99e6b 1650 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1651 end_shift, &scream_pos, 0))
1aa99e6b 1652 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1653 (unsigned char*)strend, must,
6eb5f6b9 1654 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1655 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1656 if (HOPc(s, -back_max) > last1) {
1657 last1 = HOPc(s, -back_min);
1658 s = HOPc(s, -back_max);
1659 }
1660 else {
1661 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1662
1663 last1 = HOPc(s, -back_min);
1664 s = t;
1665 }
1aa99e6b 1666 if (do_utf8) {
6eb5f6b9
JH
1667 while (s <= last1) {
1668 if (regtry(prog, s))
1669 goto got_it;
1670 s += UTF8SKIP(s);
1671 }
1672 }
1673 else {
1674 while (s <= last1) {
1675 if (regtry(prog, s))
1676 goto got_it;
1677 s++;
1678 }
1679 }
1680 }
b7953727
JH
1681 DEBUG_r(if (!did_match)
1682 PerlIO_printf(Perl_debug_log,
1683 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1684 ((must == prog->anchored_substr)
1685 ? "anchored" : "floating"),
1686 PL_colors[0],
1687 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1688 SvPVX(must),
b7953727
JH
1689 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1690 );
6eb5f6b9
JH
1691 goto phooey;
1692 }
155aba94 1693 else if ((c = prog->regstclass)) {
66e933ab
GS
1694 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1695 /* don't bother with what can't match */
6eb5f6b9 1696 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1697 DEBUG_r({
1698 SV *prop = sv_newmortal();
1699 regprop(prop, c);
1700 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1701 });
6eb5f6b9
JH
1702 if (find_byclass(prog, c, s, strend, startpos, 0))
1703 goto got_it;
bf93d4cc 1704 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1705 }
1706 else {
1707 dontbother = 0;
1708 if (prog->float_substr != Nullsv) { /* Trim the end. */
1709 char *last;
d6a28714
JH
1710
1711 if (flags & REXEC_SCREAM) {
1712 last = screaminstr(sv, prog->float_substr, s - strbeg,
1713 end_shift, &scream_pos, 1); /* last one */
1714 if (!last)
ffc61ed2 1715 last = scream_olds; /* Only one occurrence. */
b8c5462f 1716 }
d6a28714
JH
1717 else {
1718 STRLEN len;
1719 char *little = SvPV(prog->float_substr, len);
1720
1721 if (SvTAIL(prog->float_substr)) {
1722 if (memEQ(strend - len + 1, little, len - 1))
1723 last = strend - len + 1;
1724 else if (!PL_multiline)
9041c2e3 1725 last = memEQ(strend - len, little, len)
d6a28714 1726 ? strend - len : Nullch;
b8c5462f 1727 else
d6a28714
JH
1728 goto find_last;
1729 } else {
1730 find_last:
9041c2e3 1731 if (len)
d6a28714 1732 last = rninstr(s, strend, little, little + len);
b8c5462f 1733 else
d6a28714 1734 last = strend; /* matching `$' */
b8c5462f 1735 }
b8c5462f 1736 }
bf93d4cc
GS
1737 if (last == NULL) {
1738 DEBUG_r(PerlIO_printf(Perl_debug_log,
1739 "%sCan't trim the tail, match fails (should not happen)%s\n",
1740 PL_colors[4],PL_colors[5]));
1741 goto phooey; /* Should not happen! */
1742 }
d6a28714
JH
1743 dontbother = strend - last + prog->float_min_offset;
1744 }
1745 if (minlen && (dontbother < minlen))
1746 dontbother = minlen - 1;
1747 strend -= dontbother; /* this one's always in bytes! */
1748 /* We don't know much -- general case. */
1aa99e6b 1749 if (do_utf8) {
d6a28714
JH
1750 for (;;) {
1751 if (regtry(prog, s))
1752 goto got_it;
1753 if (s >= strend)
1754 break;
b8c5462f 1755 s += UTF8SKIP(s);
d6a28714
JH
1756 };
1757 }
1758 else {
1759 do {
1760 if (regtry(prog, s))
1761 goto got_it;
1762 } while (s++ < strend);
1763 }
1764 }
1765
1766 /* Failure. */
1767 goto phooey;
1768
1769got_it:
1770 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1771
1772 if (PL_reg_eval_set) {
1773 /* Preserve the current value of $^R */
1774 if (oreplsv != GvSV(PL_replgv))
1775 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1776 restored, the value remains
1777 the same. */
1778 restore_pos(aTHXo_ 0);
1779 }
1780
1781 /* make sure $`, $&, $', and $digit will work later */
1782 if ( !(flags & REXEC_NOT_FIRST) ) {
1783 if (RX_MATCH_COPIED(prog)) {
1784 Safefree(prog->subbeg);
1785 RX_MATCH_COPIED_off(prog);
1786 }
1787 if (flags & REXEC_COPY_STR) {
1788 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1789
1790 s = savepvn(strbeg, i);
1791 prog->subbeg = s;
1792 prog->sublen = i;
1793 RX_MATCH_COPIED_on(prog);
1794 }
1795 else {
1796 prog->subbeg = strbeg;
1797 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1798 }
1799 }
9041c2e3 1800
d6a28714
JH
1801 return 1;
1802
1803phooey:
bf93d4cc
GS
1804 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1805 PL_colors[4],PL_colors[5]));
d6a28714
JH
1806 if (PL_reg_eval_set)
1807 restore_pos(aTHXo_ 0);
1808 return 0;
1809}
1810
1811/*
1812 - regtry - try match at specific point
1813 */
1814STATIC I32 /* 0 failure, 1 success */
1815S_regtry(pTHX_ regexp *prog, char *startpos)
1816{
d6a28714
JH
1817 register I32 i;
1818 register I32 *sp;
1819 register I32 *ep;
1820 CHECKPOINT lastcp;
1821
02db2b7b
IZ
1822#ifdef DEBUGGING
1823 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1824#endif
d6a28714
JH
1825 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1826 MAGIC *mg;
1827
1828 PL_reg_eval_set = RS_init;
1829 DEBUG_r(DEBUG_s(
b900a521
JH
1830 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1831 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1832 ));
e8347627 1833 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1834 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1835 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1836 SAVETMPS;
1837 /* Apparently this is not needed, judging by wantarray. */
e8347627 1838 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1839 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1840
1841 if (PL_reg_sv) {
1842 /* Make $_ available to executed code. */
1843 if (PL_reg_sv != DEFSV) {
1844 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1845 SAVESPTR(DEFSV);
1846 DEFSV = PL_reg_sv;
b8c5462f 1847 }
d6a28714 1848
9041c2e3 1849 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1850 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1851 /* prepare for quick setting of pos */
14befaf4
DM
1852 sv_magic(PL_reg_sv, (SV*)0,
1853 PERL_MAGIC_regex_global, Nullch, 0);
1854 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1855 mg->mg_len = -1;
b8c5462f 1856 }
d6a28714
JH
1857 PL_reg_magic = mg;
1858 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1859 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1860 }
09687e5a 1861 if (!PL_reg_curpm) {
0f79a09d 1862 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1863#ifdef USE_ITHREADS
1864 {
1865 SV* repointer = newSViv(0);
577e12cc 1866 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1867 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1868 av_push(PL_regex_padav,repointer);
1869 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1870 PL_regex_pad = AvARRAY(PL_regex_padav);
1871 }
1872#endif
1873 }
aaa362c4 1874 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1875 PL_reg_oldcurpm = PL_curpm;
1876 PL_curpm = PL_reg_curpm;
1877 if (RX_MATCH_COPIED(prog)) {
1878 /* Here is a serious problem: we cannot rewrite subbeg,
1879 since it may be needed if this match fails. Thus
1880 $` inside (?{}) could fail... */
1881 PL_reg_oldsaved = prog->subbeg;
1882 PL_reg_oldsavedlen = prog->sublen;
1883 RX_MATCH_COPIED_off(prog);
1884 }
1885 else
1886 PL_reg_oldsaved = Nullch;
1887 prog->subbeg = PL_bostr;
1888 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1889 }
1890 prog->startp[0] = startpos - PL_bostr;
1891 PL_reginput = startpos;
1892 PL_regstartp = prog->startp;
1893 PL_regendp = prog->endp;
1894 PL_reglastparen = &prog->lastparen;
a01268b5 1895 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1896 prog->lastparen = 0;
1897 PL_regsize = 0;
1898 DEBUG_r(PL_reg_starttry = startpos);
1899 if (PL_reg_start_tmpl <= prog->nparens) {
1900 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1901 if(PL_reg_start_tmp)
1902 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1903 else
1904 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1905 }
1906
1907 /* XXXX What this code is doing here?!!! There should be no need
1908 to do this again and again, PL_reglastparen should take care of
3dd2943c 1909 this! --ilya*/
dafc8851
JH
1910
1911 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1912 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1913 * PL_reglastparen), is not needed at all by the test suite
1914 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1915 * enough, for building DynaLoader, or otherwise this
1916 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1917 * will happen. Meanwhile, this code *is* needed for the
1918 * above-mentioned test suite tests to succeed. The common theme
1919 * on those tests seems to be returning null fields from matches.
1920 * --jhi */
dafc8851 1921#if 1
d6a28714
JH
1922 sp = prog->startp;
1923 ep = prog->endp;
1924 if (prog->nparens) {
09e8ae3b 1925 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1926 *++sp = -1;
1927 *++ep = -1;
1928 }
1929 }
dafc8851 1930#endif
02db2b7b 1931 REGCP_SET(lastcp);
d6a28714
JH
1932 if (regmatch(prog->program + 1)) {
1933 prog->endp[0] = PL_reginput - PL_bostr;
1934 return 1;
1935 }
02db2b7b 1936 REGCP_UNWIND(lastcp);
d6a28714
JH
1937 return 0;
1938}
1939
02db2b7b
IZ
1940#define RE_UNWIND_BRANCH 1
1941#define RE_UNWIND_BRANCHJ 2
1942
1943union re_unwind_t;
1944
1945typedef struct { /* XX: makes sense to enlarge it... */
1946 I32 type;
1947 I32 prev;
1948 CHECKPOINT lastcp;
1949} re_unwind_generic_t;
1950
1951typedef struct {
1952 I32 type;
1953 I32 prev;
1954 CHECKPOINT lastcp;
1955 I32 lastparen;
1956 regnode *next;
1957 char *locinput;
1958 I32 nextchr;
1959#ifdef DEBUGGING
1960 int regindent;
1961#endif
1962} re_unwind_branch_t;
1963
1964typedef union re_unwind_t {
1965 I32 type;
1966 re_unwind_generic_t generic;
1967 re_unwind_branch_t branch;
1968} re_unwind_t;
1969
8ba1375e
MJD
1970#define sayYES goto yes
1971#define sayNO goto no
1972#define sayYES_FINAL goto yes_final
1973#define sayYES_LOUD goto yes_loud
1974#define sayNO_FINAL goto no_final
1975#define sayNO_SILENT goto do_no
1976#define saySAME(x) if (x) goto yes; else goto no
1977
1978#define REPORT_CODE_OFF 24
1979
d6a28714
JH
1980/*
1981 - regmatch - main matching routine
1982 *
1983 * Conceptually the strategy is simple: check to see whether the current
1984 * node matches, call self recursively to see whether the rest matches,
1985 * and then act accordingly. In practice we make some effort to avoid
1986 * recursion, in particular by going through "ordinary" nodes (that don't
1987 * need to know whether the rest of the match failed) by a loop instead of
1988 * by recursion.
1989 */
1990/* [lwall] I've hoisted the register declarations to the outer block in order to
1991 * maybe save a little bit of pushing and popping on the stack. It also takes
1992 * advantage of machines that use a register save mask on subroutine entry.
1993 */
1994STATIC I32 /* 0 failure, 1 success */
1995S_regmatch(pTHX_ regnode *prog)
1996{
d6a28714
JH
1997 register regnode *scan; /* Current node. */
1998 regnode *next; /* Next node. */
1999 regnode *inner; /* Next node in internal branch. */
2000 register I32 nextchr; /* renamed nextchr - nextchar colides with
2001 function of same name */
2002 register I32 n; /* no or next */
b7953727
JH
2003 register I32 ln = 0; /* len or last */
2004 register char *s = Nullch; /* operand or save */
d6a28714 2005 register char *locinput = PL_reginput;
b7953727 2006 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2007 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2008 I32 unwind = 0;
b7953727 2009#if 0
02db2b7b 2010 I32 firstcp = PL_savestack_ix;
b7953727 2011#endif
ffc61ed2 2012 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 2013
d6a28714
JH
2014#ifdef DEBUGGING
2015 PL_regindent++;
2016#endif
2017
2018 /* Note that nextchr is a byte even in UTF */
2019 nextchr = UCHARAT(locinput);
2020 scan = prog;
2021 while (scan != NULL) {
8ba1375e 2022
d6a28714
JH
2023 DEBUG_r( {
2024 SV *prop = sv_newmortal();
2025 int docolor = *PL_colors[0];
2026 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2027 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2028 /* The part of the string before starttry has one color
2029 (pref0_len chars), between starttry and current
2030 position another one (pref_len - pref0_len chars),
2031 after the current position the third one.
2032 We assume that pref0_len <= pref_len, otherwise we
2033 decrease pref0_len. */
9041c2e3 2034 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2035 ? (5 + taill) - l : locinput - PL_bostr;
2036 int pref0_len;
d6a28714 2037
1aa99e6b
IH
2038 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2039 pref_len++;
2040 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2041 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2042 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2043 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2044 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2045 l--;
d6a28714
JH
2046 if (pref0_len < 0)
2047 pref0_len = 0;
2048 if (pref0_len > pref_len)
2049 pref0_len = pref_len;
2050 regprop(prop, scan);
9041c2e3 2051 PerlIO_printf(Perl_debug_log,
b900a521 2052 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2053 (IV)(locinput - PL_bostr),
2054 PL_colors[4], pref0_len,
d6a28714 2055 locinput - pref_len, PL_colors[5],
9041c2e3 2056 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2057 locinput - pref_len + pref0_len, PL_colors[3],
2058 (docolor ? "" : "> <"),
2059 PL_colors[0], l, locinput, PL_colors[1],
2060 15 - l - pref_len + 1,
2061 "",
b900a521 2062 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2063 SvPVX(prop));
2064 } );
2065
2066 next = scan + NEXT_OFF(scan);
2067 if (next == scan)
2068 next = NULL;
2069
2070 switch (OP(scan)) {
2071 case BOL:
12d33761
HS
2072 if (locinput == PL_bostr || (PL_multiline &&
2073 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2074 {
2075 /* regtill = regbol; */
b8c5462f
JH
2076 break;
2077 }
d6a28714
JH
2078 sayNO;
2079 case MBOL:
12d33761
HS
2080 if (locinput == PL_bostr ||
2081 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2082 {
b8c5462f
JH
2083 break;
2084 }
d6a28714
JH
2085 sayNO;
2086 case SBOL:
c2a73568 2087 if (locinput == PL_bostr)
b8c5462f 2088 break;
d6a28714
JH
2089 sayNO;
2090 case GPOS:
2091 if (locinput == PL_reg_ganch)
2092 break;
2093 sayNO;
2094 case EOL:
2095 if (PL_multiline)
2096 goto meol;
2097 else
2098 goto seol;
2099 case MEOL:
2100 meol:
2101 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2102 sayNO;
b8c5462f 2103 break;
d6a28714
JH
2104 case SEOL:
2105 seol:
2106 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2107 sayNO;
d6a28714 2108 if (PL_regeol - locinput > 1)
b8c5462f 2109 sayNO;
b8c5462f 2110 break;
d6a28714
JH
2111 case EOS:
2112 if (PL_regeol != locinput)
b8c5462f 2113 sayNO;
d6a28714 2114 break;
ffc61ed2 2115 case SANY:
d6a28714 2116 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2117 sayNO;
f33976b4
DB
2118 if (do_utf8) {
2119 locinput += PL_utf8skip[nextchr];
2120 if (locinput > PL_regeol)
2121 sayNO;
2122 nextchr = UCHARAT(locinput);
2123 }
2124 else
2125 nextchr = UCHARAT(++locinput);
2126 break;
2127 case CANY:
2128 if (!nextchr && locinput >= PL_regeol)
2129 sayNO;
b8c5462f 2130 nextchr = UCHARAT(++locinput);
a0d0e21e 2131 break;
ffc61ed2 2132 case REG_ANY:
1aa99e6b
IH
2133 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2134 sayNO;
2135 if (do_utf8) {
b8c5462f 2136 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2137 if (locinput > PL_regeol)
2138 sayNO;
a0ed51b3 2139 nextchr = UCHARAT(locinput);
a0ed51b3 2140 }
1aa99e6b
IH
2141 else
2142 nextchr = UCHARAT(++locinput);
a0ed51b3 2143 break;
d6a28714 2144 case EXACT:
cd439c50
IZ
2145 s = STRING(scan);
2146 ln = STR_LEN(scan);
1aa99e6b
IH
2147 if (do_utf8 != (UTF!=0)) {
2148 char *l = locinput;
2149 char *e = s + ln;
2150 STRLEN len;
2151 if (do_utf8)
2152 while (s < e) {
2153 if (l >= PL_regeol)
2154 sayNO;
9041c2e3 2155 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2156 sayNO;
2157 s++;
2158 l += len;
2159 }
2160 else
2161 while (s < e) {
2162 if (l >= PL_regeol)
2163 sayNO;
9041c2e3 2164 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2165 sayNO;
2166 s += len;
2167 l++;
2168 }
2169 locinput = l;
2170 nextchr = UCHARAT(locinput);
2171 break;
2172 }
d6a28714
JH
2173 /* Inline the first character, for speed. */
2174 if (UCHARAT(s) != nextchr)
2175 sayNO;
2176 if (PL_regeol - locinput < ln)
2177 sayNO;
2178 if (ln > 1 && memNE(s, locinput, ln))
2179 sayNO;
2180 locinput += ln;
2181 nextchr = UCHARAT(locinput);
2182 break;
2183 case EXACTFL:
b8c5462f
JH
2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714 2186 case EXACTF:
cd439c50
IZ
2187 s = STRING(scan);
2188 ln = STR_LEN(scan);
d6a28714 2189
1aa99e6b 2190 if (do_utf8) {
d6a28714 2191 char *l = locinput;
1aa99e6b
IH
2192 char *e;
2193 e = s + ln;
d6a28714
JH
2194 c1 = OP(scan) == EXACTF;
2195 while (s < e) {
1aa99e6b 2196 if (l >= PL_regeol) {
d6a28714
JH
2197 sayNO;
2198 }
2b9d42f0 2199 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2200 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2201 sayNO;
2202 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2203 l += UTF8SKIP(l);
b8c5462f 2204 }
d6a28714 2205 locinput = l;
a0ed51b3
LW
2206 nextchr = UCHARAT(locinput);
2207 break;
2208 }
d6a28714
JH
2209
2210 /* Inline the first character, for speed. */
2211 if (UCHARAT(s) != nextchr &&
2212 UCHARAT(s) != ((OP(scan) == EXACTF)
2213 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2214 sayNO;
d6a28714 2215 if (PL_regeol - locinput < ln)
b8c5462f 2216 sayNO;
d6a28714
JH
2217 if (ln > 1 && (OP(scan) == EXACTF
2218 ? ibcmp(s, locinput, ln)
2219 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2220 sayNO;
d6a28714
JH
2221 locinput += ln;
2222 nextchr = UCHARAT(locinput);
a0d0e21e 2223 break;
d6a28714 2224 case ANYOF:
ffc61ed2
JH
2225 if (do_utf8) {
2226 if (!reginclass(scan, (U8*)locinput, do_utf8))
2227 sayNO;
2228 if (locinput >= PL_regeol)
2229 sayNO;
2230 locinput += PL_utf8skip[nextchr];
b8c5462f 2231 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2232 }
2233 else {
2234 if (nextchr < 0)
2235 nextchr = UCHARAT(locinput);
2236 if (!reginclass(scan, (U8*)locinput, do_utf8))
2237 sayNO;
2238 if (!nextchr && locinput >= PL_regeol)
2239 sayNO;
2240 nextchr = UCHARAT(++locinput);
2241 }
b8c5462f 2242 break;
d6a28714 2243 case ALNUML:
b8c5462f
JH
2244 PL_reg_flags |= RF_tainted;
2245 /* FALL THROUGH */
d6a28714 2246 case ALNUM:
b8c5462f 2247 if (!nextchr)
4633a7c4 2248 sayNO;
ffc61ed2 2249 if (do_utf8) {
ad24be35 2250 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2251 if (!(OP(scan) == ALNUM
3568d838 2252 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2253 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2254 {
2255 sayNO;
a0ed51b3 2256 }
b8c5462f 2257 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2258 nextchr = UCHARAT(locinput);
2259 break;
2260 }
ffc61ed2 2261 if (!(OP(scan) == ALNUM
d6a28714 2262 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2263 sayNO;
b8c5462f 2264 nextchr = UCHARAT(++locinput);
a0d0e21e 2265 break;
d6a28714 2266 case NALNUML:
b8c5462f
JH
2267 PL_reg_flags |= RF_tainted;
2268 /* FALL THROUGH */
d6a28714
JH
2269 case NALNUM:
2270 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2271 sayNO;
ffc61ed2 2272 if (do_utf8) {
8269fa76 2273 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2274 if (OP(scan) == NALNUM
3568d838 2275 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2276 : isALNUM_LC_utf8((U8*)locinput))
2277 {
b8c5462f 2278 sayNO;
d6a28714 2279 }
b8c5462f
JH
2280 locinput += PL_utf8skip[nextchr];
2281 nextchr = UCHARAT(locinput);
2282 break;
2283 }
ffc61ed2 2284 if (OP(scan) == NALNUM
d6a28714 2285 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2286 sayNO;
76e3520e 2287 nextchr = UCHARAT(++locinput);
a0d0e21e 2288 break;
d6a28714
JH
2289 case BOUNDL:
2290 case NBOUNDL:
3280af22 2291 PL_reg_flags |= RF_tainted;
bbce6d69 2292 /* FALL THROUGH */
d6a28714
JH
2293 case BOUND:
2294 case NBOUND:
2295 /* was last char in word? */
ffc61ed2 2296 if (do_utf8) {
12d33761
HS
2297 if (locinput == PL_bostr)
2298 ln = '\n';
ffc61ed2
JH
2299 else {
2300 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2301
2b9d42f0 2302 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2303 }
2304 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2305 ln = isALNUM_uni(ln);
8269fa76 2306 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2307 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2308 }
2309 else {
9041c2e3 2310 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2311 n = isALNUM_LC_utf8((U8*)locinput);
2312 }
a0ed51b3 2313 }
d6a28714 2314 else {
12d33761
HS
2315 ln = (locinput != PL_bostr) ?
2316 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2317 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2318 ln = isALNUM(ln);
2319 n = isALNUM(nextchr);
2320 }
2321 else {
2322 ln = isALNUM_LC(ln);
2323 n = isALNUM_LC(nextchr);
2324 }
d6a28714 2325 }
ffc61ed2
JH
2326 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2327 OP(scan) == BOUNDL))
2328 sayNO;
a0ed51b3 2329 break;
d6a28714 2330 case SPACEL:
3280af22 2331 PL_reg_flags |= RF_tainted;
bbce6d69 2332 /* FALL THROUGH */
d6a28714 2333 case SPACE:
9442cb0e 2334 if (!nextchr)
4633a7c4 2335 sayNO;
1aa99e6b 2336 if (do_utf8) {
fd400ab9 2337 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2338 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2339 if (!(OP(scan) == SPACE
3568d838 2340 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2341 : isSPACE_LC_utf8((U8*)locinput)))
2342 {
2343 sayNO;
2344 }
2345 locinput += PL_utf8skip[nextchr];
2346 nextchr = UCHARAT(locinput);
2347 break;
d6a28714 2348 }
ffc61ed2
JH
2349 if (!(OP(scan) == SPACE
2350 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2351 sayNO;
2352 nextchr = UCHARAT(++locinput);
2353 }
2354 else {
2355 if (!(OP(scan) == SPACE
2356 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2357 sayNO;
2358 nextchr = UCHARAT(++locinput);
a0ed51b3 2359 }
a0ed51b3 2360 break;
d6a28714 2361 case NSPACEL:
3280af22 2362 PL_reg_flags |= RF_tainted;
bbce6d69 2363 /* FALL THROUGH */
d6a28714 2364 case NSPACE:
9442cb0e 2365 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2366 sayNO;
1aa99e6b 2367 if (do_utf8) {
8269fa76 2368 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2369 if (OP(scan) == NSPACE
3568d838 2370 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2371 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2372 {
2373 sayNO;
2374 }
2375 locinput += PL_utf8skip[nextchr];
2376 nextchr = UCHARAT(locinput);
2377 break;
a0ed51b3 2378 }
ffc61ed2 2379 if (OP(scan) == NSPACE
d6a28714 2380 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2381 sayNO;
76e3520e 2382 nextchr = UCHARAT(++locinput);
a0d0e21e 2383 break;
d6a28714 2384 case DIGITL:
a0ed51b3
LW
2385 PL_reg_flags |= RF_tainted;
2386 /* FALL THROUGH */
d6a28714 2387 case DIGIT:
9442cb0e 2388 if (!nextchr)
a0ed51b3 2389 sayNO;
1aa99e6b 2390 if (do_utf8) {
8269fa76 2391 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2392 if (!(OP(scan) == DIGIT
3568d838 2393 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2394 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2395 {
a0ed51b3 2396 sayNO;
dfe13c55 2397 }
6f06b55f 2398 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2399 nextchr = UCHARAT(locinput);
2400 break;
2401 }
ffc61ed2 2402 if (!(OP(scan) == DIGIT
9442cb0e 2403 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2404 sayNO;
2405 nextchr = UCHARAT(++locinput);
2406 break;
d6a28714 2407 case NDIGITL:
b8c5462f
JH
2408 PL_reg_flags |= RF_tainted;
2409 /* FALL THROUGH */
d6a28714 2410 case NDIGIT:
9442cb0e 2411 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2412 sayNO;
1aa99e6b 2413 if (do_utf8) {
8269fa76 2414 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2415 if (OP(scan) == NDIGIT
3568d838 2416 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2417 : isDIGIT_LC_utf8((U8*)locinput))
2418 {
a0ed51b3 2419 sayNO;
9442cb0e 2420 }
6f06b55f 2421 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2422 nextchr = UCHARAT(locinput);
2423 break;
2424 }
ffc61ed2 2425 if (OP(scan) == NDIGIT
9442cb0e 2426 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2427 sayNO;
2428 nextchr = UCHARAT(++locinput);
2429 break;
2430 case CLUMP:
8269fa76 2431 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2432 if (locinput >= PL_regeol ||
2433 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2434 sayNO;
6f06b55f 2435 locinput += PL_utf8skip[nextchr];
3568d838
JH
2436 while (locinput < PL_regeol &&
2437 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2438 locinput += UTF8SKIP(locinput);
2439 if (locinput > PL_regeol)
2440 sayNO;
2441 nextchr = UCHARAT(locinput);
2442 break;
c8756f30 2443 case REFFL:
3280af22 2444 PL_reg_flags |= RF_tainted;
c8756f30 2445 /* FALL THROUGH */
c277df42 2446 case REF:
c8756f30 2447 case REFF:
c277df42 2448 n = ARG(scan); /* which paren pair */
cf93c79d 2449 ln = PL_regstartp[n];
2c2d71f5 2450 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2451 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2452 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2453 if (ln == PL_regendp[n])
a0d0e21e 2454 break;
a0ed51b3 2455
cf93c79d 2456 s = PL_bostr + ln;
1aa99e6b 2457 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2458 char *l = locinput;
cf93c79d 2459 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2460 /*
2461 * Note that we can't do the "other character" lookup trick as
2462 * in the 8-bit case (no pun intended) because in Unicode we
2463 * have to map both upper and title case to lower case.
2464 */
2465 if (OP(scan) == REFF) {
2466 while (s < e) {
2467 if (l >= PL_regeol)
2468 sayNO;
dfe13c55 2469 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2470 sayNO;
2471 s += UTF8SKIP(s);
2472 l += UTF8SKIP(l);
2473 }
2474 }
2475 else {
2476 while (s < e) {
2477 if (l >= PL_regeol)
2478 sayNO;
dfe13c55 2479 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2480 sayNO;
2481 s += UTF8SKIP(s);
2482 l += UTF8SKIP(l);
2483 }
2484 }
2485 locinput = l;
2486 nextchr = UCHARAT(locinput);
2487 break;
2488 }
2489
a0d0e21e 2490 /* Inline the first character, for speed. */
76e3520e 2491 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2492 (OP(scan) == REF ||
2493 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2494 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2495 sayNO;
cf93c79d 2496 ln = PL_regendp[n] - ln;
3280af22 2497 if (locinput + ln > PL_regeol)
4633a7c4 2498 sayNO;
c8756f30
AK
2499 if (ln > 1 && (OP(scan) == REF
2500 ? memNE(s, locinput, ln)
2501 : (OP(scan) == REFF
2502 ? ibcmp(s, locinput, ln)
2503 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2504 sayNO;
a0d0e21e 2505 locinput += ln;
76e3520e 2506 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2507 break;
2508
2509 case NOTHING:
c277df42 2510 case TAIL:
a0d0e21e
LW
2511 break;
2512 case BACK:
2513 break;
c277df42
IZ
2514 case EVAL:
2515 {
2516 dSP;
533c011a 2517 OP_4tree *oop = PL_op;
3280af22
NIS
2518 COP *ocurcop = PL_curcop;
2519 SV **ocurpad = PL_curpad;
c277df42 2520 SV *ret;
9041c2e3 2521
c277df42 2522 n = ARG(scan);
533c011a 2523 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2524 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2525 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2526 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2527
cea2e8a9 2528 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2529 SPAGAIN;
2530 ret = POPs;
2531 PUTBACK;
9041c2e3 2532
0f5d15d6
IZ
2533 PL_op = oop;
2534 PL_curpad = ocurpad;
2535 PL_curcop = ocurcop;
c277df42 2536 if (logical) {
0f5d15d6
IZ
2537 if (logical == 2) { /* Postponed subexpression. */
2538 regexp *re;
22c35a8c 2539 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2540 re_cc_state state;
0f5d15d6
IZ
2541 CHECKPOINT cp, lastcp;
2542
2543 if(SvROK(ret) || SvRMAGICAL(ret)) {
2544 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2545
2546 if(SvMAGICAL(sv))
14befaf4 2547 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2548 }
2549 if (mg) {
2550 re = (regexp *)mg->mg_obj;
df0003d4 2551 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2552 }
2553 else {
2554 STRLEN len;
2555 char *t = SvPV(ret, len);
2556 PMOP pm;
2557 char *oprecomp = PL_regprecomp;
2558 I32 osize = PL_regsize;
2559 I32 onpar = PL_regnpar;
2560
5fcd1c1b 2561 Zero(&pm, 1, PMOP);
cea2e8a9 2562 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2563 if (!(SvFLAGS(ret)
0f5d15d6 2564 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2565 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2566 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2567 PL_regprecomp = oprecomp;
2568 PL_regsize = osize;
2569 PL_regnpar = onpar;
2570 }
2571 DEBUG_r(
9041c2e3 2572 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2573 "Entering embedded `%s%.60s%s%s'\n",
2574 PL_colors[0],
2575 re->precomp,
2576 PL_colors[1],
2577 (strlen(re->precomp) > 60 ? "..." : ""))
2578 );
2579 state.node = next;
2580 state.prev = PL_reg_call_cc;
2581 state.cc = PL_regcc;
2582 state.re = PL_reg_re;
2583
2ab05381 2584 PL_regcc = 0;
9041c2e3 2585
0f5d15d6 2586 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2587 REGCP_SET(lastcp);
0f5d15d6
IZ
2588 cache_re(re);
2589 state.ss = PL_savestack_ix;
2590 *PL_reglastparen = 0;
a01268b5 2591 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2592 PL_reg_call_cc = &state;
2593 PL_reginput = locinput;
2c2d71f5
JH
2594
2595 /* XXXX This is too dramatic a measure... */
2596 PL_reg_maxiter = 0;
2597
0f5d15d6 2598 if (regmatch(re->program + 1)) {
2c914db6
IZ
2599 /* Even though we succeeded, we need to restore
2600 global variables, since we may be wrapped inside
2601 SUSPEND, thus the match may be not finished yet. */
2602
2603 /* XXXX Do this only if SUSPENDed? */
2604 PL_reg_call_cc = state.prev;
2605 PL_regcc = state.cc;
2606 PL_reg_re = state.re;
2607 cache_re(PL_reg_re);
2608
2609 /* XXXX This is too dramatic a measure... */
2610 PL_reg_maxiter = 0;
2611
2612 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2613 ReREFCNT_dec(re);
2614 regcpblow(cp);
2615 sayYES;
2616 }
0f5d15d6 2617 ReREFCNT_dec(re);
02db2b7b 2618 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2619 regcppop();
2620 PL_reg_call_cc = state.prev;
2621 PL_regcc = state.cc;
2622 PL_reg_re = state.re;
d3790889 2623 cache_re(PL_reg_re);
2c2d71f5
JH
2624
2625 /* XXXX This is too dramatic a measure... */
2626 PL_reg_maxiter = 0;
2627
0f5d15d6
IZ
2628 sayNO;
2629 }
c277df42 2630 sw = SvTRUE(ret);
0f5d15d6 2631 logical = 0;
a0ed51b3
LW
2632 }
2633 else
3280af22 2634 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2635 break;
2636 }
a0d0e21e 2637 case OPEN:
c277df42 2638 n = ARG(scan); /* which paren pair */
3280af22
NIS
2639 PL_reg_start_tmp[n] = locinput;
2640 if (n > PL_regsize)
2641 PL_regsize = n;
a0d0e21e
LW
2642 break;
2643 case CLOSE:
c277df42 2644 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2645 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2646 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2647 if (n > *PL_reglastparen)
2648 *PL_reglastparen = n;
a01268b5 2649 *PL_reglastcloseparen = n;
a0d0e21e 2650 break;
c277df42
IZ
2651 case GROUPP:
2652 n = ARG(scan); /* which paren pair */
cf93c79d 2653 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2654 break;
2655 case IFTHEN:
2c2d71f5 2656 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2657 if (sw)
2658 next = NEXTOPER(NEXTOPER(scan));
2659 else {
2660 next = scan + ARG(scan);
2661 if (OP(next) == IFTHEN) /* Fake one. */
2662 next = NEXTOPER(NEXTOPER(next));
2663 }
2664 break;
2665 case LOGICAL:
0f5d15d6 2666 logical = scan->flags;
c277df42 2667 break;
2ab05381
IZ
2668/*******************************************************************
2669 PL_regcc contains infoblock about the innermost (...)* loop, and
2670 a pointer to the next outer infoblock.
2671
2672 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2673
2674 1) After matching X, regnode for CURLYX is processed;
2675
9041c2e3 2676 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2677 regmatch() recursively with the starting point at WHILEM node;
2678
2679 3) Each hit of WHILEM node tries to match A and Z (in the order
2680 depending on the current iteration, min/max of {min,max} and
2681 greediness). The information about where are nodes for "A"
2682 and "Z" is read from the infoblock, as is info on how many times "A"
2683 was already matched, and greediness.
2684
2685 4) After A matches, the same WHILEM node is hit again.
2686
2687 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2688 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2689 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2690 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2691 of the external loop.
2692
2693 Currently present infoblocks form a tree with a stem formed by PL_curcc
2694 and whatever it mentions via ->next, and additional attached trees
2695 corresponding to temporarily unset infoblocks as in "5" above.
2696
9041c2e3 2697 In the following picture infoblocks for outer loop of
2ab05381
IZ
2698 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2699 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2700 infoblocks are drawn below the "reset" infoblock.
2701
2702 In fact in the picture below we do not show failed matches for Z and T
2703 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2704 more obvious *why* one needs to *temporary* unset infoblocks.]
2705
2706 Matched REx position InfoBlocks Comment
2707 (Y(A)*?Z)*?T x
2708 Y(A)*?Z)*?T x <- O
2709 Y (A)*?Z)*?T x <- O
2710 Y A)*?Z)*?T x <- O <- I
2711 YA )*?Z)*?T x <- O <- I
2712 YA A)*?Z)*?T x <- O <- I
2713 YAA )*?Z)*?T x <- O <- I
2714 YAA Z)*?T x <- O # Temporary unset I
2715 I
2716
2717 YAAZ Y(A)*?Z)*?T x <- O
2718 I
2719
2720 YAAZY (A)*?Z)*?T x <- O
2721 I
2722
2723 YAAZY A)*?Z)*?T x <- O <- I
2724 I
2725
2726 YAAZYA )*?Z)*?T x <- O <- I
2727 I
2728
2729 YAAZYA Z)*?T x <- O # Temporary unset I
2730 I,I
2731
2732 YAAZYAZ )*?T x <- O
2733 I,I
2734
2735 YAAZYAZ T x # Temporary unset O
2736 O
2737 I,I
2738
2739 YAAZYAZT x
2740 O
2741 I,I
2742 *******************************************************************/
a0d0e21e
LW
2743 case CURLYX: {
2744 CURCUR cc;
3280af22 2745 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2746 /* No need to save/restore up to this paren */
2747 I32 parenfloor = scan->flags;
c277df42
IZ
2748
2749 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2750 next += ARG(next);
3280af22
NIS
2751 cc.oldcc = PL_regcc;
2752 PL_regcc = &cc;
cb434fcc
IZ
2753 /* XXXX Probably it is better to teach regpush to support
2754 parenfloor > PL_regsize... */
2755 if (parenfloor > *PL_reglastparen)
2756 parenfloor = *PL_reglastparen; /* Pessimization... */
2757 cc.parenfloor = parenfloor;
a0d0e21e
LW
2758 cc.cur = -1;
2759 cc.min = ARG1(scan);
2760 cc.max = ARG2(scan);
c277df42 2761 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2762 cc.next = next;
2763 cc.minmod = minmod;
2764 cc.lastloc = 0;
3280af22 2765 PL_reginput = locinput;
a0d0e21e
LW
2766 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2767 regcpblow(cp);
3280af22 2768 PL_regcc = cc.oldcc;
4633a7c4 2769 saySAME(n);
a0d0e21e
LW
2770 }
2771 /* NOT REACHED */
2772 case WHILEM: {
2773 /*
2774 * This is really hard to understand, because after we match
2775 * what we're trying to match, we must make sure the rest of
2c2d71f5 2776 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2777 * to go back UP the parse tree by recursing ever deeper. And
2778 * if it fails, we have to reset our parent's current state
2779 * that we can try again after backing off.
2780 */
2781
c277df42 2782 CHECKPOINT cp, lastcp;
3280af22 2783 CURCUR* cc = PL_regcc;
c277df42
IZ
2784 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2785
4633a7c4 2786 n = cc->cur + 1; /* how many we know we matched */
3280af22 2787 PL_reginput = locinput;
a0d0e21e 2788
c277df42 2789 DEBUG_r(
9041c2e3
NIS
2790 PerlIO_printf(Perl_debug_log,
2791 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2792 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2793 (long)n, (long)cc->min,
c277df42
IZ
2794 (long)cc->max, (long)cc)
2795 );
4633a7c4 2796
a0d0e21e
LW
2797 /* If degenerate scan matches "", assume scan done. */
2798
579cf2c3 2799 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2800 PL_regcc = cc->oldcc;
2ab05381
IZ
2801 if (PL_regcc)
2802 ln = PL_regcc->cur;
c277df42 2803 DEBUG_r(
c3464db5
DD
2804 PerlIO_printf(Perl_debug_log,
2805 "%*s empty match detected, try continuation...\n",
3280af22 2806 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2807 );
a0d0e21e 2808 if (regmatch(cc->next))
4633a7c4 2809 sayYES;
2ab05381
IZ
2810 if (PL_regcc)
2811 PL_regcc->cur = ln;
3280af22 2812 PL_regcc = cc;
4633a7c4 2813 sayNO;
a0d0e21e
LW
2814 }
2815
2816 /* First just match a string of min scans. */
2817
2818 if (n < cc->min) {
2819 cc->cur = n;
2820 cc->lastloc = locinput;
4633a7c4
LW
2821 if (regmatch(cc->scan))
2822 sayYES;
2823 cc->cur = n - 1;
c277df42 2824 cc->lastloc = lastloc;
4633a7c4 2825 sayNO;
a0d0e21e
LW
2826 }
2827
2c2d71f5
JH
2828 if (scan->flags) {
2829 /* Check whether we already were at this position.
2830 Postpone detection until we know the match is not
2831 *that* much linear. */
2832 if (!PL_reg_maxiter) {
2833 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2834 PL_reg_leftiter = PL_reg_maxiter;
2835 }
2836 if (PL_reg_leftiter-- == 0) {
2837 I32 size = (PL_reg_maxiter + 7)/8;
2838 if (PL_reg_poscache) {
2839 if (PL_reg_poscache_size < size) {
2840 Renew(PL_reg_poscache, size, char);
2841 PL_reg_poscache_size = size;
2842 }
2843 Zero(PL_reg_poscache, size, char);
2844 }
2845 else {
2846 PL_reg_poscache_size = size;
2847 Newz(29, PL_reg_poscache, size, char);
2848 }
2849 DEBUG_r(
2850 PerlIO_printf(Perl_debug_log,
2851 "%sDetected a super-linear match, switching on caching%s...\n",
2852 PL_colors[4], PL_colors[5])
2853 );
2854 }
2855 if (PL_reg_leftiter < 0) {
2856 I32 o = locinput - PL_bostr, b;
2857
2858 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2859 b = o % 8;
2860 o /= 8;
2861 if (PL_reg_poscache[o] & (1<<b)) {
2862 DEBUG_r(
2863 PerlIO_printf(Perl_debug_log,
2864 "%*s already tried at this position...\n",
2865 REPORT_CODE_OFF+PL_regindent*2, "")
2866 );
7821416a 2867 sayNO_SILENT;
2c2d71f5
JH
2868 }
2869 PL_reg_poscache[o] |= (1<<b);
2870 }
2871 }
2872
a0d0e21e
LW
2873 /* Prefer next over scan for minimal matching. */
2874
2875 if (cc->minmod) {
3280af22 2876 PL_regcc = cc->oldcc;
2ab05381
IZ
2877 if (PL_regcc)
2878 ln = PL_regcc->cur;
5f05dabc 2879 cp = regcppush(cc->parenfloor);
02db2b7b 2880 REGCP_SET(lastcp);
5f05dabc 2881 if (regmatch(cc->next)) {
c277df42 2882 regcpblow(cp);
4633a7c4 2883 sayYES; /* All done. */
5f05dabc 2884 }
02db2b7b 2885 REGCP_UNWIND(lastcp);
5f05dabc 2886 regcppop();
2ab05381
IZ
2887 if (PL_regcc)
2888 PL_regcc->cur = ln;
3280af22 2889 PL_regcc = cc;
a0d0e21e 2890
c277df42 2891 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2892 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2893 && !(PL_reg_flags & RF_warned)) {
2894 PL_reg_flags |= RF_warned;
e476b1b5 2895 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2896 "Complex regular subexpression recursion",
2897 REG_INFTY - 1);
c277df42 2898 }
4633a7c4 2899 sayNO;
c277df42 2900 }
a687059c 2901
c277df42 2902 DEBUG_r(
c3464db5
DD
2903 PerlIO_printf(Perl_debug_log,
2904 "%*s trying longer...\n",
3280af22 2905 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2906 );
a0d0e21e 2907 /* Try scanning more and see if it helps. */
3280af22 2908 PL_reginput = locinput;
a0d0e21e
LW
2909 cc->cur = n;
2910 cc->lastloc = locinput;
5f05dabc 2911 cp = regcppush(cc->parenfloor);
02db2b7b 2912 REGCP_SET(lastcp);
5f05dabc 2913 if (regmatch(cc->scan)) {
c277df42 2914 regcpblow(cp);
4633a7c4 2915 sayYES;
5f05dabc 2916 }
02db2b7b 2917 REGCP_UNWIND(lastcp);
5f05dabc 2918 regcppop();
4633a7c4 2919 cc->cur = n - 1;
c277df42 2920 cc->lastloc = lastloc;
4633a7c4 2921 sayNO;
a0d0e21e
LW
2922 }
2923
2924 /* Prefer scan over next for maximal matching. */
2925
2926 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2927 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2928 cc->cur = n;
2929 cc->lastloc = locinput;
02db2b7b 2930 REGCP_SET(lastcp);
5f05dabc 2931 if (regmatch(cc->scan)) {
c277df42 2932 regcpblow(cp);
4633a7c4 2933 sayYES;
5f05dabc 2934 }
02db2b7b 2935 REGCP_UNWIND(lastcp);
a0d0e21e 2936 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2937 PL_reginput = locinput;
c277df42 2938 DEBUG_r(
c3464db5
DD
2939 PerlIO_printf(Perl_debug_log,
2940 "%*s failed, try continuation...\n",
3280af22 2941 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2942 );
2943 }
9041c2e3 2944 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2945 && !(PL_reg_flags & RF_warned)) {
3280af22 2946 PL_reg_flags |= RF_warned;
e476b1b5 2947 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2948 "Complex regular subexpression recursion",
2949 REG_INFTY - 1);
a0d0e21e
LW
2950 }
2951
2952 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2953 PL_regcc = cc->oldcc;
2ab05381
IZ
2954 if (PL_regcc)
2955 ln = PL_regcc->cur;
a0d0e21e 2956 if (regmatch(cc->next))
4633a7c4 2957 sayYES;
2ab05381
IZ
2958 if (PL_regcc)
2959 PL_regcc->cur = ln;
3280af22 2960 PL_regcc = cc;
4633a7c4 2961 cc->cur = n - 1;
c277df42 2962 cc->lastloc = lastloc;
4633a7c4 2963 sayNO;
a0d0e21e
LW
2964 }
2965 /* NOT REACHED */
9041c2e3 2966 case BRANCHJ:
c277df42
IZ
2967 next = scan + ARG(scan);
2968 if (next == scan)
2969 next = NULL;
2970 inner = NEXTOPER(NEXTOPER(scan));
2971 goto do_branch;
9041c2e3 2972 case BRANCH:
c277df42
IZ
2973 inner = NEXTOPER(scan);
2974 do_branch:
2975 {
c277df42
IZ
2976 c1 = OP(scan);
2977 if (OP(next) != c1) /* No choice. */
2978 next = inner; /* Avoid recursion. */
a0d0e21e 2979 else {
02db2b7b
IZ
2980 I32 lastparen = *PL_reglastparen;
2981 I32 unwind1;
2982 re_unwind_branch_t *uw;
2983
2984 /* Put unwinding data on stack */
2985 unwind1 = SSNEWt(1,re_unwind_branch_t);
2986 uw = SSPTRt(unwind1,re_unwind_branch_t);
2987 uw->prev = unwind;
2988 unwind = unwind1;
2989 uw->type = ((c1 == BRANCH)
2990 ? RE_UNWIND_BRANCH
2991 : RE_UNWIND_BRANCHJ);
2992 uw->lastparen = lastparen;
2993 uw->next = next;
2994 uw->locinput = locinput;
2995 uw->nextchr = nextchr;
2996#ifdef DEBUGGING
2997 uw->regindent = ++PL_regindent;
2998#endif
c277df42 2999
02db2b7b
IZ
3000 REGCP_SET(uw->lastcp);
3001
3002 /* Now go into the first branch */
3003 next = inner;
a687059c 3004 }
a0d0e21e
LW
3005 }
3006 break;
3007 case MINMOD:
3008 minmod = 1;
3009 break;
c277df42
IZ
3010 case CURLYM:
3011 {
00db4c45 3012 I32 l = 0;
c277df42 3013 CHECKPOINT lastcp;
9041c2e3 3014
c277df42
IZ
3015 /* We suppose that the next guy does not need
3016 backtracking: in particular, it is of constant length,
3017 and has no parenths to influence future backrefs. */
3018 ln = ARG1(scan); /* min to match */
3019 n = ARG2(scan); /* max to match */
c277df42
IZ
3020 paren = scan->flags;
3021 if (paren) {
3280af22
NIS
3022 if (paren > PL_regsize)
3023 PL_regsize = paren;
3024 if (paren > *PL_reglastparen)
3025 *PL_reglastparen = paren;
c277df42 3026 }
dc45a647 3027 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3028 if (paren)
3029 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3030 PL_reginput = locinput;
c277df42
IZ
3031 if (minmod) {
3032 minmod = 0;
3033 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3034 sayNO;
5f4b28b2 3035 if (ln && l == 0 && n >= ln
c277df42
IZ
3036 /* In fact, this is tricky. If paren, then the
3037 fact that we did/didnot match may influence
3038 future execution. */
3039 && !(paren && ln == 0))
3040 ln = n;
3280af22 3041 locinput = PL_reginput;
22c35a8c 3042 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3043 c1 = (U8)*STRING(next);
c277df42 3044 if (OP(next) == EXACTF)
22c35a8c 3045 c2 = PL_fold[c1];
c277df42 3046 else if (OP(next) == EXACTFL)
22c35a8c 3047 c2 = PL_fold_locale[c1];
c277df42
IZ
3048 else
3049 c2 = c1;
a0ed51b3
LW
3050 }
3051 else
c277df42 3052 c1 = c2 = -1000;
02db2b7b 3053 REGCP_SET(lastcp);
5f4b28b2 3054 /* This may be improved if l == 0. */
c277df42
IZ
3055 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3056 /* If it could work, try it. */
3057 if (c1 == -1000 ||
3280af22
NIS
3058 UCHARAT(PL_reginput) == c1 ||
3059 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3060 {
3061 if (paren) {
3062 if (n) {
cf93c79d
IZ
3063 PL_regstartp[paren] =
3064 HOPc(PL_reginput, -l) - PL_bostr;
3065 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3066 }
3067 else
cf93c79d 3068 PL_regendp[paren] = -1;
c277df42
IZ
3069 }
3070 if (regmatch(next))
3071 sayYES;
02db2b7b 3072 REGCP_UNWIND(lastcp);
c277df42
IZ
3073 }
3074 /* Couldn't or didn't -- move forward. */
3280af22 3075 PL_reginput = locinput;
c277df42
IZ
3076 if (regrepeat_hard(scan, 1, &l)) {
3077 ln++;
3280af22 3078 locinput = PL_reginput;
c277df42
IZ
3079 }
3080 else
3081 sayNO;
3082 }
a0ed51b3
LW
3083 }
3084 else {
c277df42
IZ
3085 n = regrepeat_hard(scan, n, &l);
3086 if (n != 0 && l == 0
3087 /* In fact, this is tricky. If paren, then the
3088 fact that we did/didnot match may influence
3089 future execution. */
3090 && !(paren && ln == 0))
3091 ln = n;
3280af22 3092 locinput = PL_reginput;
c277df42 3093 DEBUG_r(
5c0ca799 3094 PerlIO_printf(Perl_debug_log,
faccc32b 3095 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3096 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3097 (IV) n, (IV)l)
c277df42
IZ
3098 );
3099 if (n >= ln) {
22c35a8c 3100 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3101 c1 = (U8)*STRING(next);
c277df42 3102 if (OP(next) == EXACTF)
22c35a8c 3103 c2 = PL_fold[c1];
c277df42 3104 else if (OP(next) == EXACTFL)
22c35a8c 3105 c2 = PL_fold_locale[c1];
c277df42
IZ
3106 else
3107 c2 = c1;
a0ed51b3
LW
3108 }
3109 else
c277df42
IZ
3110 c1 = c2 = -1000;
3111 }
02db2b7b 3112 REGCP_SET(lastcp);
c277df42
IZ
3113 while (n >= ln) {
3114 /* If it could work, try it. */
3115 if (c1 == -1000 ||
3280af22
NIS
3116 UCHARAT(PL_reginput) == c1 ||
3117 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3118 {
3119 DEBUG_r(
c3464db5 3120 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3121 "%*s trying tail with n=%"IVdf"...\n",
3122 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3123 );
3124 if (paren) {
3125 if (n) {
cf93c79d
IZ
3126 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3127 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3128 }
a0ed51b3 3129 else
cf93c79d 3130 PL_regendp[paren] = -1;
c277df42 3131 }
a0ed51b3
LW
3132 if (regmatch(next))
3133 sayYES;
02db2b7b 3134 REGCP_UNWIND(lastcp);
a0ed51b3 3135 }
c277df42
IZ
3136 /* Couldn't or didn't -- back up. */
3137 n--;
dfe13c55 3138 locinput = HOPc(locinput, -l);
3280af22 3139 PL_reginput = locinput;
c277df42
IZ
3140 }
3141 }
3142 sayNO;
3143 break;
3144 }
3145 case CURLYN:
3146 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3147 if (paren > PL_regsize)
3148 PL_regsize = paren;
3149 if (paren > *PL_reglastparen)
3150 *PL_reglastparen = paren;
c277df42
IZ
3151 ln = ARG1(scan); /* min to match */
3152 n = ARG2(scan); /* max to match */
dc45a647 3153 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3154 goto repeat;
a0d0e21e 3155 case CURLY:
c277df42 3156 paren = 0;
a0d0e21e
LW
3157 ln = ARG1(scan); /* min to match */
3158 n = ARG2(scan); /* max to match */
dc45a647 3159 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3160 goto repeat;
3161 case STAR:
3162 ln = 0;
c277df42 3163 n = REG_INFTY;
a0d0e21e 3164 scan = NEXTOPER(scan);
c277df42 3165 paren = 0;
a0d0e21e
LW
3166 goto repeat;
3167 case PLUS:
c277df42
IZ
3168 ln = 1;
3169 n = REG_INFTY;
3170 scan = NEXTOPER(scan);
3171 paren = 0;
3172 repeat:
a0d0e21e
LW
3173 /*
3174 * Lookahead to avoid useless match attempts
3175 * when we know what character comes next.
3176 */
22c35a8c 3177 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3178 U8 *s = (U8*)STRING(next);
3179 if (!UTF) {
3180 c2 = c1 = *s;
3181 if (OP(next) == EXACTF)
3182 c2 = PL_fold[c1];
3183 else if (OP(next) == EXACTFL)
3184 c2 = PL_fold_locale[c1];
3185 }
3186 else { /* UTF */
3187 if (OP(next) == EXACTF) {
3188 c1 = to_utf8_lower(s);
3189 c2 = to_utf8_upper(s);
3190 }
3191 else {
9041c2e3 3192 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3193 }
3194 }
bbce6d69 3195 }
a0d0e21e 3196 else
bbce6d69 3197 c1 = c2 = -1000;
3280af22 3198 PL_reginput = locinput;
a0d0e21e 3199 if (minmod) {
c277df42 3200 CHECKPOINT lastcp;
a0d0e21e
LW
3201 minmod = 0;
3202 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3203 sayNO;
a0ed51b3 3204 locinput = PL_reginput;
02db2b7b 3205 REGCP_SET(lastcp);
0fe9bf95 3206 if (c1 != -1000) {
1aa99e6b 3207 char *e; /* Should not check after this */
0fe9bf95
IZ
3208 char *old = locinput;
3209
1aa99e6b 3210 if (n == REG_INFTY) {
0fe9bf95 3211 e = PL_regeol - 1;
1aa99e6b
IH
3212 if (do_utf8)
3213 while (UTF8_IS_CONTINUATION(*(U8*)e))
3214 e--;
3215 }
3216 else if (do_utf8) {
3217 int m = n - ln;
3218 for (e = locinput;
3219 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3220 e += UTF8SKIP(e);
3221 }
3222 else {
3223 e = locinput + n - ln;
3224 if (e >= PL_regeol)
3225 e = PL_regeol - 1;
3226 }
0fe9bf95 3227 while (1) {
1aa99e6b 3228 int count;
0fe9bf95 3229 /* Find place 'next' could work */
1aa99e6b
IH
3230 if (!do_utf8) {
3231 if (c1 == c2) {
3232 while (locinput <= e && *locinput != c1)
3233 locinput++;
3234 } else {
9041c2e3 3235 while (locinput <= e
1aa99e6b
IH
3236 && *locinput != c1
3237 && *locinput != c2)
3238 locinput++;
3239 }
3240 count = locinput - old;
3241 }
3242 else {
3243 STRLEN len;
3244 if (c1 == c2) {
3245 for (count = 0;
3246 locinput <= e &&
9041c2e3 3247 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3248 count++)
3249 locinput += len;
3250
3251 } else {
3252 for (count = 0; locinput <= e; count++) {
9041c2e3 3253 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3254 if (c == c1 || c == c2)
3255 break;
9041c2e3 3256 locinput += len;
1aa99e6b
IH
3257 }
3258 }
0fe9bf95 3259 }
9041c2e3 3260 if (locinput > e)
0fe9bf95
IZ
3261 sayNO;
3262 /* PL_reginput == old now */
3263 if (locinput != old) {
3264 ln = 1; /* Did some */
1aa99e6b 3265 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3266 sayNO;
3267 }
3268 /* PL_reginput == locinput now */
29d1e993 3269 TRYPAREN(paren, ln, locinput);
0fe9bf95 3270 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3271 REGCP_UNWIND(lastcp);
0fe9bf95 3272 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3273 old = locinput;
3274 if (do_utf8)
3275 locinput += UTF8SKIP(locinput);
3276 else
3277 locinput++;
0fe9bf95
IZ
3278 }
3279 }
3280 else
c277df42 3281 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3282 UV c;
3283 if (c1 != -1000) {
3284 if (do_utf8)
9041c2e3 3285 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3286 else
9041c2e3 3287 c = UCHARAT(PL_reginput);
2390ecbc
PP
3288 /* If it could work, try it. */
3289 if (c == c1 || c == c2)
3290 {
3291 TRYPAREN(paren, n, PL_reginput);
3292 REGCP_UNWIND(lastcp);
3293 }
1aa99e6b 3294 }
a0d0e21e 3295 /* If it could work, try it. */
2390ecbc 3296 else if (c1 == -1000)
bbce6d69 3297 {
29d1e993 3298 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3299 REGCP_UNWIND(lastcp);
bbce6d69 3300 }
c277df42 3301 /* Couldn't or didn't -- move forward. */
a0ed51b3 3302 PL_reginput = locinput;
a0d0e21e
LW
3303 if (regrepeat(scan, 1)) {
3304 ln++;
a0ed51b3
LW
3305 locinput = PL_reginput;
3306 }
3307 else
4633a7c4 3308 sayNO;
a0d0e21e
LW
3309 }
3310 }
3311 else {
c277df42 3312 CHECKPOINT lastcp;
a0d0e21e 3313 n = regrepeat(scan, n);
a0ed51b3 3314 locinput = PL_reginput;
22c35a8c 3315 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3316 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3317 ln = n; /* why back off? */
1aeab75a
GS
3318 /* ...because $ and \Z can match before *and* after
3319 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3320 We should back off by one in this case. */
3321 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3322 ln--;
3323 }
02db2b7b 3324 REGCP_SET(lastcp);
c277df42 3325 if (paren) {
8fa7f367 3326 UV c = 0;
c277df42 3327 while (n >= ln) {
1aa99e6b
IH
3328 if (c1 != -1000) {
3329 if (do_utf8)
9041c2e3 3330 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3331 else
9041c2e3 3332 c = UCHARAT(PL_reginput);
1aa99e6b 3333 }
c277df42 3334 /* If it could work, try it. */
1aa99e6b 3335 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3336 {
29d1e993 3337 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3338 REGCP_UNWIND(lastcp);
c277df42
IZ
3339 }
3340 /* Couldn't or didn't -- back up. */
3341 n--;
dfe13c55 3342 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3343 }
a0ed51b3
LW
3344 }
3345 else {
8fa7f367 3346 UV c = 0;
c277df42 3347 while (n >= ln) {
1aa99e6b
IH
3348 if (c1 != -1000) {
3349 if (do_utf8)
9041c2e3 3350 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3351 else
9041c2e3 3352 c = UCHARAT(PL_reginput);
1aa99e6b 3353 }
c277df42 3354 /* If it could work, try it. */
1aa99e6b 3355 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3356 {
29d1e993 3357 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3358 REGCP_UNWIND(lastcp);
c277df42
IZ
3359 }
3360 /* Couldn't or didn't -- back up. */
3361 n--;
dfe13c55 3362 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3363 }
a0d0e21e
LW
3364 }
3365 }
4633a7c4 3366 sayNO;
c277df42 3367 break;
a0d0e21e 3368 case END:
0f5d15d6
IZ
3369 if (PL_reg_call_cc) {
3370 re_cc_state *cur_call_cc = PL_reg_call_cc;
3371 CURCUR *cctmp = PL_regcc;
3372 regexp *re = PL_reg_re;
3373 CHECKPOINT cp, lastcp;
3374
3375 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3376 REGCP_SET(lastcp);
0f5d15d6
IZ
3377 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3378 the caller. */
3379 PL_reginput = locinput; /* Make position available to
3380 the callcc. */
3381 cache_re(PL_reg_call_cc->re);
3382 PL_regcc = PL_reg_call_cc->cc;
3383 PL_reg_call_cc = PL_reg_call_cc->prev;
3384 if (regmatch(cur_call_cc->node)) {
3385 PL_reg_call_cc = cur_call_cc;
3386 regcpblow(cp);
3387 sayYES;
3388 }
02db2b7b 3389 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3390 regcppop();
3391 PL_reg_call_cc = cur_call_cc;
3392 PL_regcc = cctmp;
3393 PL_reg_re = re;
3394 cache_re(re);
3395
3396 DEBUG_r(
3397 PerlIO_printf(Perl_debug_log,
3398 "%*s continuation failed...\n",
3399 REPORT_CODE_OFF+PL_regindent*2, "")
3400 );
7821416a 3401 sayNO_SILENT;
0f5d15d6 3402 }
7821416a
IZ
3403 if (locinput < PL_regtill) {
3404 DEBUG_r(PerlIO_printf(Perl_debug_log,
3405 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3406 PL_colors[4],
3407 (long)(locinput - PL_reg_starttry),
3408 (long)(PL_regtill - PL_reg_starttry),
3409 PL_colors[5]));
3410 sayNO_FINAL; /* Cannot match: too short. */
3411 }
3412 PL_reginput = locinput; /* put where regtry can find it */
3413 sayYES_FINAL; /* Success! */
7e5428c5 3414 case SUCCEED:
3280af22 3415 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3416 sayYES_LOUD; /* Success! */
c277df42
IZ
3417 case SUSPEND:
3418 n = 1;
9fe1d20c 3419 PL_reginput = locinput;
9041c2e3 3420 goto do_ifmatch;
a0d0e21e 3421 case UNLESSM:
c277df42 3422 n = 0;
a0ed51b3 3423 if (scan->flags) {
efb30f32
HS
3424 s = HOPBACKc(locinput, scan->flags);
3425 if (!s)
3426 goto say_yes;
3427 PL_reginput = s;
a0ed51b3
LW
3428 }
3429 else
3430 PL_reginput = locinput;
c277df42
IZ
3431 goto do_ifmatch;
3432 case IFMATCH:
3433 n = 1;
a0ed51b3 3434 if (scan->flags) {
efb30f32
HS
3435 s = HOPBACKc(locinput, scan->flags);
3436 if (!s)
3437 goto say_no;
3438 PL_reginput = s;
a0ed51b3
LW
3439 }
3440 else
3441 PL_reginput = locinput;
3442
c277df42 3443 do_ifmatch:
c277df42
IZ
3444 inner = NEXTOPER(NEXTOPER(scan));
3445 if (regmatch(inner) != n) {
3446 say_no:
3447 if (logical) {
3448 logical = 0;
3449 sw = 0;
3450 goto do_longjump;
a0ed51b3
LW
3451 }
3452 else
c277df42
IZ
3453 sayNO;
3454 }
3455 say_yes:
3456 if (logical) {
3457 logical = 0;
3458 sw = 1;
3459 }
fe44a5e8 3460 if (OP(scan) == SUSPEND) {
3280af22 3461 locinput = PL_reginput;
565764a8 3462 nextchr = UCHARAT(locinput);
fe44a5e8 3463 }
c277df42
IZ
3464 /* FALL THROUGH. */
3465 case LONGJMP:
3466 do_longjump:
3467 next = scan + ARG(scan);
3468 if (next == scan)
3469 next = NULL;
a0d0e21e
LW
3470 break;
3471 default:
b900a521 3472 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3473 PTR2UV(scan), OP(scan));
cea2e8a9 3474 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3475 }
02db2b7b 3476 reenter:
a0d0e21e
LW
3477 scan = next;
3478 }
a687059c 3479
a0d0e21e
LW
3480 /*
3481 * We get here only if there's trouble -- normally "case END" is
3482 * the terminating point.
3483 */
cea2e8a9 3484 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3485 /*NOTREACHED*/
4633a7c4
LW
3486 sayNO;
3487
7821416a
IZ
3488yes_loud:
3489 DEBUG_r(
3490 PerlIO_printf(Perl_debug_log,
3491 "%*s %scould match...%s\n",
3492 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3493 );
3494 goto yes;
3495yes_final:
3496 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3497 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3498yes:
3499#ifdef DEBUGGING
3280af22 3500 PL_regindent--;
4633a7c4 3501#endif
02db2b7b
IZ
3502
3503#if 0 /* Breaks $^R */
3504 if (unwind)
3505 regcpblow(firstcp);
3506#endif
4633a7c4
LW
3507 return 1;
3508
3509no:
7821416a
IZ
3510 DEBUG_r(
3511 PerlIO_printf(Perl_debug_log,
3512 "%*s %sfailed...%s\n",
3513 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3514 );
3515 goto do_no;
3516no_final:
3517do_no:
02db2b7b
IZ
3518 if (unwind) {
3519 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3520
3521 switch (uw->type) {
3522 case RE_UNWIND_BRANCH:
3523 case RE_UNWIND_BRANCHJ:
3524 {
3525 re_unwind_branch_t *uwb = &(uw->branch);
3526 I32 lastparen = uwb->lastparen;
9041c2e3 3527
02db2b7b
IZ
3528 REGCP_UNWIND(uwb->lastcp);
3529 for (n = *PL_reglastparen; n > lastparen; n--)
3530 PL_regendp[n] = -1;
3531 *PL_reglastparen = n;
3532 scan = next = uwb->next;
9041c2e3
NIS
3533 if ( !scan ||
3534 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3535 ? BRANCH : BRANCHJ) ) { /* Failure */
3536 unwind = uwb->prev;
3537#ifdef DEBUGGING
3538 PL_regindent--;
3539#endif
3540 goto do_no;
3541 }
3542 /* Have more choice yet. Reuse the same uwb. */
3543 /*SUPPRESS 560*/
3544 if ((n = (uwb->type == RE_UNWIND_BRANCH
3545 ? NEXT_OFF(next) : ARG(next))))
3546 next += n;
3547 else
3548 next = NULL; /* XXXX Needn't unwinding in this case... */
3549 uwb->next = next;
3550 next = NEXTOPER(scan);
3551 if (uwb->type == RE_UNWIND_BRANCHJ)
3552 next = NEXTOPER(next);
3553 locinput = uwb->locinput;
3554 nextchr = uwb->nextchr;
3555#ifdef DEBUGGING
3556 PL_regindent = uwb->regindent;
3557#endif
3558
3559 goto reenter;
3560 }
3561 /* NOT REACHED */
3562 default:
3563 Perl_croak(aTHX_ "regexp unwind memory corruption");
3564 }
3565 /* NOT REACHED */
3566 }
4633a7c4 3567#ifdef DEBUGGING
3280af22 3568 PL_regindent--;
4633a7c4 3569#endif
a0d0e21e 3570 return 0;
a687059c
LW
3571}
3572
3573/*
3574 - regrepeat - repeatedly match something simple, report how many
3575 */
3576/*
3577 * [This routine now assumes that it will only match on things of length 1.
3578 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3579 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3580 */
76e3520e 3581STATIC I32
cea2e8a9 3582S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3583{
a0d0e21e 3584 register char *scan;
a0d0e21e 3585 register I32 c;
3280af22 3586 register char *loceol = PL_regeol;
a0ed51b3 3587 register I32 hardcount = 0;
ffc61ed2 3588 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3589
3280af22 3590 scan = PL_reginput;
c277df42 3591 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3592 loceol = scan + max;
a0d0e21e 3593 switch (OP(p)) {
22c35a8c 3594 case REG_ANY:
1aa99e6b 3595 if (do_utf8) {
ffc61ed2 3596 loceol = PL_regeol;
1aa99e6b 3597 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3598 scan += UTF8SKIP(scan);
3599 hardcount++;
3600 }
3601 } else {
3602 while (scan < loceol && *scan != '\n')
3603 scan++;
a0ed51b3
LW
3604 }
3605 break;
ffc61ed2 3606 case SANY:
3baa4c62 3607 scan = loceol;
a0ed51b3 3608 break;
f33976b4
DB
3609 case CANY:
3610 scan = loceol;
3611 break;
bbce6d69 3612 case EXACT: /* length of string is 1 */
cd439c50 3613 c = (U8)*STRING(p);
bbce6d69 3614 while (scan < loceol && UCHARAT(scan) == c)
3615 scan++;
3616 break;
3617 case EXACTF: /* length of string is 1 */
cd439c50 3618 c = (U8)*STRING(p);
bbce6d69 3619 while (scan < loceol &&
22c35a8c 3620 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3621 scan++;
3622 break;
3623 case EXACTFL: /* length of string is 1 */
3280af22 3624 PL_reg_flags |= RF_tainted;
cd439c50 3625 c = (U8)*STRING(p);
bbce6d69 3626 while (scan < loceol &&
22c35a8c 3627 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3628 scan++;
3629 break;
3630 case ANYOF:
ffc61ed2
JH
3631 if (do_utf8) {
3632 loceol = PL_regeol;
1aa99e6b
IH
3633 while (hardcount < max && scan < loceol &&
3634 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3635 scan += UTF8SKIP(scan);
3636 hardcount++;
3637 }
3638 } else {
3639 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3640 scan++;
3641 }
a0d0e21e
LW
3642 break;
3643 case ALNUM:
1aa99e6b 3644 if (do_utf8) {
ffc61ed2 3645 loceol = PL_regeol;
8269fa76 3646 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3647 while (hardcount < max && scan < loceol &&
3568d838 3648 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3649 scan += UTF8SKIP(scan);
3650 hardcount++;
3651 }
3652 } else {
3653 while (scan < loceol && isALNUM(*scan))
3654 scan++;
a0ed51b3
LW
3655 }
3656 break;
bbce6d69 3657 case ALNUML:
3280af22 3658 PL_reg_flags |= RF_tainted;
1aa99e6b 3659 if (do_utf8) {
ffc61ed2 3660 loceol = PL_regeol;
1aa99e6b
IH
3661 while (hardcount < max && scan < loceol &&
3662 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3663 scan += UTF8SKIP(scan);
3664 hardcount++;
3665 }
3666 } else {
3667 while (scan < loceol && isALNUM_LC(*scan))
3668 scan++;
a0ed51b3
LW
3669 }
3670 break;
a0d0e21e 3671 case NALNUM:
1aa99e6b 3672 if (do_utf8) {
ffc61ed2 3673 loceol = PL_regeol;
8269fa76 3674 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3675 while (hardcount < max && scan < loceol &&
3568d838 3676 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3677 scan += UTF8SKIP(scan);
3678 hardcount++;
3679 }
3680 } else {
3681 while (scan < loceol && !isALNUM(*scan))
3682 scan++;
a0ed51b3
LW
3683 }
3684 break;
bbce6d69 3685 case NALNUML:
3280af22 3686 PL_reg_flags |= RF_tainted;
1aa99e6b 3687 if (do_utf8) {
ffc61ed2 3688 loceol = PL_regeol;
1aa99e6b
IH
3689 while (hardcount < max && scan < loceol &&
3690 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3691 scan += UTF8SKIP(scan);
3692 hardcount++;
3693 }
3694 } else {
3695 while (scan < loceol && !isALNUM_LC(*scan))
3696 scan++;
a0ed51b3
LW
3697 }
3698 break;
a0d0e21e 3699 case SPACE:
1aa99e6b 3700 if (do_utf8) {
ffc61ed2 3701 loceol = PL_regeol;
8269fa76 3702 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3703 while (hardcount < max && scan < loceol &&
3568d838
JH
3704 (*scan == ' ' ||
3705 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3706 scan += UTF8SKIP(scan);
3707 hardcount++;
3708 }
3709 } else {
3710 while (scan < loceol && isSPACE(*scan))
3711 scan++;
a0ed51b3
LW
3712 }
3713 break;
bbce6d69 3714 case SPACEL:
3280af22 3715 PL_reg_flags |= RF_tainted;
1aa99e6b 3716 if (do_utf8) {
ffc61ed2 3717 loceol = PL_regeol;
1aa99e6b 3718 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3719 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3720 scan += UTF8SKIP(scan);
3721 hardcount++;
3722 }
3723 } else {
3724 while (scan < loceol && isSPACE_LC(*scan))
3725 scan++;
a0ed51b3
LW
3726 }
3727 break;
a0d0e21e 3728 case NSPACE:
1aa99e6b 3729 if (do_utf8) {
ffc61ed2 3730 loceol = PL_regeol;
8269fa76 3731 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3732 while (hardcount < max && scan < loceol &&
3568d838
JH
3733 !(*scan == ' ' ||
3734 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3735 scan += UTF8SKIP(scan);
3736 hardcount++;
3737 }
3738 } else {
3739 while (scan < loceol && !isSPACE(*scan))
3740 scan++;
3741 break;
a0ed51b3 3742 }
bbce6d69 3743 case NSPACEL:
3280af22 3744 PL_reg_flags |= RF_tainted;
1aa99e6b 3745 if (do_utf8) {
ffc61ed2 3746 loceol = PL_regeol;
1aa99e6b 3747 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3748 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3749 scan += UTF8SKIP(scan);
3750 hardcount++;
3751 }
3752 } else {
3753 while (scan < loceol && !isSPACE_LC(*scan))
3754 scan++;
a0ed51b3
LW
3755 }
3756 break;
a0d0e21e 3757 case DIGIT:
1aa99e6b 3758 if (do_utf8) {
ffc61ed2 3759 loceol = PL_regeol;
8269fa76 3760 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3761 while (hardcount < max && scan < loceol &&
3568d838 3762 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3763 scan += UTF8SKIP(scan);
3764 hardcount++;
3765 }
3766 } else {
3767 while (scan < loceol && isDIGIT(*scan))
3768 scan++;
a0ed51b3
LW
3769 }
3770 break;
a0d0e21e 3771 case NDIGIT:
1aa99e6b 3772 if (do_utf8) {
ffc61ed2 3773 loceol = PL_regeol;
8269fa76 3774 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3775 while (hardcount < max && scan < loceol &&
3568d838 3776 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3777 scan += UTF8SKIP(scan);
3778 hardcount++;
3779 }
3780 } else {
3781 while (scan < loceol && !isDIGIT(*scan))
3782 scan++;
a0ed51b3
LW
3783 }
3784 break;
a0d0e21e
LW
3785 default: /* Called on something of 0 width. */
3786 break; /* So match right here or not at all. */
3787 }
a687059c 3788
a0ed51b3
LW
3789 if (hardcount)
3790 c = hardcount;
3791 else
3792 c = scan - PL_reginput;
3280af22 3793 PL_reginput = scan;
a687059c 3794
9041c2e3 3795 DEBUG_r(
c277df42
IZ
3796 {
3797 SV *prop = sv_newmortal();
3798
3799 regprop(prop, p);
9041c2e3
NIS
3800 PerlIO_printf(Perl_debug_log,
3801 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3802 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3803 });
9041c2e3 3804
a0d0e21e 3805 return(c);
a687059c
LW
3806}
3807
3808/*
c277df42 3809 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3810 *
c277df42
IZ
3811 * The repeater is supposed to have constant length.
3812 */
3813
76e3520e 3814STATIC I32
cea2e8a9 3815S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3816{
b7953727 3817 register char *scan = Nullch;
c277df42 3818 register char *start;
3280af22 3819 register char *loceol = PL_regeol;
a0ed51b3 3820 I32 l = 0;
708e3b05 3821 I32 count = 0, res = 1;
a0ed51b3
LW
3822
3823 if (!max)
3824 return 0;
c277df42 3825
3280af22 3826 start = PL_reginput;
1aa99e6b 3827 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3828 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3829 if (!count++) {
3830 l = 0;
3831 while (start < PL_reginput) {
3832 l++;
3833 start += UTF8SKIP(start);
3834 }
3835 *lp = l;
3836 if (l == 0)
3837 return max;
3838 }
3839 if (count == max)
3840 return count;
3841 }
3842 }
3843 else {
708e3b05 3844 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3845 if (!count++) {
3846 *lp = l = PL_reginput - start;
3847 if (max != REG_INFTY && l*max < loceol - scan)
3848 loceol = scan + l*max;
3849 if (l == 0)
3850 return max;
c277df42
IZ
3851 }
3852 }
3853 }
708e3b05 3854 if (!res)
3280af22 3855 PL_reginput = scan;
9041c2e3 3856
a0ed51b3 3857 return count;
c277df42
IZ
3858}
3859
3860/*
ffc61ed2
JH
3861- regclass_swash - prepare the utf8 swash
3862*/
3863
3864SV *
3865Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3866{
3867 SV *sw = NULL;
3868 SV *si = NULL;
3869
3870 if (PL_regdata && PL_regdata->count) {
3871 U32 n = ARG(node);
3872
3873 if (PL_regdata->what[n] == 's') {
3874 SV *rv = (SV*)PL_regdata->data[n];
3875 AV *av = (AV*)SvRV((SV*)rv);
3876 SV **a;
9041c2e3 3877
ffc61ed2
JH
3878 si = *av_fetch(av, 0, FALSE);
3879 a = av_fetch(av, 1, FALSE);
9041c2e3 3880
ffc61ed2
JH
3881 if (a)
3882 sw = *a;
3883 else if (si && doinit) {
3884 sw = swash_init("utf8", "", si, 1, 0);
3885 (void)av_store(av, 1, sw);
3886 }
3887 }
3888 }
3889
3890 if (initsvp)
3891 *initsvp = si;
3892
3893 return sw;
3894}
3895
3896/*
cb8d8820 3897 - reginclass - determine if a character falls into a character class
bbce6d69 3898 */
3899
76e3520e 3900STATIC bool
ffc61ed2 3901S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3902{
ffc61ed2 3903 char flags = ANYOF_FLAGS(n);
bbce6d69 3904 bool match = FALSE;
1aa99e6b 3905 UV c;
3568d838 3906 STRLEN len = 0;
1aa99e6b 3907
3568d838 3908 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3909
ffc61ed2
JH
3910 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3911 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3912 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3913 match = TRUE;
bbce6d69 3914 }
3568d838 3915 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3916 match = TRUE;
ffc61ed2
JH
3917 if (!match) {
3918 SV *sw = regclass_swash(n, TRUE, 0);
3919
3920 if (sw) {
3568d838 3921 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3922 match = TRUE;
3923 else if (flags & ANYOF_FOLD) {
3924 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3925
ffc61ed2
JH
3926 if (flags & ANYOF_LOCALE) {
3927 PL_reg_flags |= RF_tainted;
9041c2e3 3928 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3929 }
1aa99e6b 3930 else
9041c2e3 3931 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3932 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3933 match = TRUE;
3934 }
3935 }
bbce6d69 3936 }
3937 }
1aa99e6b 3938 if (!match && c < 256) {
ffc61ed2
JH
3939 if (ANYOF_BITMAP_TEST(n, c))
3940 match = TRUE;
3941 else if (flags & ANYOF_FOLD) {
3568d838 3942 I32 f;
a0ed51b3 3943
ffc61ed2
JH
3944 if (flags & ANYOF_LOCALE) {
3945 PL_reg_flags |= RF_tainted;
3946 f = PL_fold_locale[c];
3947 }
3948 else
3949 f = PL_fold[c];
3950 if (f != c && ANYOF_BITMAP_TEST(n, f))
3951 match = TRUE;
3952 }
3953
3954 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3955 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3956 if (
3957 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3961 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3962 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3963 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3964 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3965 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3966 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3967 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3968 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3969 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3970 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3971 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3972 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3973 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3974 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3975 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3976 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3977 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3978 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3979 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3980 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3981 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3982 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3983 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3984 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3985 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3986 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3987 ) /* How's that for a conditional? */
3988 {
3989 match = TRUE;
3990 }
a0ed51b3 3991 }
a0ed51b3
LW
3992 }
3993
a0ed51b3
LW
3994 return (flags & ANYOF_INVERT) ? !match : match;
3995}
161b471a 3996
dfe13c55 3997STATIC U8 *
cea2e8a9 3998S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3999{
1aa99e6b
IH
4000 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4001}
4002
4003STATIC U8 *
4004S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4005{
a0ed51b3 4006 if (off >= 0) {
1aa99e6b 4007 while (off-- && s < lim) {
ffc61ed2 4008 /* XXX could check well-formedness here */
a0ed51b3 4009 s += UTF8SKIP(s);
ffc61ed2 4010 }
a0ed51b3
LW
4011 }
4012 else {
4013 while (off++) {
1aa99e6b 4014 if (s > lim) {
a0ed51b3 4015 s--;
ffc61ed2 4016 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4017 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4018 s--;
ffc61ed2
JH
4019 }
4020 /* XXX could check well-formedness here */
a0ed51b3
LW
4021 }
4022 }
4023 }
4024 return s;
4025}
161b471a 4026
dfe13c55 4027STATIC U8 *
1aa99e6b 4028S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4029{
1aa99e6b
IH
4030 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4031}
4032
4033STATIC U8 *
4034S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4035{
4036 if (off >= 0) {
1aa99e6b 4037 while (off-- && s < lim) {
ffc61ed2 4038 /* XXX could check well-formedness here */
a0ed51b3 4039 s += UTF8SKIP(s);
ffc61ed2 4040 }
a0ed51b3
LW
4041 if (off >= 0)
4042 return 0;
4043 }
4044 else {
4045 while (off++) {
1aa99e6b 4046 if (s > lim) {
a0ed51b3 4047 s--;
ffc61ed2 4048 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4049 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4050 s--;
ffc61ed2
JH
4051 }
4052 /* XXX could check well-formedness here */
a0ed51b3
LW
4053 }
4054 else
4055 break;
4056 }
4057 if (off <= 0)
4058 return 0;
4059 }
4060 return s;
4061}
51371543
GS
4062
4063#ifdef PERL_OBJECT
51371543
GS
4064#include "XSUB.h"
4065#endif
4066
4067static void
4068restore_pos(pTHXo_ void *arg)
4069{
51371543
GS
4070 if (PL_reg_eval_set) {
4071 if (PL_reg_oldsaved) {
4072 PL_reg_re->subbeg = PL_reg_oldsaved;
4073 PL_reg_re->sublen = PL_reg_oldsavedlen;
4074 RX_MATCH_COPIED_on(PL_reg_re);
4075 }
4076 PL_reg_magic->mg_len = PL_reg_oldpos;
4077 PL_reg_eval_set = 0;
4078 PL_curpm = PL_reg_oldcurpm;
4079 }
4080}