This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[ID 20010630.001] Editorial nits in README.solaris
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
b1ce53c5
JH
150#define REGCP_OTHER_ELEMS 5
151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
162 SSPUSHPTR(PL_reginput);
41123dfd
JH
163#define REGCP_FRAME_ELEMS 2
164/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
165 * are needed for the regexp context stack bookkeeping. */
166 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 167 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 168
a0d0e21e
LW
169 return retval;
170}
171
c277df42 172/* These are needed since we do not localize EVAL nodes: */
02db2b7b 173# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 174 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 175 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 176
02db2b7b 177# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 178 PerlIO_printf(Perl_debug_log, \
faccc32b 179 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 180 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 181
76e3520e 182STATIC char *
cea2e8a9 183S_regcppop(pTHX)
a0d0e21e 184{
b1ce53c5 185 I32 i;
a0d0e21e
LW
186 U32 paren = 0;
187 char *input;
cf93c79d 188 I32 tmps;
b1ce53c5
JH
189
190 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 191 i = SSPOPINT;
b1ce53c5
JH
192 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
193 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 194 input = (char *) SSPOPPTR;
3280af22
NIS
195 *PL_reglastparen = SSPOPINT;
196 PL_regsize = SSPOPINT;
b1ce53c5
JH
197
198 /* Now restore the parentheses context. */
41123dfd
JH
199 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
200 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 201 paren = (U32)SSPOPINT;
3280af22 202 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
203 PL_regstartp[paren] = SSPOPINT;
204 tmps = SSPOPINT;
3280af22
NIS
205 if (paren <= *PL_reglastparen)
206 PL_regendp[paren] = tmps;
c277df42 207 DEBUG_r(
c3464db5 208 PerlIO_printf(Perl_debug_log,
b900a521 209 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 210 (UV)paren, (IV)PL_regstartp[paren],
b900a521 211 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 212 (IV)PL_regendp[paren],
3280af22 213 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 214 );
a0d0e21e 215 }
c277df42 216 DEBUG_r(
3280af22 217 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 218 PerlIO_printf(Perl_debug_log,
faccc32b
JH
219 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
220 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
221 }
222 );
daf18116 223#if 1
dafc8851
JH
224 /* It would seem that the similar code in regtry()
225 * already takes care of this, and in fact it is in
226 * a better location to since this code can #if 0-ed out
227 * but the code in regtry() is needed or otherwise tests
228 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
229 * (as of patchlevel 7877) will fail. Then again,
230 * this code seems to be necessary or otherwise
231 * building DynaLoader will fail:
232 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
233 * --jhi */
3280af22
NIS
234 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
235 if (paren > PL_regsize)
cf93c79d
IZ
236 PL_regstartp[paren] = -1;
237 PL_regendp[paren] = -1;
a0d0e21e 238 }
dafc8851 239#endif
a0d0e21e
LW
240 return input;
241}
242
0f5d15d6 243STATIC char *
cea2e8a9 244S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
245{
246 I32 tmp = PL_savestack_ix;
247
248 PL_savestack_ix = ss;
249 regcppop();
250 PL_savestack_ix = tmp;
942e002e 251 return Nullch;
0f5d15d6
IZ
252}
253
254typedef struct re_cc_state
255{
256 I32 ss;
257 regnode *node;
258 struct re_cc_state *prev;
259 CURCUR *cc;
260 regexp *re;
261} re_cc_state;
262
02db2b7b 263#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 264
29d1e993
HS
265#define TRYPAREN(paren, n, input) { \
266 if (paren) { \
267 if (n) { \
268 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
269 PL_regendp[paren] = input - PL_bostr; \
270 } \
271 else \
272 PL_regendp[paren] = -1; \
273 } \
274 if (regmatch(next)) \
275 sayYES; \
276 if (paren && n) \
277 PL_regendp[paren] = -1; \
278}
279
280
a687059c 281/*
e50aee73 282 * pregexec and friends
a687059c
LW
283 */
284
285/*
c277df42 286 - pregexec - match a regexp against a string
a687059c 287 */
c277df42 288I32
864dbfa3 289Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 290 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
291/* strend: pointer to null at end of string */
292/* strbeg: real beginning of string */
293/* minend: end of match must be >=minend after stringarg. */
294/* nosave: For optimizations. */
295{
296 return
9041c2e3 297 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
298 nosave ? 0 : REXEC_COPY_STR);
299}
0f5d15d6
IZ
300
301STATIC void
cea2e8a9 302S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
303{
304 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
305#ifdef DEBUGGING
306 PL_regprogram = prog->program;
307#endif
308 PL_regnpar = prog->nparens;
9041c2e3
NIS
309 PL_regdata = prog->data;
310 PL_reg_re = prog;
0f5d15d6 311}
22e551b9 312
9041c2e3 313/*
cad2e5aa
JH
314 * Need to implement the following flags for reg_anch:
315 *
316 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
317 * USE_INTUIT_ML
318 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
319 * INTUIT_AUTORITATIVE_ML
320 * INTUIT_ONCE_NOML - Intuit can match in one location only.
321 * INTUIT_ONCE_ML
322 *
323 * Another flag for this function: SECOND_TIME (so that float substrs
324 * with giant delta may be not rechecked).
325 */
326
327/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
328
2c2d71f5 329/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
330 Otherwise, only SvCUR(sv) is used to get strbeg. */
331
332/* XXXX We assume that strpos is strbeg unless sv. */
333
6eb5f6b9
JH
334/* XXXX Some places assume that there is a fixed substring.
335 An update may be needed if optimizer marks as "INTUITable"
336 RExen without fixed substrings. Similarly, it is assumed that
337 lengths of all the strings are no more than minlen, thus they
338 cannot come from lookahead.
339 (Or minlen should take into account lookahead.) */
340
2c2d71f5
JH
341/* A failure to find a constant substring means that there is no need to make
342 an expensive call to REx engine, thus we celebrate a failure. Similarly,
343 finding a substring too deep into the string means that less calls to
30944b6d
IZ
344 regtry() should be needed.
345
346 REx compiler's optimizer found 4 possible hints:
347 a) Anchored substring;
348 b) Fixed substring;
349 c) Whether we are anchored (beginning-of-line or \G);
350 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 351 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
352 string which does not contradict any of them.
353 */
2c2d71f5 354
6eb5f6b9
JH
355/* Most of decisions we do here should have been done at compile time.
356 The nodes of the REx which we used for the search should have been
357 deleted from the finite automaton. */
358
cad2e5aa
JH
359char *
360Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
361 char *strend, U32 flags, re_scream_pos_data *data)
362{
b7953727 363 register I32 start_shift = 0;
cad2e5aa 364 /* Should be nonnegative! */
b7953727 365 register I32 end_shift = 0;
2c2d71f5
JH
366 register char *s;
367 register SV *check;
a1933d95 368 char *strbeg;
cad2e5aa
JH
369 char *t;
370 I32 ml_anch;
6eb5f6b9 371 register char *other_last = Nullch; /* other substr checked before this */
b7953727 372 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
373#ifdef DEBUGGING
374 char *i_strpos = strpos;
375#endif
cad2e5aa
JH
376
377 DEBUG_r( if (!PL_colorset) reginitcolors() );
378 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 379 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
380 PL_colors[4],PL_colors[5],PL_colors[0],
381 prog->precomp,
382 PL_colors[1],
383 (strlen(prog->precomp) > 60 ? "..." : ""),
384 PL_colors[0],
b900a521 385 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
386 strpos, PL_colors[1],
387 (strend - strpos > 60 ? "..." : ""))
388 );
389
1aa99e6b
IH
390 if (prog->reganch & ROPT_UTF8)
391 PL_reg_flags |= RF_utf8;
392
393 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 394 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 395 goto fail;
2c2d71f5 396 }
a1933d95 397 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 398 PL_regeol = strend;
653099ff 399 check = prog->check_substr;
2c2d71f5 400 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
401 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
402 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 403 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 404
7e25d62c
JH
405 if (!ml_anch) {
406 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
407 | ROPT_IMPLICIT)) /* not a real BOL */
408 /* SvCUR is not set on references: SvRV and SvPVX overlap */
409 && sv && !SvROK(sv)
410 && (strpos != strbeg)) {
411 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
412 goto fail;
413 }
414 if (prog->check_offset_min == prog->check_offset_max &&
415 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 416 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
417 I32 slen;
418
1aa99e6b 419 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
420 if (SvTAIL(check)) {
421 slen = SvCUR(check); /* >= 1 */
cad2e5aa 422
9041c2e3 423 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
424 || (strend - s == slen && strend[-1] != '\n')) {
425 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
426 goto fail_finish;
cad2e5aa
JH
427 }
428 /* Now should match s[0..slen-2] */
429 slen--;
653099ff 430 if (slen && (*SvPVX(check) != *s
cad2e5aa 431 || (slen > 1
653099ff 432 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
433 report_neq:
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
435 goto fail_finish;
436 }
cad2e5aa 437 }
653099ff
GS
438 else if (*SvPVX(check) != *s
439 || ((slen = SvCUR(check)) > 1
440 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
441 goto report_neq;
442 goto success_at_start;
7e25d62c 443 }
cad2e5aa 444 }
2c2d71f5 445 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 446 s = strpos;
2c2d71f5 447 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 448 end_shift = prog->minlen - start_shift -
653099ff 449 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 450 if (!ml_anch) {
653099ff
GS
451 I32 end = prog->check_offset_max + CHR_SVLEN(check)
452 - (SvTAIL(check) != 0);
1aa99e6b 453 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
454
455 if (end_shift < eshift)
456 end_shift = eshift;
457 }
cad2e5aa 458 }
2c2d71f5 459 else { /* Can match at random position */
cad2e5aa
JH
460 ml_anch = 0;
461 s = strpos;
2c2d71f5
JH
462 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
463 /* Should be nonnegative! */
464 end_shift = prog->minlen - start_shift -
653099ff 465 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
466 }
467
2c2d71f5 468#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 469 if (end_shift < 0)
6bbae5e6 470 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
471#endif
472
2c2d71f5
JH
473 restart:
474 /* Find a possible match in the region s..strend by looking for
475 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 476 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
477 I32 p = -1; /* Internal iterator of scream. */
478 I32 *pp = data ? data->scream_pos : &p;
479
2c2d71f5
JH
480 if (PL_screamfirst[BmRARE(check)] >= 0
481 || ( BmRARE(check) == '\n'
482 && (BmPREVIOUS(check) == SvCUR(check) - 1)
483 && SvTAIL(check) ))
9041c2e3 484 s = screaminstr(sv, check,
2c2d71f5 485 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 486 else
2c2d71f5 487 goto fail_finish;
cad2e5aa
JH
488 if (data)
489 *data->scream_olds = s;
490 }
3baa4c62
JH
491 else if (prog->reganch & ROPT_SANY_SEEN)
492 s = fbm_instr((U8*)(s + start_shift),
493 (U8*)(strend - end_shift),
494 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 495 else
1aa99e6b
IH
496 s = fbm_instr(HOP3(s, start_shift, strend),
497 HOP3(strend, -end_shift, strbeg),
2c2d71f5 498 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
499
500 /* Update the count-of-usability, remove useless subpatterns,
501 unshift s. */
2c2d71f5
JH
502
503 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
504 (s ? "Found" : "Did not find"),
505 ((check == prog->anchored_substr) ? "anchored" : "floating"),
506 PL_colors[0],
7b0972df
JH
507 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
508 SvPVX(check),
2c2d71f5
JH
509 PL_colors[1], (SvTAIL(check) ? "$" : ""),
510 (s ? " at offset " : "...\n") ) );
511
512 if (!s)
513 goto fail_finish;
514
6eb5f6b9
JH
515 check_at = s;
516
2c2d71f5 517 /* Finish the diagnostic message */
30944b6d 518 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
519
520 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
521 Start with the other substr.
522 XXXX no SCREAM optimization yet - and a very coarse implementation
523 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
524 *always* match. Probably should be marked during compile...
525 Probably it is right to do no SCREAM here...
526 */
527
528 if (prog->float_substr && prog->anchored_substr) {
30944b6d 529 /* Take into account the "other" substring. */
2c2d71f5
JH
530 /* XXXX May be hopelessly wrong for UTF... */
531 if (!other_last)
6eb5f6b9 532 other_last = strpos;
2c2d71f5 533 if (check == prog->float_substr) {
30944b6d
IZ
534 do_other_anchored:
535 {
1aa99e6b 536 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
537 char *s1 = s;
538
2c2d71f5
JH
539 t = s - prog->check_offset_max;
540 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
541 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 542 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 543 && t > strpos)))
30944b6d 544 /* EMPTY */;
2c2d71f5
JH
545 else
546 t = strpos;
1aa99e6b 547 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
548 if (t < other_last) /* These positions already checked */
549 t = other_last;
1aa99e6b 550 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
551 if (last < last1)
552 last1 = last;
553 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
554 /* On end-of-str: see comment below. */
555 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
556 HOP3(HOP3(last1, prog->anchored_offset, strend)
557 + SvCUR(prog->anchored_substr),
558 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
559 prog->anchored_substr,
560 PL_multiline ? FBMrf_MULTILINE : 0);
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
563 (s ? "Found" : "Contradicts"),
564 PL_colors[0],
7b0972df
JH
565 (int)(SvCUR(prog->anchored_substr)
566 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
567 SvPVX(prog->anchored_substr),
568 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
569 if (!s) {
570 if (last1 >= last2) {
571 DEBUG_r(PerlIO_printf(Perl_debug_log,
572 ", giving up...\n"));
573 goto fail_finish;
574 }
575 DEBUG_r(PerlIO_printf(Perl_debug_log,
576 ", trying floating at offset %ld...\n",
1aa99e6b
IH
577 (long)(HOP3c(s1, 1, strend) - i_strpos)));
578 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
579 s = HOP3c(last, 1, strend);
2c2d71f5
JH
580 goto restart;
581 }
582 else {
583 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 584 (long)(s - i_strpos)));
1aa99e6b
IH
585 t = HOP3c(s, -prog->anchored_offset, strbeg);
586 other_last = HOP3c(s, 1, strend);
30944b6d 587 s = s1;
2c2d71f5
JH
588 if (t == strpos)
589 goto try_at_start;
2c2d71f5
JH
590 goto try_at_offset;
591 }
30944b6d 592 }
2c2d71f5
JH
593 }
594 else { /* Take into account the floating substring. */
595 char *last, *last1;
596 char *s1 = s;
597
1aa99e6b
IH
598 t = HOP3c(s, -start_shift, strbeg);
599 last1 = last =
600 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
601 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
602 last = HOP3c(t, prog->float_max_offset, strend);
603 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
604 if (s < other_last)
605 s = other_last;
2c2d71f5
JH
606 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
607 /* fbm_instr() takes into account exact value of end-of-str
608 if the check is SvTAIL(ed). Since false positives are OK,
609 and end-of-str is not later than strend we are OK. */
610 s = fbm_instr((unsigned char*)s,
611 (unsigned char*)last + SvCUR(prog->float_substr)
612 - (SvTAIL(prog->float_substr)!=0),
613 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
614 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
615 (s ? "Found" : "Contradicts"),
616 PL_colors[0],
7b0972df
JH
617 (int)(SvCUR(prog->float_substr)
618 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
619 SvPVX(prog->float_substr),
620 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
621 if (!s) {
622 if (last1 == last) {
623 DEBUG_r(PerlIO_printf(Perl_debug_log,
624 ", giving up...\n"));
625 goto fail_finish;
626 }
627 DEBUG_r(PerlIO_printf(Perl_debug_log,
628 ", trying anchored starting at offset %ld...\n",
30944b6d 629 (long)(s1 + 1 - i_strpos)));
803ff556 630 other_last = last;
1aa99e6b 631 s = HOP3c(t, 1, strend);
2c2d71f5
JH
632 goto restart;
633 }
634 else {
635 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 636 (long)(s - i_strpos)));
803ff556 637 other_last = s; /* Fix this later. --Hugo */
30944b6d 638 s = s1;
2c2d71f5
JH
639 if (t == strpos)
640 goto try_at_start;
2c2d71f5
JH
641 goto try_at_offset;
642 }
643 }
cad2e5aa 644 }
2c2d71f5
JH
645
646 t = s - prog->check_offset_max;
2c2d71f5
JH
647 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
648 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
649 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
650 && t > strpos))) {
2c2d71f5
JH
651 /* Fixed substring is found far enough so that the match
652 cannot start at strpos. */
653 try_at_offset:
cad2e5aa 654 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
655 /* Eventually fbm_*() should handle this, but often
656 anchored_offset is not 0, so this check will not be wasted. */
657 /* XXXX In the code below we prefer to look for "^" even in
658 presence of anchored substrings. And we search even
659 beyond the found float position. These pessimizations
660 are historical artefacts only. */
661 find_anchor:
2c2d71f5 662 while (t < strend - prog->minlen) {
cad2e5aa 663 if (*t == '\n') {
4ee3650e 664 if (t < check_at - prog->check_offset_min) {
30944b6d 665 if (prog->anchored_substr) {
4ee3650e
GS
666 /* Since we moved from the found position,
667 we definitely contradict the found anchored
30944b6d
IZ
668 substr. Due to the above check we do not
669 contradict "check" substr.
670 Thus we can arrive here only if check substr
671 is float. Redo checking for "other"=="fixed".
672 */
9041c2e3 673 strpos = t + 1;
30944b6d
IZ
674 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
675 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
676 goto do_other_anchored;
677 }
4ee3650e
GS
678 /* We don't contradict the found floating substring. */
679 /* XXXX Why not check for STCLASS? */
cad2e5aa 680 s = t + 1;
2c2d71f5 681 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 682 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
683 goto set_useful;
684 }
4ee3650e
GS
685 /* Position contradicts check-string */
686 /* XXXX probably better to look for check-string
687 than for "\n", so one should lower the limit for t? */
688 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 689 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 690 other_last = strpos = s = t + 1;
cad2e5aa
JH
691 goto restart;
692 }
693 t++;
694 }
2c2d71f5
JH
695 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
696 PL_colors[0],PL_colors[1]));
697 goto fail_finish;
cad2e5aa 698 }
f5952150
GS
699 else {
700 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
701 PL_colors[0],PL_colors[1]));
702 }
cad2e5aa
JH
703 s = t;
704 set_useful:
2c2d71f5 705 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
706 }
707 else {
f5952150 708 /* The found string does not prohibit matching at strpos,
2c2d71f5 709 - no optimization of calling REx engine can be performed,
f5952150
GS
710 unless it was an MBOL and we are not after MBOL,
711 or a future STCLASS check will fail this. */
2c2d71f5
JH
712 try_at_start:
713 /* Even in this situation we may use MBOL flag if strpos is offset
714 wrt the start of the string. */
05b4157f 715 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 716 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
717 /* May be due to an implicit anchor of m{.*foo} */
718 && !(prog->reganch & ROPT_IMPLICIT))
719 {
cad2e5aa
JH
720 t = strpos;
721 goto find_anchor;
722 }
30944b6d 723 DEBUG_r( if (ml_anch)
f5952150
GS
724 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
725 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 726 );
2c2d71f5 727 success_at_start:
30944b6d 728 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 729 && prog->check_substr /* Could be deleted already */
cad2e5aa 730 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
731 && prog->check_substr == prog->float_substr)
732 {
cad2e5aa 733 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 734 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
735 SvREFCNT_dec(prog->check_substr);
736 prog->check_substr = Nullsv; /* disable */
737 prog->float_substr = Nullsv; /* clear */
5e39e1e5 738 check = Nullsv; /* abort */
cad2e5aa 739 s = strpos;
3cf5c195
IZ
740 /* XXXX This is a remnant of the old implementation. It
741 looks wasteful, since now INTUIT can use many
6eb5f6b9 742 other heuristics. */
cad2e5aa
JH
743 prog->reganch &= ~RE_USE_INTUIT;
744 }
745 else
746 s = strpos;
747 }
748
6eb5f6b9
JH
749 /* Last resort... */
750 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
751 if (prog->regstclass) {
752 /* minlen == 0 is possible if regstclass is \b or \B,
753 and the fixed substr is ''$.
754 Since minlen is already taken into account, s+1 is before strend;
755 accidentally, minlen >= 1 guaranties no false positives at s + 1
756 even for \b or \B. But (minlen? 1 : 0) below assumes that
757 regstclass does not come from lookahead... */
758 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
759 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 760 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 761 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 762 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 763 : 1);
6eb5f6b9 764 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
765 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
766 : (prog->float_substr
767 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
768 cl_l, strend)
769 : strend);
a1933d95 770 char *startpos = strbeg;
6eb5f6b9
JH
771
772 t = s;
76384e4a 773 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 774 PL_regdata = prog->data;
76384e4a
GS
775 PL_bostr = startpos;
776 }
6eb5f6b9
JH
777 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
778 if (!s) {
779#ifdef DEBUGGING
b7953727 780 char *what = 0;
6eb5f6b9
JH
781#endif
782 if (endpos == strend) {
783 DEBUG_r( PerlIO_printf(Perl_debug_log,
784 "Could not match STCLASS...\n") );
785 goto fail;
786 }
66e933ab
GS
787 DEBUG_r( PerlIO_printf(Perl_debug_log,
788 "This position contradicts STCLASS...\n") );
653099ff
GS
789 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
790 goto fail;
6eb5f6b9
JH
791 /* Contradict one of substrings */
792 if (prog->anchored_substr) {
6eb5f6b9
JH
793 if (prog->anchored_substr == check) {
794 DEBUG_r( what = "anchored" );
795 hop_and_restart:
1aa99e6b 796 s = HOP3c(t, 1, strend);
66e933ab
GS
797 if (s + start_shift + end_shift > strend) {
798 /* XXXX Should be taken into account earlier? */
799 DEBUG_r( PerlIO_printf(Perl_debug_log,
800 "Could not match STCLASS...\n") );
801 goto fail;
802 }
5e39e1e5
HS
803 if (!check)
804 goto giveup;
6eb5f6b9 805 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 806 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
807 what, (long)(s + start_shift - i_strpos)) );
808 goto restart;
809 }
66e933ab 810 /* Have both, check_string is floating */
6eb5f6b9
JH
811 if (t + start_shift >= check_at) /* Contradicts floating=check */
812 goto retry_floating_check;
813 /* Recheck anchored substring, but not floating... */
9041c2e3 814 s = check_at;
5e39e1e5
HS
815 if (!check)
816 goto giveup;
6eb5f6b9 817 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 818 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
819 (long)(other_last - i_strpos)) );
820 goto do_other_anchored;
821 }
60e71179
GS
822 /* Another way we could have checked stclass at the
823 current position only: */
824 if (ml_anch) {
825 s = t = t + 1;
5e39e1e5
HS
826 if (!check)
827 goto giveup;
60e71179 828 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
829 "Looking for /%s^%s/m starting at offset %ld...\n",
830 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 831 goto try_at_offset;
66e933ab 832 }
60e71179
GS
833 if (!prog->float_substr) /* Could have been deleted */
834 goto fail;
6eb5f6b9
JH
835 /* Check is floating subtring. */
836 retry_floating_check:
837 t = check_at - start_shift;
838 DEBUG_r( what = "floating" );
839 goto hop_and_restart;
840 }
b7953727
JH
841 if (t != s) {
842 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 843 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
844 (long)(t - i_strpos), (long)(s - i_strpos))
845 );
846 }
847 else {
848 DEBUG_r(PerlIO_printf(Perl_debug_log,
849 "Does not contradict STCLASS...\n");
850 );
851 }
6eb5f6b9 852 }
5e39e1e5
HS
853 giveup:
854 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
855 PL_colors[4], (check ? "Guessed" : "Giving up"),
856 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 857 return s;
2c2d71f5
JH
858
859 fail_finish: /* Substring not found */
66e933ab
GS
860 if (prog->check_substr) /* could be removed already */
861 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 862 fail:
2c2d71f5 863 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
864 PL_colors[4],PL_colors[5]));
865 return Nullch;
866}
9661b544 867
6eb5f6b9 868/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
869STATIC char *
870S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 871{
6eb5f6b9
JH
872 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
873 char *m;
d8093b23
G
874 STRLEN ln;
875 unsigned int c1;
876 unsigned int c2;
6eb5f6b9
JH
877 char *e;
878 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 879 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 880
6eb5f6b9
JH
881 /* We know what class it must start with. */
882 switch (OP(c)) {
6eb5f6b9 883 case ANYOF:
a0ed51b3 884 while (s < strend) {
ffc61ed2 885 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
886 if (tmp && (norun || regtry(prog, s)))
887 goto got_it;
888 else
889 tmp = doevery;
a0ed51b3 890 }
6eb5f6b9
JH
891 else
892 tmp = 1;
ffc61ed2 893 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 894 }
6eb5f6b9
JH
895 break;
896 case EXACTF:
897 m = STRING(c);
898 ln = STR_LEN(c);
1aa99e6b
IH
899 if (UTF) {
900 c1 = to_utf8_lower((U8*)m);
901 c2 = to_utf8_upper((U8*)m);
902 }
903 else {
904 c1 = *(U8*)m;
905 c2 = PL_fold[c1];
906 }
6eb5f6b9
JH
907 goto do_exactf;
908 case EXACTFL:
909 m = STRING(c);
910 ln = STR_LEN(c);
d8093b23 911 c1 = *(U8*)m;
6eb5f6b9
JH
912 c2 = PL_fold_locale[c1];
913 do_exactf:
914 e = strend - ln;
b3c9acc1 915
6eb5f6b9
JH
916 if (norun && e < s)
917 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
918
919 if (do_utf8) {
920 STRLEN len;
921 if (c1 == c2)
922 while (s <= e) {
9041c2e3 923 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
924 && regtry(prog, s) )
925 goto got_it;
926 s += len;
927 }
928 else
929 while (s <= e) {
9041c2e3 930 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
931 if ( (c == c1 || c == c2) && regtry(prog, s) )
932 goto got_it;
933 s += len;
934 }
935 }
936 else {
937 if (c1 == c2)
938 while (s <= e) {
939 if ( *(U8*)s == c1
940 && (ln == 1 || !(OP(c) == EXACTF
941 ? ibcmp(s, m, ln)
942 : ibcmp_locale(s, m, ln)))
943 && (norun || regtry(prog, s)) )
944 goto got_it;
945 s++;
946 }
947 else
948 while (s <= e) {
949 if ( (*(U8*)s == c1 || *(U8*)s == c2)
950 && (ln == 1 || !(OP(c) == EXACTF
951 ? ibcmp(s, m, ln)
952 : ibcmp_locale(s, m, ln)))
953 && (norun || regtry(prog, s)) )
954 goto got_it;
955 s++;
956 }
b3c9acc1
IZ
957 }
958 break;
bbce6d69 959 case BOUNDL:
3280af22 960 PL_reg_flags |= RF_tainted;
bbce6d69 961 /* FALL THROUGH */
a0d0e21e 962 case BOUND:
ffc61ed2 963 if (do_utf8) {
12d33761 964 if (s == PL_bostr)
ffc61ed2
JH
965 tmp = '\n';
966 else {
1aa99e6b 967 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 968
0064a8a9
JH
969 if (s > (char*)r)
970 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
971 }
972 tmp = ((OP(c) == BOUND ?
9041c2e3 973 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 974 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
975 while (s < strend) {
976 if (tmp == !(OP(c) == BOUND ?
3568d838 977 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
978 isALNUM_LC_utf8((U8*)s)))
979 {
980 tmp = !tmp;
981 if ((norun || regtry(prog, s)))
982 goto got_it;
983 }
984 s += UTF8SKIP(s);
a687059c 985 }
a0d0e21e 986 }
667bb95a 987 else {
12d33761 988 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
989 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
990 while (s < strend) {
991 if (tmp ==
992 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
993 tmp = !tmp;
994 if ((norun || regtry(prog, s)))
995 goto got_it;
996 }
997 s++;
a0ed51b3 998 }
a0ed51b3 999 }
6eb5f6b9 1000 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1001 goto got_it;
1002 break;
bbce6d69 1003 case NBOUNDL:
3280af22 1004 PL_reg_flags |= RF_tainted;
bbce6d69 1005 /* FALL THROUGH */
a0d0e21e 1006 case NBOUND:
ffc61ed2 1007 if (do_utf8) {
12d33761 1008 if (s == PL_bostr)
ffc61ed2
JH
1009 tmp = '\n';
1010 else {
1aa99e6b 1011 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1012
0064a8a9
JH
1013 if (s > (char*)r)
1014 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1015 }
1016 tmp = ((OP(c) == NBOUND ?
9041c2e3 1017 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1018 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1019 while (s < strend) {
1020 if (tmp == !(OP(c) == NBOUND ?
3568d838 1021 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1022 isALNUM_LC_utf8((U8*)s)))
1023 tmp = !tmp;
1024 else if ((norun || regtry(prog, s)))
1025 goto got_it;
1026 s += UTF8SKIP(s);
1027 }
a0d0e21e 1028 }
667bb95a 1029 else {
12d33761 1030 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1031 tmp = ((OP(c) == NBOUND ?
1032 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1033 while (s < strend) {
1034 if (tmp ==
1035 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1036 tmp = !tmp;
1037 else if ((norun || regtry(prog, s)))
1038 goto got_it;
1039 s++;
1040 }
a0ed51b3 1041 }
6eb5f6b9 1042 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1043 goto got_it;
1044 break;
a0d0e21e 1045 case ALNUM:
ffc61ed2 1046 if (do_utf8) {
8269fa76 1047 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1048 while (s < strend) {
3568d838 1049 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1050 if (tmp && (norun || regtry(prog, s)))
1051 goto got_it;
1052 else
1053 tmp = doevery;
1054 }
bbce6d69 1055 else
ffc61ed2
JH
1056 tmp = 1;
1057 s += UTF8SKIP(s);
bbce6d69 1058 }
bbce6d69 1059 }
ffc61ed2
JH
1060 else {
1061 while (s < strend) {
1062 if (isALNUM(*s)) {
1063 if (tmp && (norun || regtry(prog, s)))
1064 goto got_it;
1065 else
1066 tmp = doevery;
1067 }
a0ed51b3 1068 else
ffc61ed2
JH
1069 tmp = 1;
1070 s++;
a0ed51b3 1071 }
a0ed51b3
LW
1072 }
1073 break;
bbce6d69 1074 case ALNUML:
3280af22 1075 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1076 if (do_utf8) {
1077 while (s < strend) {
1078 if (isALNUM_LC_utf8((U8*)s)) {
1079 if (tmp && (norun || regtry(prog, s)))
1080 goto got_it;
1081 else
1082 tmp = doevery;
1083 }
a687059c 1084 else
ffc61ed2
JH
1085 tmp = 1;
1086 s += UTF8SKIP(s);
a0d0e21e 1087 }
a0d0e21e 1088 }
ffc61ed2
JH
1089 else {
1090 while (s < strend) {
1091 if (isALNUM_LC(*s)) {
1092 if (tmp && (norun || regtry(prog, s)))
1093 goto got_it;
1094 else
1095 tmp = doevery;
1096 }
a0ed51b3 1097 else
ffc61ed2
JH
1098 tmp = 1;
1099 s++;
a0ed51b3 1100 }
a0ed51b3
LW
1101 }
1102 break;
a0d0e21e 1103 case NALNUM:
ffc61ed2 1104 if (do_utf8) {
8269fa76 1105 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1106 while (s < strend) {
3568d838 1107 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1108 if (tmp && (norun || regtry(prog, s)))
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
bbce6d69 1113 else
ffc61ed2
JH
1114 tmp = 1;
1115 s += UTF8SKIP(s);
bbce6d69 1116 }
bbce6d69 1117 }
ffc61ed2
JH
1118 else {
1119 while (s < strend) {
1120 if (!isALNUM(*s)) {
1121 if (tmp && (norun || regtry(prog, s)))
1122 goto got_it;
1123 else
1124 tmp = doevery;
1125 }
a0ed51b3 1126 else
ffc61ed2
JH
1127 tmp = 1;
1128 s++;
a0ed51b3 1129 }
a0ed51b3
LW
1130 }
1131 break;
bbce6d69 1132 case NALNUML:
3280af22 1133 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1134 if (do_utf8) {
1135 while (s < strend) {
1136 if (!isALNUM_LC_utf8((U8*)s)) {
1137 if (tmp && (norun || regtry(prog, s)))
1138 goto got_it;
1139 else
1140 tmp = doevery;
1141 }
a687059c 1142 else
ffc61ed2
JH
1143 tmp = 1;
1144 s += UTF8SKIP(s);
a687059c 1145 }
a0d0e21e 1146 }
ffc61ed2
JH
1147 else {
1148 while (s < strend) {
1149 if (!isALNUM_LC(*s)) {
1150 if (tmp && (norun || regtry(prog, s)))
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
a0ed51b3 1155 else
ffc61ed2
JH
1156 tmp = 1;
1157 s++;
a0ed51b3 1158 }
a0ed51b3
LW
1159 }
1160 break;
a0d0e21e 1161 case SPACE:
ffc61ed2 1162 if (do_utf8) {
8269fa76 1163 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1164 while (s < strend) {
3568d838 1165 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1166 if (tmp && (norun || regtry(prog, s)))
1167 goto got_it;
1168 else
1169 tmp = doevery;
1170 }
a0d0e21e 1171 else
ffc61ed2
JH
1172 tmp = 1;
1173 s += UTF8SKIP(s);
2304df62 1174 }
a0d0e21e 1175 }
ffc61ed2
JH
1176 else {
1177 while (s < strend) {
1178 if (isSPACE(*s)) {
1179 if (tmp && (norun || regtry(prog, s)))
1180 goto got_it;
1181 else
1182 tmp = doevery;
1183 }
a0ed51b3 1184 else
ffc61ed2
JH
1185 tmp = 1;
1186 s++;
a0ed51b3 1187 }
a0ed51b3
LW
1188 }
1189 break;
bbce6d69 1190 case SPACEL:
3280af22 1191 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1192 if (do_utf8) {
1193 while (s < strend) {
1194 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1195 if (tmp && (norun || regtry(prog, s)))
1196 goto got_it;
1197 else
1198 tmp = doevery;
1199 }
bbce6d69 1200 else
ffc61ed2
JH
1201 tmp = 1;
1202 s += UTF8SKIP(s);
bbce6d69 1203 }
bbce6d69 1204 }
ffc61ed2
JH
1205 else {
1206 while (s < strend) {
1207 if (isSPACE_LC(*s)) {
1208 if (tmp && (norun || regtry(prog, s)))
1209 goto got_it;
1210 else
1211 tmp = doevery;
1212 }
a0ed51b3 1213 else
ffc61ed2
JH
1214 tmp = 1;
1215 s++;
a0ed51b3 1216 }
a0ed51b3
LW
1217 }
1218 break;
a0d0e21e 1219 case NSPACE:
ffc61ed2 1220 if (do_utf8) {
8269fa76 1221 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1222 while (s < strend) {
3568d838 1223 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1224 if (tmp && (norun || regtry(prog, s)))
1225 goto got_it;
1226 else
1227 tmp = doevery;
1228 }
a0d0e21e 1229 else
ffc61ed2
JH
1230 tmp = 1;
1231 s += UTF8SKIP(s);
a687059c 1232 }
a0d0e21e 1233 }
ffc61ed2
JH
1234 else {
1235 while (s < strend) {
1236 if (!isSPACE(*s)) {
1237 if (tmp && (norun || regtry(prog, s)))
1238 goto got_it;
1239 else
1240 tmp = doevery;
1241 }
a0ed51b3 1242 else
ffc61ed2
JH
1243 tmp = 1;
1244 s++;
a0ed51b3 1245 }
a0ed51b3
LW
1246 }
1247 break;
bbce6d69 1248 case NSPACEL:
3280af22 1249 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1250 if (do_utf8) {
1251 while (s < strend) {
1252 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1253 if (tmp && (norun || regtry(prog, s)))
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
bbce6d69 1258 else
ffc61ed2
JH
1259 tmp = 1;
1260 s += UTF8SKIP(s);
bbce6d69 1261 }
bbce6d69 1262 }
ffc61ed2
JH
1263 else {
1264 while (s < strend) {
1265 if (!isSPACE_LC(*s)) {
1266 if (tmp && (norun || regtry(prog, s)))
1267 goto got_it;
1268 else
1269 tmp = doevery;
1270 }
a0ed51b3 1271 else
ffc61ed2
JH
1272 tmp = 1;
1273 s++;
a0ed51b3 1274 }
a0ed51b3
LW
1275 }
1276 break;
a0d0e21e 1277 case DIGIT:
ffc61ed2 1278 if (do_utf8) {
8269fa76 1279 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1280 while (s < strend) {
3568d838 1281 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1282 if (tmp && (norun || regtry(prog, s)))
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
a0d0e21e 1287 else
ffc61ed2
JH
1288 tmp = 1;
1289 s += UTF8SKIP(s);
2b69d0c2 1290 }
a0d0e21e 1291 }
ffc61ed2
JH
1292 else {
1293 while (s < strend) {
1294 if (isDIGIT(*s)) {
1295 if (tmp && (norun || regtry(prog, s)))
1296 goto got_it;
1297 else
1298 tmp = doevery;
1299 }
a0ed51b3 1300 else
ffc61ed2
JH
1301 tmp = 1;
1302 s++;
a0ed51b3 1303 }
a0ed51b3
LW
1304 }
1305 break;
b8c5462f
JH
1306 case DIGITL:
1307 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1308 if (do_utf8) {
1309 while (s < strend) {
1310 if (isDIGIT_LC_utf8((U8*)s)) {
1311 if (tmp && (norun || regtry(prog, s)))
1312 goto got_it;
1313 else
1314 tmp = doevery;
1315 }
b8c5462f 1316 else
ffc61ed2
JH
1317 tmp = 1;
1318 s += UTF8SKIP(s);
b8c5462f 1319 }
b8c5462f 1320 }
ffc61ed2
JH
1321 else {
1322 while (s < strend) {
1323 if (isDIGIT_LC(*s)) {
1324 if (tmp && (norun || regtry(prog, s)))
1325 goto got_it;
1326 else
1327 tmp = doevery;
1328 }
b8c5462f 1329 else
ffc61ed2
JH
1330 tmp = 1;
1331 s++;
b8c5462f 1332 }
b8c5462f
JH
1333 }
1334 break;
a0d0e21e 1335 case NDIGIT:
ffc61ed2 1336 if (do_utf8) {
8269fa76 1337 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1338 while (s < strend) {
3568d838 1339 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1340 if (tmp && (norun || regtry(prog, s)))
1341 goto got_it;
1342 else
1343 tmp = doevery;
1344 }
a0d0e21e 1345 else
ffc61ed2
JH
1346 tmp = 1;
1347 s += UTF8SKIP(s);
a687059c 1348 }
a0d0e21e 1349 }
ffc61ed2
JH
1350 else {
1351 while (s < strend) {
1352 if (!isDIGIT(*s)) {
1353 if (tmp && (norun || regtry(prog, s)))
1354 goto got_it;
1355 else
1356 tmp = doevery;
1357 }
a0ed51b3 1358 else
ffc61ed2
JH
1359 tmp = 1;
1360 s++;
a0ed51b3 1361 }
a0ed51b3
LW
1362 }
1363 break;
b8c5462f
JH
1364 case NDIGITL:
1365 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1366 if (do_utf8) {
1367 while (s < strend) {
1368 if (!isDIGIT_LC_utf8((U8*)s)) {
1369 if (tmp && (norun || regtry(prog, s)))
1370 goto got_it;
1371 else
1372 tmp = doevery;
1373 }
b8c5462f 1374 else
ffc61ed2
JH
1375 tmp = 1;
1376 s += UTF8SKIP(s);
b8c5462f 1377 }
a0ed51b3 1378 }
ffc61ed2
JH
1379 else {
1380 while (s < strend) {
1381 if (!isDIGIT_LC(*s)) {
1382 if (tmp && (norun || regtry(prog, s)))
1383 goto got_it;
1384 else
1385 tmp = doevery;
1386 }
cf93c79d 1387 else
ffc61ed2
JH
1388 tmp = 1;
1389 s++;
b8c5462f 1390 }
b8c5462f
JH
1391 }
1392 break;
b3c9acc1 1393 default:
3c3eec57
GS
1394 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1395 break;
d6a28714 1396 }
6eb5f6b9
JH
1397 return 0;
1398 got_it:
1399 return s;
1400}
1401
1402/*
1403 - regexec_flags - match a regexp against a string
1404 */
1405I32
1406Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1407 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1408/* strend: pointer to null at end of string */
1409/* strbeg: real beginning of string */
1410/* minend: end of match must be >=minend after stringarg. */
1411/* data: May be used for some additional optimizations. */
1412/* nosave: For optimizations. */
1413{
6eb5f6b9
JH
1414 register char *s;
1415 register regnode *c;
1416 register char *startpos = stringarg;
6eb5f6b9
JH
1417 I32 minlen; /* must match at least this many chars */
1418 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1419 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1420 constant substr. */ /* CC */
1421 I32 end_shift = 0; /* Same for the end. */ /* CC */
1422 I32 scream_pos = -1; /* Internal iterator of scream. */
1423 char *scream_olds;
1424 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1425 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1426
1427 PL_regcc = 0;
1428
1429 cache_re(prog);
1430#ifdef DEBUGGING
aea4f609 1431 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1432#endif
1433
1434 /* Be paranoid... */
1435 if (prog == NULL || startpos == NULL) {
1436 Perl_croak(aTHX_ "NULL regexp parameter");
1437 return 0;
1438 }
1439
1440 minlen = prog->minlen;
1aa99e6b 1441 if (do_utf8) {
3baa4c62
JH
1442 if (!(prog->reganch & ROPT_SANY_SEEN))
1443 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1444 }
1445 else {
1446 if (strend - startpos < minlen) goto phooey;
1447 }
6eb5f6b9 1448
6eb5f6b9
JH
1449 /* Check validity of program. */
1450 if (UCHARAT(prog->program) != REG_MAGIC) {
1451 Perl_croak(aTHX_ "corrupted regexp program");
1452 }
1453
1454 PL_reg_flags = 0;
1455 PL_reg_eval_set = 0;
1456 PL_reg_maxiter = 0;
1457
1458 if (prog->reganch & ROPT_UTF8)
1459 PL_reg_flags |= RF_utf8;
1460
1461 /* Mark beginning of line for ^ and lookbehind. */
1462 PL_regbol = startpos;
1463 PL_bostr = strbeg;
1464 PL_reg_sv = sv;
1465
1466 /* Mark end of line for $ (and such) */
1467 PL_regeol = strend;
1468
1469 /* see how far we have to get to not match where we matched before */
1470 PL_regtill = startpos+minend;
1471
1472 /* We start without call_cc context. */
1473 PL_reg_call_cc = 0;
1474
1475 /* If there is a "must appear" string, look for it. */
1476 s = startpos;
1477
1478 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1479 MAGIC *mg;
1480
1481 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1482 PL_reg_ganch = startpos;
1483 else if (sv && SvTYPE(sv) >= SVt_PVMG
1484 && SvMAGIC(sv)
14befaf4
DM
1485 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1486 && mg->mg_len >= 0) {
6eb5f6b9
JH
1487 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1488 if (prog->reganch & ROPT_ANCH_GPOS) {
1489 if (s > PL_reg_ganch)
1490 goto phooey;
1491 s = PL_reg_ganch;
1492 }
1493 }
1494 else /* pos() not defined */
1495 PL_reg_ganch = strbeg;
1496 }
1497
699c3c34
JH
1498 if (do_utf8 == (UTF!=0) &&
1499 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1500 re_scream_pos_data d;
1501
1502 d.scream_olds = &scream_olds;
1503 d.scream_pos = &scream_pos;
1504 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1505 if (!s)
1506 goto phooey; /* not present */
1507 }
1508
1509 DEBUG_r( if (!PL_colorset) reginitcolors() );
1510 DEBUG_r(PerlIO_printf(Perl_debug_log,
1511 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1512 PL_colors[4],PL_colors[5],PL_colors[0],
1513 prog->precomp,
1514 PL_colors[1],
1515 (strlen(prog->precomp) > 60 ? "..." : ""),
1516 PL_colors[0],
1517 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1518 startpos, PL_colors[1],
1519 (strend - startpos > 60 ? "..." : ""))
1520 );
1521
1522 /* Simplest case: anchored match need be tried only once. */
1523 /* [unless only anchor is BOL and multiline is set] */
1524 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1525 if (s == startpos && regtry(prog, startpos))
1526 goto got_it;
1527 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1528 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1529 {
1530 char *end;
1531
1532 if (minlen)
1533 dontbother = minlen - 1;
1aa99e6b 1534 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1535 /* for multiline we only have to try after newlines */
1536 if (prog->check_substr) {
1537 if (s == startpos)
1538 goto after_try;
1539 while (1) {
1540 if (regtry(prog, s))
1541 goto got_it;
1542 after_try:
1543 if (s >= end)
1544 goto phooey;
1545 if (prog->reganch & RE_USE_INTUIT) {
1546 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1547 if (!s)
1548 goto phooey;
1549 }
1550 else
1551 s++;
1552 }
1553 } else {
1554 if (s > startpos)
1555 s--;
1556 while (s < end) {
1557 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1558 if (regtry(prog, s))
1559 goto got_it;
1560 }
1561 }
1562 }
1563 }
1564 goto phooey;
1565 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1566 if (regtry(prog, PL_reg_ganch))
1567 goto got_it;
1568 goto phooey;
1569 }
1570
1571 /* Messy cases: unanchored match. */
9041c2e3 1572 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1573 /* we have /x+whatever/ */
1574 /* it must be a one character string (XXXX Except UTF?) */
1575 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1576#ifdef DEBUGGING
1577 int did_match = 0;
1578#endif
1579
1aa99e6b 1580 if (do_utf8) {
6eb5f6b9
JH
1581 while (s < strend) {
1582 if (*s == ch) {
bf93d4cc 1583 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1584 if (regtry(prog, s)) goto got_it;
1585 s += UTF8SKIP(s);
1586 while (s < strend && *s == ch)
1587 s += UTF8SKIP(s);
1588 }
1589 s += UTF8SKIP(s);
1590 }
1591 }
1592 else {
1593 while (s < strend) {
1594 if (*s == ch) {
bf93d4cc 1595 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1596 if (regtry(prog, s)) goto got_it;
1597 s++;
1598 while (s < strend && *s == ch)
1599 s++;
1600 }
1601 s++;
1602 }
1603 }
b7953727 1604 DEBUG_r(if (!did_match)
bf93d4cc 1605 PerlIO_printf(Perl_debug_log,
b7953727
JH
1606 "Did not find anchored character...\n")
1607 );
6eb5f6b9
JH
1608 }
1609 /*SUPPRESS 560*/
1aa99e6b
IH
1610 else if (do_utf8 == (UTF!=0) &&
1611 (prog->anchored_substr != Nullsv
9041c2e3 1612 || (prog->float_substr != Nullsv
1aa99e6b 1613 && prog->float_max_offset < strend - s))) {
9041c2e3 1614 SV *must = prog->anchored_substr
6eb5f6b9 1615 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1616 I32 back_max =
6eb5f6b9 1617 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1618 I32 back_min =
6eb5f6b9 1619 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1620 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1621 -(I32)(CHR_SVLEN(must)
1aa99e6b 1622 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1623 char *last1; /* Last position checked before */
bf93d4cc
GS
1624#ifdef DEBUGGING
1625 int did_match = 0;
1626#endif
6eb5f6b9
JH
1627
1628 if (s > PL_bostr)
1629 last1 = HOPc(s, -1);
1630 else
1631 last1 = s - 1; /* bogus */
1632
1633 /* XXXX check_substr already used to find `s', can optimize if
1634 check_substr==must. */
1635 scream_pos = -1;
1636 dontbother = end_shift;
1637 strend = HOPc(strend, -dontbother);
1638 while ( (s <= last) &&
9041c2e3 1639 ((flags & REXEC_SCREAM)
1aa99e6b 1640 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1641 end_shift, &scream_pos, 0))
1aa99e6b 1642 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1643 (unsigned char*)strend, must,
6eb5f6b9 1644 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1645 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1646 if (HOPc(s, -back_max) > last1) {
1647 last1 = HOPc(s, -back_min);
1648 s = HOPc(s, -back_max);
1649 }
1650 else {
1651 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1652
1653 last1 = HOPc(s, -back_min);
1654 s = t;
1655 }
1aa99e6b 1656 if (do_utf8) {
6eb5f6b9
JH
1657 while (s <= last1) {
1658 if (regtry(prog, s))
1659 goto got_it;
1660 s += UTF8SKIP(s);
1661 }
1662 }
1663 else {
1664 while (s <= last1) {
1665 if (regtry(prog, s))
1666 goto got_it;
1667 s++;
1668 }
1669 }
1670 }
b7953727
JH
1671 DEBUG_r(if (!did_match)
1672 PerlIO_printf(Perl_debug_log,
1673 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1674 ((must == prog->anchored_substr)
1675 ? "anchored" : "floating"),
1676 PL_colors[0],
1677 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1678 SvPVX(must),
b7953727
JH
1679 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1680 );
6eb5f6b9
JH
1681 goto phooey;
1682 }
155aba94 1683 else if ((c = prog->regstclass)) {
66e933ab
GS
1684 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1685 /* don't bother with what can't match */
6eb5f6b9 1686 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1687 DEBUG_r({
1688 SV *prop = sv_newmortal();
1689 regprop(prop, c);
1690 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1691 });
6eb5f6b9
JH
1692 if (find_byclass(prog, c, s, strend, startpos, 0))
1693 goto got_it;
bf93d4cc 1694 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1695 }
1696 else {
1697 dontbother = 0;
1698 if (prog->float_substr != Nullsv) { /* Trim the end. */
1699 char *last;
d6a28714
JH
1700
1701 if (flags & REXEC_SCREAM) {
1702 last = screaminstr(sv, prog->float_substr, s - strbeg,
1703 end_shift, &scream_pos, 1); /* last one */
1704 if (!last)
ffc61ed2 1705 last = scream_olds; /* Only one occurrence. */
b8c5462f 1706 }
d6a28714
JH
1707 else {
1708 STRLEN len;
1709 char *little = SvPV(prog->float_substr, len);
1710
1711 if (SvTAIL(prog->float_substr)) {
1712 if (memEQ(strend - len + 1, little, len - 1))
1713 last = strend - len + 1;
1714 else if (!PL_multiline)
9041c2e3 1715 last = memEQ(strend - len, little, len)
d6a28714 1716 ? strend - len : Nullch;
b8c5462f 1717 else
d6a28714
JH
1718 goto find_last;
1719 } else {
1720 find_last:
9041c2e3 1721 if (len)
d6a28714 1722 last = rninstr(s, strend, little, little + len);
b8c5462f 1723 else
d6a28714 1724 last = strend; /* matching `$' */
b8c5462f 1725 }
b8c5462f 1726 }
bf93d4cc
GS
1727 if (last == NULL) {
1728 DEBUG_r(PerlIO_printf(Perl_debug_log,
1729 "%sCan't trim the tail, match fails (should not happen)%s\n",
1730 PL_colors[4],PL_colors[5]));
1731 goto phooey; /* Should not happen! */
1732 }
d6a28714
JH
1733 dontbother = strend - last + prog->float_min_offset;
1734 }
1735 if (minlen && (dontbother < minlen))
1736 dontbother = minlen - 1;
1737 strend -= dontbother; /* this one's always in bytes! */
1738 /* We don't know much -- general case. */
1aa99e6b 1739 if (do_utf8) {
d6a28714
JH
1740 for (;;) {
1741 if (regtry(prog, s))
1742 goto got_it;
1743 if (s >= strend)
1744 break;
b8c5462f 1745 s += UTF8SKIP(s);
d6a28714
JH
1746 };
1747 }
1748 else {
1749 do {
1750 if (regtry(prog, s))
1751 goto got_it;
1752 } while (s++ < strend);
1753 }
1754 }
1755
1756 /* Failure. */
1757 goto phooey;
1758
1759got_it:
1760 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1761
1762 if (PL_reg_eval_set) {
1763 /* Preserve the current value of $^R */
1764 if (oreplsv != GvSV(PL_replgv))
1765 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1766 restored, the value remains
1767 the same. */
1768 restore_pos(aTHXo_ 0);
1769 }
1770
1771 /* make sure $`, $&, $', and $digit will work later */
1772 if ( !(flags & REXEC_NOT_FIRST) ) {
1773 if (RX_MATCH_COPIED(prog)) {
1774 Safefree(prog->subbeg);
1775 RX_MATCH_COPIED_off(prog);
1776 }
1777 if (flags & REXEC_COPY_STR) {
1778 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1779
1780 s = savepvn(strbeg, i);
1781 prog->subbeg = s;
1782 prog->sublen = i;
1783 RX_MATCH_COPIED_on(prog);
1784 }
1785 else {
1786 prog->subbeg = strbeg;
1787 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1788 }
1789 }
9041c2e3 1790
d6a28714
JH
1791 return 1;
1792
1793phooey:
bf93d4cc
GS
1794 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1795 PL_colors[4],PL_colors[5]));
d6a28714
JH
1796 if (PL_reg_eval_set)
1797 restore_pos(aTHXo_ 0);
1798 return 0;
1799}
1800
1801/*
1802 - regtry - try match at specific point
1803 */
1804STATIC I32 /* 0 failure, 1 success */
1805S_regtry(pTHX_ regexp *prog, char *startpos)
1806{
d6a28714
JH
1807 register I32 i;
1808 register I32 *sp;
1809 register I32 *ep;
1810 CHECKPOINT lastcp;
1811
02db2b7b
IZ
1812#ifdef DEBUGGING
1813 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1814#endif
d6a28714
JH
1815 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1816 MAGIC *mg;
1817
1818 PL_reg_eval_set = RS_init;
1819 DEBUG_r(DEBUG_s(
b900a521
JH
1820 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1821 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1822 ));
e8347627 1823 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1824 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1825 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1826 SAVETMPS;
1827 /* Apparently this is not needed, judging by wantarray. */
e8347627 1828 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1829 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1830
1831 if (PL_reg_sv) {
1832 /* Make $_ available to executed code. */
1833 if (PL_reg_sv != DEFSV) {
1834 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1835 SAVESPTR(DEFSV);
1836 DEFSV = PL_reg_sv;
b8c5462f 1837 }
d6a28714 1838
9041c2e3 1839 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1840 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1841 /* prepare for quick setting of pos */
14befaf4
DM
1842 sv_magic(PL_reg_sv, (SV*)0,
1843 PERL_MAGIC_regex_global, Nullch, 0);
1844 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1845 mg->mg_len = -1;
b8c5462f 1846 }
d6a28714
JH
1847 PL_reg_magic = mg;
1848 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1849 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1850 }
1851 if (!PL_reg_curpm)
0f79a09d 1852 Newz(22,PL_reg_curpm, 1, PMOP);
aaa362c4 1853 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1854 PL_reg_oldcurpm = PL_curpm;
1855 PL_curpm = PL_reg_curpm;
1856 if (RX_MATCH_COPIED(prog)) {
1857 /* Here is a serious problem: we cannot rewrite subbeg,
1858 since it may be needed if this match fails. Thus
1859 $` inside (?{}) could fail... */
1860 PL_reg_oldsaved = prog->subbeg;
1861 PL_reg_oldsavedlen = prog->sublen;
1862 RX_MATCH_COPIED_off(prog);
1863 }
1864 else
1865 PL_reg_oldsaved = Nullch;
1866 prog->subbeg = PL_bostr;
1867 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1868 }
1869 prog->startp[0] = startpos - PL_bostr;
1870 PL_reginput = startpos;
1871 PL_regstartp = prog->startp;
1872 PL_regendp = prog->endp;
1873 PL_reglastparen = &prog->lastparen;
1874 prog->lastparen = 0;
1875 PL_regsize = 0;
1876 DEBUG_r(PL_reg_starttry = startpos);
1877 if (PL_reg_start_tmpl <= prog->nparens) {
1878 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1879 if(PL_reg_start_tmp)
1880 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1881 else
1882 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1883 }
1884
1885 /* XXXX What this code is doing here?!!! There should be no need
1886 to do this again and again, PL_reglastparen should take care of
3dd2943c 1887 this! --ilya*/
dafc8851
JH
1888
1889 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1890 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1891 * PL_reglastparen), is not needed at all by the test suite
1892 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1893 * enough, for building DynaLoader, or otherwise this
1894 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1895 * will happen. Meanwhile, this code *is* needed for the
1896 * above-mentioned test suite tests to succeed. The common theme
1897 * on those tests seems to be returning null fields from matches.
1898 * --jhi */
dafc8851 1899#if 1
d6a28714
JH
1900 sp = prog->startp;
1901 ep = prog->endp;
1902 if (prog->nparens) {
09e8ae3b 1903 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1904 *++sp = -1;
1905 *++ep = -1;
1906 }
1907 }
dafc8851 1908#endif
02db2b7b 1909 REGCP_SET(lastcp);
d6a28714
JH
1910 if (regmatch(prog->program + 1)) {
1911 prog->endp[0] = PL_reginput - PL_bostr;
1912 return 1;
1913 }
02db2b7b 1914 REGCP_UNWIND(lastcp);
d6a28714
JH
1915 return 0;
1916}
1917
02db2b7b
IZ
1918#define RE_UNWIND_BRANCH 1
1919#define RE_UNWIND_BRANCHJ 2
1920
1921union re_unwind_t;
1922
1923typedef struct { /* XX: makes sense to enlarge it... */
1924 I32 type;
1925 I32 prev;
1926 CHECKPOINT lastcp;
1927} re_unwind_generic_t;
1928
1929typedef struct {
1930 I32 type;
1931 I32 prev;
1932 CHECKPOINT lastcp;
1933 I32 lastparen;
1934 regnode *next;
1935 char *locinput;
1936 I32 nextchr;
1937#ifdef DEBUGGING
1938 int regindent;
1939#endif
1940} re_unwind_branch_t;
1941
1942typedef union re_unwind_t {
1943 I32 type;
1944 re_unwind_generic_t generic;
1945 re_unwind_branch_t branch;
1946} re_unwind_t;
1947
d6a28714
JH
1948/*
1949 - regmatch - main matching routine
1950 *
1951 * Conceptually the strategy is simple: check to see whether the current
1952 * node matches, call self recursively to see whether the rest matches,
1953 * and then act accordingly. In practice we make some effort to avoid
1954 * recursion, in particular by going through "ordinary" nodes (that don't
1955 * need to know whether the rest of the match failed) by a loop instead of
1956 * by recursion.
1957 */
1958/* [lwall] I've hoisted the register declarations to the outer block in order to
1959 * maybe save a little bit of pushing and popping on the stack. It also takes
1960 * advantage of machines that use a register save mask on subroutine entry.
1961 */
1962STATIC I32 /* 0 failure, 1 success */
1963S_regmatch(pTHX_ regnode *prog)
1964{
d6a28714
JH
1965 register regnode *scan; /* Current node. */
1966 regnode *next; /* Next node. */
1967 regnode *inner; /* Next node in internal branch. */
1968 register I32 nextchr; /* renamed nextchr - nextchar colides with
1969 function of same name */
1970 register I32 n; /* no or next */
b7953727
JH
1971 register I32 ln = 0; /* len or last */
1972 register char *s = Nullch; /* operand or save */
d6a28714 1973 register char *locinput = PL_reginput;
b7953727 1974 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 1975 int minmod = 0, sw = 0, logical = 0;
02db2b7b 1976 I32 unwind = 0;
b7953727 1977#if 0
02db2b7b 1978 I32 firstcp = PL_savestack_ix;
b7953727 1979#endif
ffc61ed2 1980 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1981
d6a28714
JH
1982#ifdef DEBUGGING
1983 PL_regindent++;
1984#endif
1985
1986 /* Note that nextchr is a byte even in UTF */
1987 nextchr = UCHARAT(locinput);
1988 scan = prog;
1989 while (scan != NULL) {
1990#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1991#if 1
d6a28714
JH
1992# define sayYES goto yes
1993# define sayNO goto no
7821416a
IZ
1994# define sayYES_FINAL goto yes_final
1995# define sayYES_LOUD goto yes_loud
1996# define sayNO_FINAL goto no_final
1997# define sayNO_SILENT goto do_no
d6a28714
JH
1998# define saySAME(x) if (x) goto yes; else goto no
1999# define REPORT_CODE_OFF 24
2000#else
2001# define sayYES return 1
2002# define sayNO return 0
7821416a
IZ
2003# define sayYES_FINAL return 1
2004# define sayYES_LOUD return 1
2005# define sayNO_FINAL return 0
2006# define sayNO_SILENT return 0
d6a28714
JH
2007# define saySAME(x) return x
2008#endif
2009 DEBUG_r( {
2010 SV *prop = sv_newmortal();
2011 int docolor = *PL_colors[0];
2012 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2013 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2014 /* The part of the string before starttry has one color
2015 (pref0_len chars), between starttry and current
2016 position another one (pref_len - pref0_len chars),
2017 after the current position the third one.
2018 We assume that pref0_len <= pref_len, otherwise we
2019 decrease pref0_len. */
9041c2e3 2020 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2021 ? (5 + taill) - l : locinput - PL_bostr;
2022 int pref0_len;
d6a28714 2023
1aa99e6b
IH
2024 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2025 pref_len++;
2026 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2027 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2028 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2029 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2030 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2031 l--;
d6a28714
JH
2032 if (pref0_len < 0)
2033 pref0_len = 0;
2034 if (pref0_len > pref_len)
2035 pref0_len = pref_len;
2036 regprop(prop, scan);
9041c2e3 2037 PerlIO_printf(Perl_debug_log,
b900a521 2038 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2039 (IV)(locinput - PL_bostr),
2040 PL_colors[4], pref0_len,
d6a28714 2041 locinput - pref_len, PL_colors[5],
9041c2e3 2042 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2043 locinput - pref_len + pref0_len, PL_colors[3],
2044 (docolor ? "" : "> <"),
2045 PL_colors[0], l, locinput, PL_colors[1],
2046 15 - l - pref_len + 1,
2047 "",
b900a521 2048 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2049 SvPVX(prop));
2050 } );
2051
2052 next = scan + NEXT_OFF(scan);
2053 if (next == scan)
2054 next = NULL;
2055
2056 switch (OP(scan)) {
2057 case BOL:
12d33761
HS
2058 if (locinput == PL_bostr || (PL_multiline &&
2059 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2060 {
2061 /* regtill = regbol; */
b8c5462f
JH
2062 break;
2063 }
d6a28714
JH
2064 sayNO;
2065 case MBOL:
12d33761
HS
2066 if (locinput == PL_bostr ||
2067 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2068 {
b8c5462f
JH
2069 break;
2070 }
d6a28714
JH
2071 sayNO;
2072 case SBOL:
c2a73568 2073 if (locinput == PL_bostr)
b8c5462f 2074 break;
d6a28714
JH
2075 sayNO;
2076 case GPOS:
2077 if (locinput == PL_reg_ganch)
2078 break;
2079 sayNO;
2080 case EOL:
2081 if (PL_multiline)
2082 goto meol;
2083 else
2084 goto seol;
2085 case MEOL:
2086 meol:
2087 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2088 sayNO;
b8c5462f 2089 break;
d6a28714
JH
2090 case SEOL:
2091 seol:
2092 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2093 sayNO;
d6a28714 2094 if (PL_regeol - locinput > 1)
b8c5462f 2095 sayNO;
b8c5462f 2096 break;
d6a28714
JH
2097 case EOS:
2098 if (PL_regeol != locinput)
b8c5462f 2099 sayNO;
d6a28714 2100 break;
ffc61ed2 2101 case SANY:
d6a28714 2102 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2103 sayNO;
b8c5462f 2104 nextchr = UCHARAT(++locinput);
a0d0e21e 2105 break;
ffc61ed2 2106 case REG_ANY:
1aa99e6b
IH
2107 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2108 sayNO;
2109 if (do_utf8) {
b8c5462f 2110 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2111 if (locinput > PL_regeol)
2112 sayNO;
a0ed51b3 2113 nextchr = UCHARAT(locinput);
a0ed51b3 2114 }
1aa99e6b
IH
2115 else
2116 nextchr = UCHARAT(++locinput);
a0ed51b3 2117 break;
d6a28714 2118 case EXACT:
cd439c50
IZ
2119 s = STRING(scan);
2120 ln = STR_LEN(scan);
1aa99e6b
IH
2121 if (do_utf8 != (UTF!=0)) {
2122 char *l = locinput;
2123 char *e = s + ln;
2124 STRLEN len;
2125 if (do_utf8)
2126 while (s < e) {
2127 if (l >= PL_regeol)
2128 sayNO;
9041c2e3 2129 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2130 sayNO;
2131 s++;
2132 l += len;
2133 }
2134 else
2135 while (s < e) {
2136 if (l >= PL_regeol)
2137 sayNO;
9041c2e3 2138 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2139 sayNO;
2140 s += len;
2141 l++;
2142 }
2143 locinput = l;
2144 nextchr = UCHARAT(locinput);
2145 break;
2146 }
d6a28714
JH
2147 /* Inline the first character, for speed. */
2148 if (UCHARAT(s) != nextchr)
2149 sayNO;
2150 if (PL_regeol - locinput < ln)
2151 sayNO;
2152 if (ln > 1 && memNE(s, locinput, ln))
2153 sayNO;
2154 locinput += ln;
2155 nextchr = UCHARAT(locinput);
2156 break;
2157 case EXACTFL:
b8c5462f
JH
2158 PL_reg_flags |= RF_tainted;
2159 /* FALL THROUGH */
d6a28714 2160 case EXACTF:
cd439c50
IZ
2161 s = STRING(scan);
2162 ln = STR_LEN(scan);
d6a28714 2163
1aa99e6b 2164 if (do_utf8) {
d6a28714 2165 char *l = locinput;
1aa99e6b
IH
2166 char *e;
2167 e = s + ln;
d6a28714
JH
2168 c1 = OP(scan) == EXACTF;
2169 while (s < e) {
1aa99e6b 2170 if (l >= PL_regeol) {
d6a28714
JH
2171 sayNO;
2172 }
2b9d42f0 2173 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2174 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2175 sayNO;
2176 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2177 l += UTF8SKIP(l);
b8c5462f 2178 }
d6a28714 2179 locinput = l;
a0ed51b3
LW
2180 nextchr = UCHARAT(locinput);
2181 break;
2182 }
d6a28714
JH
2183
2184 /* Inline the first character, for speed. */
2185 if (UCHARAT(s) != nextchr &&
2186 UCHARAT(s) != ((OP(scan) == EXACTF)
2187 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2188 sayNO;
d6a28714 2189 if (PL_regeol - locinput < ln)
b8c5462f 2190 sayNO;
d6a28714
JH
2191 if (ln > 1 && (OP(scan) == EXACTF
2192 ? ibcmp(s, locinput, ln)
2193 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2194 sayNO;
d6a28714
JH
2195 locinput += ln;
2196 nextchr = UCHARAT(locinput);
a0d0e21e 2197 break;
d6a28714 2198 case ANYOF:
ffc61ed2
JH
2199 if (do_utf8) {
2200 if (!reginclass(scan, (U8*)locinput, do_utf8))
2201 sayNO;
2202 if (locinput >= PL_regeol)
2203 sayNO;
2204 locinput += PL_utf8skip[nextchr];
b8c5462f 2205 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2206 }
2207 else {
2208 if (nextchr < 0)
2209 nextchr = UCHARAT(locinput);
2210 if (!reginclass(scan, (U8*)locinput, do_utf8))
2211 sayNO;
2212 if (!nextchr && locinput >= PL_regeol)
2213 sayNO;
2214 nextchr = UCHARAT(++locinput);
2215 }
b8c5462f 2216 break;
d6a28714 2217 case ALNUML:
b8c5462f
JH
2218 PL_reg_flags |= RF_tainted;
2219 /* FALL THROUGH */
d6a28714 2220 case ALNUM:
b8c5462f 2221 if (!nextchr)
4633a7c4 2222 sayNO;
ffc61ed2 2223 if (do_utf8) {
ad24be35 2224 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2225 if (!(OP(scan) == ALNUM
3568d838 2226 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2227 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2228 {
2229 sayNO;
a0ed51b3 2230 }
b8c5462f 2231 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2232 nextchr = UCHARAT(locinput);
2233 break;
2234 }
ffc61ed2 2235 if (!(OP(scan) == ALNUM
d6a28714 2236 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2237 sayNO;
b8c5462f 2238 nextchr = UCHARAT(++locinput);
a0d0e21e 2239 break;
d6a28714 2240 case NALNUML:
b8c5462f
JH
2241 PL_reg_flags |= RF_tainted;
2242 /* FALL THROUGH */
d6a28714
JH
2243 case NALNUM:
2244 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2245 sayNO;
ffc61ed2 2246 if (do_utf8) {
8269fa76 2247 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2248 if (OP(scan) == NALNUM
3568d838 2249 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2250 : isALNUM_LC_utf8((U8*)locinput))
2251 {
b8c5462f 2252 sayNO;
d6a28714 2253 }
b8c5462f
JH
2254 locinput += PL_utf8skip[nextchr];
2255 nextchr = UCHARAT(locinput);
2256 break;
2257 }
ffc61ed2 2258 if (OP(scan) == NALNUM
d6a28714 2259 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2260 sayNO;
76e3520e 2261 nextchr = UCHARAT(++locinput);
a0d0e21e 2262 break;
d6a28714
JH
2263 case BOUNDL:
2264 case NBOUNDL:
3280af22 2265 PL_reg_flags |= RF_tainted;
bbce6d69 2266 /* FALL THROUGH */
d6a28714
JH
2267 case BOUND:
2268 case NBOUND:
2269 /* was last char in word? */
ffc61ed2 2270 if (do_utf8) {
12d33761
HS
2271 if (locinput == PL_bostr)
2272 ln = '\n';
ffc61ed2
JH
2273 else {
2274 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2275
2b9d42f0 2276 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2277 }
2278 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2279 ln = isALNUM_uni(ln);
8269fa76 2280 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2281 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2282 }
2283 else {
9041c2e3 2284 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2285 n = isALNUM_LC_utf8((U8*)locinput);
2286 }
a0ed51b3 2287 }
d6a28714 2288 else {
12d33761
HS
2289 ln = (locinput != PL_bostr) ?
2290 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2291 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2292 ln = isALNUM(ln);
2293 n = isALNUM(nextchr);
2294 }
2295 else {
2296 ln = isALNUM_LC(ln);
2297 n = isALNUM_LC(nextchr);
2298 }
d6a28714 2299 }
ffc61ed2
JH
2300 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2301 OP(scan) == BOUNDL))
2302 sayNO;
a0ed51b3 2303 break;
d6a28714 2304 case SPACEL:
3280af22 2305 PL_reg_flags |= RF_tainted;
bbce6d69 2306 /* FALL THROUGH */
d6a28714 2307 case SPACE:
9442cb0e 2308 if (!nextchr)
4633a7c4 2309 sayNO;
1aa99e6b 2310 if (do_utf8) {
fd400ab9 2311 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2312 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2313 if (!(OP(scan) == SPACE
3568d838 2314 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2315 : isSPACE_LC_utf8((U8*)locinput)))
2316 {
2317 sayNO;
2318 }
2319 locinput += PL_utf8skip[nextchr];
2320 nextchr = UCHARAT(locinput);
2321 break;
d6a28714 2322 }
ffc61ed2
JH
2323 if (!(OP(scan) == SPACE
2324 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2325 sayNO;
2326 nextchr = UCHARAT(++locinput);
2327 }
2328 else {
2329 if (!(OP(scan) == SPACE
2330 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2331 sayNO;
2332 nextchr = UCHARAT(++locinput);
a0ed51b3 2333 }
a0ed51b3 2334 break;
d6a28714 2335 case NSPACEL:
3280af22 2336 PL_reg_flags |= RF_tainted;
bbce6d69 2337 /* FALL THROUGH */
d6a28714 2338 case NSPACE:
9442cb0e 2339 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2340 sayNO;
1aa99e6b 2341 if (do_utf8) {
8269fa76 2342 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2343 if (OP(scan) == NSPACE
3568d838 2344 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2345 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2346 {
2347 sayNO;
2348 }
2349 locinput += PL_utf8skip[nextchr];
2350 nextchr = UCHARAT(locinput);
2351 break;
a0ed51b3 2352 }
ffc61ed2 2353 if (OP(scan) == NSPACE
d6a28714 2354 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2355 sayNO;
76e3520e 2356 nextchr = UCHARAT(++locinput);
a0d0e21e 2357 break;
d6a28714 2358 case DIGITL:
a0ed51b3
LW
2359 PL_reg_flags |= RF_tainted;
2360 /* FALL THROUGH */
d6a28714 2361 case DIGIT:
9442cb0e 2362 if (!nextchr)
a0ed51b3 2363 sayNO;
1aa99e6b 2364 if (do_utf8) {
8269fa76 2365 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2366 if (!(OP(scan) == DIGIT
3568d838 2367 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2368 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2369 {
a0ed51b3 2370 sayNO;
dfe13c55 2371 }
6f06b55f 2372 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2373 nextchr = UCHARAT(locinput);
2374 break;
2375 }
ffc61ed2 2376 if (!(OP(scan) == DIGIT
9442cb0e 2377 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2378 sayNO;
2379 nextchr = UCHARAT(++locinput);
2380 break;
d6a28714 2381 case NDIGITL:
b8c5462f
JH
2382 PL_reg_flags |= RF_tainted;
2383 /* FALL THROUGH */
d6a28714 2384 case NDIGIT:
9442cb0e 2385 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2386 sayNO;
1aa99e6b 2387 if (do_utf8) {
8269fa76 2388 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2389 if (OP(scan) == NDIGIT
3568d838 2390 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2391 : isDIGIT_LC_utf8((U8*)locinput))
2392 {
a0ed51b3 2393 sayNO;
9442cb0e 2394 }
6f06b55f 2395 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2396 nextchr = UCHARAT(locinput);
2397 break;
2398 }
ffc61ed2 2399 if (OP(scan) == NDIGIT
9442cb0e 2400 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2401 sayNO;
2402 nextchr = UCHARAT(++locinput);
2403 break;
2404 case CLUMP:
8269fa76 2405 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2406 if (locinput >= PL_regeol ||
2407 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2408 sayNO;
6f06b55f 2409 locinput += PL_utf8skip[nextchr];
3568d838
JH
2410 while (locinput < PL_regeol &&
2411 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2412 locinput += UTF8SKIP(locinput);
2413 if (locinput > PL_regeol)
2414 sayNO;
2415 nextchr = UCHARAT(locinput);
2416 break;
c8756f30 2417 case REFFL:
3280af22 2418 PL_reg_flags |= RF_tainted;
c8756f30 2419 /* FALL THROUGH */
c277df42 2420 case REF:
c8756f30 2421 case REFF:
c277df42 2422 n = ARG(scan); /* which paren pair */
cf93c79d 2423 ln = PL_regstartp[n];
2c2d71f5 2424 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2425 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2426 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2427 if (ln == PL_regendp[n])
a0d0e21e 2428 break;
a0ed51b3 2429
cf93c79d 2430 s = PL_bostr + ln;
1aa99e6b 2431 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2432 char *l = locinput;
cf93c79d 2433 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2434 /*
2435 * Note that we can't do the "other character" lookup trick as
2436 * in the 8-bit case (no pun intended) because in Unicode we
2437 * have to map both upper and title case to lower case.
2438 */
2439 if (OP(scan) == REFF) {
2440 while (s < e) {
2441 if (l >= PL_regeol)
2442 sayNO;
dfe13c55 2443 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2444 sayNO;
2445 s += UTF8SKIP(s);
2446 l += UTF8SKIP(l);
2447 }
2448 }
2449 else {
2450 while (s < e) {
2451 if (l >= PL_regeol)
2452 sayNO;
dfe13c55 2453 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2454 sayNO;
2455 s += UTF8SKIP(s);
2456 l += UTF8SKIP(l);
2457 }
2458 }
2459 locinput = l;
2460 nextchr = UCHARAT(locinput);
2461 break;
2462 }
2463
a0d0e21e 2464 /* Inline the first character, for speed. */
76e3520e 2465 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2466 (OP(scan) == REF ||
2467 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2468 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2469 sayNO;
cf93c79d 2470 ln = PL_regendp[n] - ln;
3280af22 2471 if (locinput + ln > PL_regeol)
4633a7c4 2472 sayNO;
c8756f30
AK
2473 if (ln > 1 && (OP(scan) == REF
2474 ? memNE(s, locinput, ln)
2475 : (OP(scan) == REFF
2476 ? ibcmp(s, locinput, ln)
2477 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2478 sayNO;
a0d0e21e 2479 locinput += ln;
76e3520e 2480 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2481 break;
2482
2483 case NOTHING:
c277df42 2484 case TAIL:
a0d0e21e
LW
2485 break;
2486 case BACK:
2487 break;
c277df42
IZ
2488 case EVAL:
2489 {
2490 dSP;
533c011a 2491 OP_4tree *oop = PL_op;
3280af22
NIS
2492 COP *ocurcop = PL_curcop;
2493 SV **ocurpad = PL_curpad;
c277df42 2494 SV *ret;
9041c2e3 2495
c277df42 2496 n = ARG(scan);
533c011a 2497 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2498 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2499 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2500 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2501
cea2e8a9 2502 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2503 SPAGAIN;
2504 ret = POPs;
2505 PUTBACK;
9041c2e3 2506
0f5d15d6
IZ
2507 PL_op = oop;
2508 PL_curpad = ocurpad;
2509 PL_curcop = ocurcop;
c277df42 2510 if (logical) {
0f5d15d6
IZ
2511 if (logical == 2) { /* Postponed subexpression. */
2512 regexp *re;
22c35a8c 2513 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2514 re_cc_state state;
0f5d15d6
IZ
2515 CHECKPOINT cp, lastcp;
2516
2517 if(SvROK(ret) || SvRMAGICAL(ret)) {
2518 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2519
2520 if(SvMAGICAL(sv))
14befaf4 2521 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2522 }
2523 if (mg) {
2524 re = (regexp *)mg->mg_obj;
df0003d4 2525 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2526 }
2527 else {
2528 STRLEN len;
2529 char *t = SvPV(ret, len);
2530 PMOP pm;
2531 char *oprecomp = PL_regprecomp;
2532 I32 osize = PL_regsize;
2533 I32 onpar = PL_regnpar;
2534
5fcd1c1b 2535 Zero(&pm, 1, PMOP);
cea2e8a9 2536 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2537 if (!(SvFLAGS(ret)
0f5d15d6 2538 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2539 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2540 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2541 PL_regprecomp = oprecomp;
2542 PL_regsize = osize;
2543 PL_regnpar = onpar;
2544 }
2545 DEBUG_r(
9041c2e3 2546 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2547 "Entering embedded `%s%.60s%s%s'\n",
2548 PL_colors[0],
2549 re->precomp,
2550 PL_colors[1],
2551 (strlen(re->precomp) > 60 ? "..." : ""))
2552 );
2553 state.node = next;
2554 state.prev = PL_reg_call_cc;
2555 state.cc = PL_regcc;
2556 state.re = PL_reg_re;
2557
2ab05381 2558 PL_regcc = 0;
9041c2e3 2559
0f5d15d6 2560 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2561 REGCP_SET(lastcp);
0f5d15d6
IZ
2562 cache_re(re);
2563 state.ss = PL_savestack_ix;
2564 *PL_reglastparen = 0;
2565 PL_reg_call_cc = &state;
2566 PL_reginput = locinput;
2c2d71f5
JH
2567
2568 /* XXXX This is too dramatic a measure... */
2569 PL_reg_maxiter = 0;
2570
0f5d15d6 2571 if (regmatch(re->program + 1)) {
2c914db6
IZ
2572 /* Even though we succeeded, we need to restore
2573 global variables, since we may be wrapped inside
2574 SUSPEND, thus the match may be not finished yet. */
2575
2576 /* XXXX Do this only if SUSPENDed? */
2577 PL_reg_call_cc = state.prev;
2578 PL_regcc = state.cc;
2579 PL_reg_re = state.re;
2580 cache_re(PL_reg_re);
2581
2582 /* XXXX This is too dramatic a measure... */
2583 PL_reg_maxiter = 0;
2584
2585 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2586 ReREFCNT_dec(re);
2587 regcpblow(cp);
2588 sayYES;
2589 }
0f5d15d6 2590 ReREFCNT_dec(re);
02db2b7b 2591 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2592 regcppop();
2593 PL_reg_call_cc = state.prev;
2594 PL_regcc = state.cc;
2595 PL_reg_re = state.re;
d3790889 2596 cache_re(PL_reg_re);
2c2d71f5
JH
2597
2598 /* XXXX This is too dramatic a measure... */
2599 PL_reg_maxiter = 0;
2600
0f5d15d6
IZ
2601 sayNO;
2602 }
c277df42 2603 sw = SvTRUE(ret);
0f5d15d6 2604 logical = 0;
a0ed51b3
LW
2605 }
2606 else
3280af22 2607 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2608 break;
2609 }
a0d0e21e 2610 case OPEN:
c277df42 2611 n = ARG(scan); /* which paren pair */
3280af22
NIS
2612 PL_reg_start_tmp[n] = locinput;
2613 if (n > PL_regsize)
2614 PL_regsize = n;
a0d0e21e
LW
2615 break;
2616 case CLOSE:
c277df42 2617 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2618 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2619 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2620 if (n > *PL_reglastparen)
2621 *PL_reglastparen = n;
a0d0e21e 2622 break;
c277df42
IZ
2623 case GROUPP:
2624 n = ARG(scan); /* which paren pair */
cf93c79d 2625 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2626 break;
2627 case IFTHEN:
2c2d71f5 2628 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2629 if (sw)
2630 next = NEXTOPER(NEXTOPER(scan));
2631 else {
2632 next = scan + ARG(scan);
2633 if (OP(next) == IFTHEN) /* Fake one. */
2634 next = NEXTOPER(NEXTOPER(next));
2635 }
2636 break;
2637 case LOGICAL:
0f5d15d6 2638 logical = scan->flags;
c277df42 2639 break;
2ab05381
IZ
2640/*******************************************************************
2641 PL_regcc contains infoblock about the innermost (...)* loop, and
2642 a pointer to the next outer infoblock.
2643
2644 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2645
2646 1) After matching X, regnode for CURLYX is processed;
2647
9041c2e3 2648 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2649 regmatch() recursively with the starting point at WHILEM node;
2650
2651 3) Each hit of WHILEM node tries to match A and Z (in the order
2652 depending on the current iteration, min/max of {min,max} and
2653 greediness). The information about where are nodes for "A"
2654 and "Z" is read from the infoblock, as is info on how many times "A"
2655 was already matched, and greediness.
2656
2657 4) After A matches, the same WHILEM node is hit again.
2658
2659 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2660 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2661 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2662 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2663 of the external loop.
2664
2665 Currently present infoblocks form a tree with a stem formed by PL_curcc
2666 and whatever it mentions via ->next, and additional attached trees
2667 corresponding to temporarily unset infoblocks as in "5" above.
2668
9041c2e3 2669 In the following picture infoblocks for outer loop of
2ab05381
IZ
2670 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2671 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2672 infoblocks are drawn below the "reset" infoblock.
2673
2674 In fact in the picture below we do not show failed matches for Z and T
2675 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2676 more obvious *why* one needs to *temporary* unset infoblocks.]
2677
2678 Matched REx position InfoBlocks Comment
2679 (Y(A)*?Z)*?T x
2680 Y(A)*?Z)*?T x <- O
2681 Y (A)*?Z)*?T x <- O
2682 Y A)*?Z)*?T x <- O <- I
2683 YA )*?Z)*?T x <- O <- I
2684 YA A)*?Z)*?T x <- O <- I
2685 YAA )*?Z)*?T x <- O <- I
2686 YAA Z)*?T x <- O # Temporary unset I
2687 I
2688
2689 YAAZ Y(A)*?Z)*?T x <- O
2690 I
2691
2692 YAAZY (A)*?Z)*?T x <- O
2693 I
2694
2695 YAAZY A)*?Z)*?T x <- O <- I
2696 I
2697
2698 YAAZYA )*?Z)*?T x <- O <- I
2699 I
2700
2701 YAAZYA Z)*?T x <- O # Temporary unset I
2702 I,I
2703
2704 YAAZYAZ )*?T x <- O
2705 I,I
2706
2707 YAAZYAZ T x # Temporary unset O
2708 O
2709 I,I
2710
2711 YAAZYAZT x
2712 O
2713 I,I
2714 *******************************************************************/
a0d0e21e
LW
2715 case CURLYX: {
2716 CURCUR cc;
3280af22 2717 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2718 /* No need to save/restore up to this paren */
2719 I32 parenfloor = scan->flags;
c277df42
IZ
2720
2721 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2722 next += ARG(next);
3280af22
NIS
2723 cc.oldcc = PL_regcc;
2724 PL_regcc = &cc;
cb434fcc
IZ
2725 /* XXXX Probably it is better to teach regpush to support
2726 parenfloor > PL_regsize... */
2727 if (parenfloor > *PL_reglastparen)
2728 parenfloor = *PL_reglastparen; /* Pessimization... */
2729 cc.parenfloor = parenfloor;
a0d0e21e
LW
2730 cc.cur = -1;
2731 cc.min = ARG1(scan);
2732 cc.max = ARG2(scan);
c277df42 2733 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2734 cc.next = next;
2735 cc.minmod = minmod;
2736 cc.lastloc = 0;
3280af22 2737 PL_reginput = locinput;
a0d0e21e
LW
2738 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2739 regcpblow(cp);
3280af22 2740 PL_regcc = cc.oldcc;
4633a7c4 2741 saySAME(n);
a0d0e21e
LW
2742 }
2743 /* NOT REACHED */
2744 case WHILEM: {
2745 /*
2746 * This is really hard to understand, because after we match
2747 * what we're trying to match, we must make sure the rest of
2c2d71f5 2748 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2749 * to go back UP the parse tree by recursing ever deeper. And
2750 * if it fails, we have to reset our parent's current state
2751 * that we can try again after backing off.
2752 */
2753
c277df42 2754 CHECKPOINT cp, lastcp;
3280af22 2755 CURCUR* cc = PL_regcc;
c277df42
IZ
2756 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2757
4633a7c4 2758 n = cc->cur + 1; /* how many we know we matched */
3280af22 2759 PL_reginput = locinput;
a0d0e21e 2760
c277df42 2761 DEBUG_r(
9041c2e3
NIS
2762 PerlIO_printf(Perl_debug_log,
2763 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2764 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2765 (long)n, (long)cc->min,
c277df42
IZ
2766 (long)cc->max, (long)cc)
2767 );
4633a7c4 2768
a0d0e21e
LW
2769 /* If degenerate scan matches "", assume scan done. */
2770
579cf2c3 2771 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2772 PL_regcc = cc->oldcc;
2ab05381
IZ
2773 if (PL_regcc)
2774 ln = PL_regcc->cur;
c277df42 2775 DEBUG_r(
c3464db5
DD
2776 PerlIO_printf(Perl_debug_log,
2777 "%*s empty match detected, try continuation...\n",
3280af22 2778 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2779 );
a0d0e21e 2780 if (regmatch(cc->next))
4633a7c4 2781 sayYES;
2ab05381
IZ
2782 if (PL_regcc)
2783 PL_regcc->cur = ln;
3280af22 2784 PL_regcc = cc;
4633a7c4 2785 sayNO;
a0d0e21e
LW
2786 }
2787
2788 /* First just match a string of min scans. */
2789
2790 if (n < cc->min) {
2791 cc->cur = n;
2792 cc->lastloc = locinput;
4633a7c4
LW
2793 if (regmatch(cc->scan))
2794 sayYES;
2795 cc->cur = n - 1;
c277df42 2796 cc->lastloc = lastloc;
4633a7c4 2797 sayNO;
a0d0e21e
LW
2798 }
2799
2c2d71f5
JH
2800 if (scan->flags) {
2801 /* Check whether we already were at this position.
2802 Postpone detection until we know the match is not
2803 *that* much linear. */
2804 if (!PL_reg_maxiter) {
2805 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2806 PL_reg_leftiter = PL_reg_maxiter;
2807 }
2808 if (PL_reg_leftiter-- == 0) {
2809 I32 size = (PL_reg_maxiter + 7)/8;
2810 if (PL_reg_poscache) {
2811 if (PL_reg_poscache_size < size) {
2812 Renew(PL_reg_poscache, size, char);
2813 PL_reg_poscache_size = size;
2814 }
2815 Zero(PL_reg_poscache, size, char);
2816 }
2817 else {
2818 PL_reg_poscache_size = size;
2819 Newz(29, PL_reg_poscache, size, char);
2820 }
2821 DEBUG_r(
2822 PerlIO_printf(Perl_debug_log,
2823 "%sDetected a super-linear match, switching on caching%s...\n",
2824 PL_colors[4], PL_colors[5])
2825 );
2826 }
2827 if (PL_reg_leftiter < 0) {
2828 I32 o = locinput - PL_bostr, b;
2829
2830 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2831 b = o % 8;
2832 o /= 8;
2833 if (PL_reg_poscache[o] & (1<<b)) {
2834 DEBUG_r(
2835 PerlIO_printf(Perl_debug_log,
2836 "%*s already tried at this position...\n",
2837 REPORT_CODE_OFF+PL_regindent*2, "")
2838 );
7821416a 2839 sayNO_SILENT;
2c2d71f5
JH
2840 }
2841 PL_reg_poscache[o] |= (1<<b);
2842 }
2843 }
2844
a0d0e21e
LW
2845 /* Prefer next over scan for minimal matching. */
2846
2847 if (cc->minmod) {
3280af22 2848 PL_regcc = cc->oldcc;
2ab05381
IZ
2849 if (PL_regcc)
2850 ln = PL_regcc->cur;
5f05dabc 2851 cp = regcppush(cc->parenfloor);
02db2b7b 2852 REGCP_SET(lastcp);
5f05dabc 2853 if (regmatch(cc->next)) {
c277df42 2854 regcpblow(cp);
4633a7c4 2855 sayYES; /* All done. */
5f05dabc 2856 }
02db2b7b 2857 REGCP_UNWIND(lastcp);
5f05dabc 2858 regcppop();
2ab05381
IZ
2859 if (PL_regcc)
2860 PL_regcc->cur = ln;
3280af22 2861 PL_regcc = cc;
a0d0e21e 2862
c277df42 2863 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2864 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2865 && !(PL_reg_flags & RF_warned)) {
2866 PL_reg_flags |= RF_warned;
e476b1b5 2867 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2868 "Complex regular subexpression recursion",
2869 REG_INFTY - 1);
c277df42 2870 }
4633a7c4 2871 sayNO;
c277df42 2872 }
a687059c 2873
c277df42 2874 DEBUG_r(
c3464db5
DD
2875 PerlIO_printf(Perl_debug_log,
2876 "%*s trying longer...\n",
3280af22 2877 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2878 );
a0d0e21e 2879 /* Try scanning more and see if it helps. */
3280af22 2880 PL_reginput = locinput;
a0d0e21e
LW
2881 cc->cur = n;
2882 cc->lastloc = locinput;
5f05dabc 2883 cp = regcppush(cc->parenfloor);
02db2b7b 2884 REGCP_SET(lastcp);
5f05dabc 2885 if (regmatch(cc->scan)) {
c277df42 2886 regcpblow(cp);
4633a7c4 2887 sayYES;
5f05dabc 2888 }
02db2b7b 2889 REGCP_UNWIND(lastcp);
5f05dabc 2890 regcppop();
4633a7c4 2891 cc->cur = n - 1;
c277df42 2892 cc->lastloc = lastloc;
4633a7c4 2893 sayNO;
a0d0e21e
LW
2894 }
2895
2896 /* Prefer scan over next for maximal matching. */
2897
2898 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2899 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2900 cc->cur = n;
2901 cc->lastloc = locinput;
02db2b7b 2902 REGCP_SET(lastcp);
5f05dabc 2903 if (regmatch(cc->scan)) {
c277df42 2904 regcpblow(cp);
4633a7c4 2905 sayYES;
5f05dabc 2906 }
02db2b7b 2907 REGCP_UNWIND(lastcp);
a0d0e21e 2908 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2909 PL_reginput = locinput;
c277df42 2910 DEBUG_r(
c3464db5
DD
2911 PerlIO_printf(Perl_debug_log,
2912 "%*s failed, try continuation...\n",
3280af22 2913 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2914 );
2915 }
9041c2e3 2916 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2917 && !(PL_reg_flags & RF_warned)) {
3280af22 2918 PL_reg_flags |= RF_warned;
e476b1b5 2919 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2920 "Complex regular subexpression recursion",
2921 REG_INFTY - 1);
a0d0e21e
LW
2922 }
2923
2924 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2925 PL_regcc = cc->oldcc;
2ab05381
IZ
2926 if (PL_regcc)
2927 ln = PL_regcc->cur;
a0d0e21e 2928 if (regmatch(cc->next))
4633a7c4 2929 sayYES;
2ab05381
IZ
2930 if (PL_regcc)
2931 PL_regcc->cur = ln;
3280af22 2932 PL_regcc = cc;
4633a7c4 2933 cc->cur = n - 1;
c277df42 2934 cc->lastloc = lastloc;
4633a7c4 2935 sayNO;
a0d0e21e
LW
2936 }
2937 /* NOT REACHED */
9041c2e3 2938 case BRANCHJ:
c277df42
IZ
2939 next = scan + ARG(scan);
2940 if (next == scan)
2941 next = NULL;
2942 inner = NEXTOPER(NEXTOPER(scan));
2943 goto do_branch;
9041c2e3 2944 case BRANCH:
c277df42
IZ
2945 inner = NEXTOPER(scan);
2946 do_branch:
2947 {
c277df42
IZ
2948 c1 = OP(scan);
2949 if (OP(next) != c1) /* No choice. */
2950 next = inner; /* Avoid recursion. */
a0d0e21e 2951 else {
02db2b7b
IZ
2952 I32 lastparen = *PL_reglastparen;
2953 I32 unwind1;
2954 re_unwind_branch_t *uw;
2955
2956 /* Put unwinding data on stack */
2957 unwind1 = SSNEWt(1,re_unwind_branch_t);
2958 uw = SSPTRt(unwind1,re_unwind_branch_t);
2959 uw->prev = unwind;
2960 unwind = unwind1;
2961 uw->type = ((c1 == BRANCH)
2962 ? RE_UNWIND_BRANCH
2963 : RE_UNWIND_BRANCHJ);
2964 uw->lastparen = lastparen;
2965 uw->next = next;
2966 uw->locinput = locinput;
2967 uw->nextchr = nextchr;
2968#ifdef DEBUGGING
2969 uw->regindent = ++PL_regindent;
2970#endif
c277df42 2971
02db2b7b
IZ
2972 REGCP_SET(uw->lastcp);
2973
2974 /* Now go into the first branch */
2975 next = inner;
a687059c 2976 }
a0d0e21e
LW
2977 }
2978 break;
2979 case MINMOD:
2980 minmod = 1;
2981 break;
c277df42
IZ
2982 case CURLYM:
2983 {
00db4c45 2984 I32 l = 0;
c277df42 2985 CHECKPOINT lastcp;
9041c2e3 2986
c277df42
IZ
2987 /* We suppose that the next guy does not need
2988 backtracking: in particular, it is of constant length,
2989 and has no parenths to influence future backrefs. */
2990 ln = ARG1(scan); /* min to match */
2991 n = ARG2(scan); /* max to match */
c277df42
IZ
2992 paren = scan->flags;
2993 if (paren) {
3280af22
NIS
2994 if (paren > PL_regsize)
2995 PL_regsize = paren;
2996 if (paren > *PL_reglastparen)
2997 *PL_reglastparen = paren;
c277df42 2998 }
dc45a647 2999 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3000 if (paren)
3001 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3002 PL_reginput = locinput;
c277df42
IZ
3003 if (minmod) {
3004 minmod = 0;
3005 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3006 sayNO;
5f4b28b2 3007 if (ln && l == 0 && n >= ln
c277df42
IZ
3008 /* In fact, this is tricky. If paren, then the
3009 fact that we did/didnot match may influence
3010 future execution. */
3011 && !(paren && ln == 0))
3012 ln = n;
3280af22 3013 locinput = PL_reginput;
22c35a8c 3014 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3015 c1 = (U8)*STRING(next);
c277df42 3016 if (OP(next) == EXACTF)
22c35a8c 3017 c2 = PL_fold[c1];
c277df42 3018 else if (OP(next) == EXACTFL)
22c35a8c 3019 c2 = PL_fold_locale[c1];
c277df42
IZ
3020 else
3021 c2 = c1;
a0ed51b3
LW
3022 }
3023 else
c277df42 3024 c1 = c2 = -1000;
02db2b7b 3025 REGCP_SET(lastcp);
5f4b28b2 3026 /* This may be improved if l == 0. */
c277df42
IZ
3027 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3028 /* If it could work, try it. */
3029 if (c1 == -1000 ||
3280af22
NIS
3030 UCHARAT(PL_reginput) == c1 ||
3031 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3032 {
3033 if (paren) {
3034 if (n) {
cf93c79d
IZ
3035 PL_regstartp[paren] =
3036 HOPc(PL_reginput, -l) - PL_bostr;
3037 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3038 }
3039 else
cf93c79d 3040 PL_regendp[paren] = -1;
c277df42
IZ
3041 }
3042 if (regmatch(next))
3043 sayYES;
02db2b7b 3044 REGCP_UNWIND(lastcp);
c277df42
IZ
3045 }
3046 /* Couldn't or didn't -- move forward. */
3280af22 3047 PL_reginput = locinput;
c277df42
IZ
3048 if (regrepeat_hard(scan, 1, &l)) {
3049 ln++;
3280af22 3050 locinput = PL_reginput;
c277df42
IZ
3051 }
3052 else
3053 sayNO;
3054 }
a0ed51b3
LW
3055 }
3056 else {
c277df42
IZ
3057 n = regrepeat_hard(scan, n, &l);
3058 if (n != 0 && l == 0
3059 /* In fact, this is tricky. If paren, then the
3060 fact that we did/didnot match may influence
3061 future execution. */
3062 && !(paren && ln == 0))
3063 ln = n;
3280af22 3064 locinput = PL_reginput;
c277df42 3065 DEBUG_r(
5c0ca799 3066 PerlIO_printf(Perl_debug_log,
faccc32b 3067 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3068 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3069 (IV) n, (IV)l)
c277df42
IZ
3070 );
3071 if (n >= ln) {
22c35a8c 3072 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3073 c1 = (U8)*STRING(next);
c277df42 3074 if (OP(next) == EXACTF)
22c35a8c 3075 c2 = PL_fold[c1];
c277df42 3076 else if (OP(next) == EXACTFL)
22c35a8c 3077 c2 = PL_fold_locale[c1];
c277df42
IZ
3078 else
3079 c2 = c1;
a0ed51b3
LW
3080 }
3081 else
c277df42
IZ
3082 c1 = c2 = -1000;
3083 }
02db2b7b 3084 REGCP_SET(lastcp);
c277df42
IZ
3085 while (n >= ln) {
3086 /* If it could work, try it. */
3087 if (c1 == -1000 ||
3280af22
NIS
3088 UCHARAT(PL_reginput) == c1 ||
3089 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3090 {
3091 DEBUG_r(
c3464db5 3092 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3093 "%*s trying tail with n=%"IVdf"...\n",
3094 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3095 );
3096 if (paren) {
3097 if (n) {
cf93c79d
IZ
3098 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3099 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3100 }
a0ed51b3 3101 else
cf93c79d 3102 PL_regendp[paren] = -1;
c277df42 3103 }
a0ed51b3
LW
3104 if (regmatch(next))
3105 sayYES;
02db2b7b 3106 REGCP_UNWIND(lastcp);
a0ed51b3 3107 }
c277df42
IZ
3108 /* Couldn't or didn't -- back up. */
3109 n--;
dfe13c55 3110 locinput = HOPc(locinput, -l);
3280af22 3111 PL_reginput = locinput;
c277df42
IZ
3112 }
3113 }
3114 sayNO;
3115 break;
3116 }
3117 case CURLYN:
3118 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3119 if (paren > PL_regsize)
3120 PL_regsize = paren;
3121 if (paren > *PL_reglastparen)
3122 *PL_reglastparen = paren;
c277df42
IZ
3123 ln = ARG1(scan); /* min to match */
3124 n = ARG2(scan); /* max to match */
dc45a647 3125 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3126 goto repeat;
a0d0e21e 3127 case CURLY:
c277df42 3128 paren = 0;
a0d0e21e
LW
3129 ln = ARG1(scan); /* min to match */
3130 n = ARG2(scan); /* max to match */
dc45a647 3131 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3132 goto repeat;
3133 case STAR:
3134 ln = 0;
c277df42 3135 n = REG_INFTY;
a0d0e21e 3136 scan = NEXTOPER(scan);
c277df42 3137 paren = 0;
a0d0e21e
LW
3138 goto repeat;
3139 case PLUS:
c277df42
IZ
3140 ln = 1;
3141 n = REG_INFTY;
3142 scan = NEXTOPER(scan);
3143 paren = 0;
3144 repeat:
a0d0e21e
LW
3145 /*
3146 * Lookahead to avoid useless match attempts
3147 * when we know what character comes next.
3148 */
22c35a8c 3149 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3150 U8 *s = (U8*)STRING(next);
3151 if (!UTF) {
3152 c2 = c1 = *s;
3153 if (OP(next) == EXACTF)
3154 c2 = PL_fold[c1];
3155 else if (OP(next) == EXACTFL)
3156 c2 = PL_fold_locale[c1];
3157 }
3158 else { /* UTF */
3159 if (OP(next) == EXACTF) {
3160 c1 = to_utf8_lower(s);
3161 c2 = to_utf8_upper(s);
3162 }
3163 else {
9041c2e3 3164 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3165 }
3166 }
bbce6d69 3167 }
a0d0e21e 3168 else
bbce6d69 3169 c1 = c2 = -1000;
3280af22 3170 PL_reginput = locinput;
a0d0e21e 3171 if (minmod) {
c277df42 3172 CHECKPOINT lastcp;
a0d0e21e
LW
3173 minmod = 0;
3174 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3175 sayNO;
a0ed51b3 3176 locinput = PL_reginput;
02db2b7b 3177 REGCP_SET(lastcp);
0fe9bf95 3178 if (c1 != -1000) {
1aa99e6b 3179 char *e; /* Should not check after this */
0fe9bf95
IZ
3180 char *old = locinput;
3181
1aa99e6b 3182 if (n == REG_INFTY) {
0fe9bf95 3183 e = PL_regeol - 1;
1aa99e6b
IH
3184 if (do_utf8)
3185 while (UTF8_IS_CONTINUATION(*(U8*)e))
3186 e--;
3187 }
3188 else if (do_utf8) {
3189 int m = n - ln;
3190 for (e = locinput;
3191 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3192 e += UTF8SKIP(e);
3193 }
3194 else {
3195 e = locinput + n - ln;
3196 if (e >= PL_regeol)
3197 e = PL_regeol - 1;
3198 }
0fe9bf95 3199 while (1) {
1aa99e6b 3200 int count;
0fe9bf95 3201 /* Find place 'next' could work */
1aa99e6b
IH
3202 if (!do_utf8) {
3203 if (c1 == c2) {
3204 while (locinput <= e && *locinput != c1)
3205 locinput++;
3206 } else {
9041c2e3 3207 while (locinput <= e
1aa99e6b
IH
3208 && *locinput != c1
3209 && *locinput != c2)
3210 locinput++;
3211 }
3212 count = locinput - old;
3213 }
3214 else {
3215 STRLEN len;
3216 if (c1 == c2) {
3217 for (count = 0;
3218 locinput <= e &&
9041c2e3 3219 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3220 count++)
3221 locinput += len;
3222
3223 } else {
3224 for (count = 0; locinput <= e; count++) {
9041c2e3 3225 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3226 if (c == c1 || c == c2)
3227 break;
9041c2e3 3228 locinput += len;
1aa99e6b
IH
3229 }
3230 }
0fe9bf95 3231 }
9041c2e3 3232 if (locinput > e)
0fe9bf95
IZ
3233 sayNO;
3234 /* PL_reginput == old now */
3235 if (locinput != old) {
3236 ln = 1; /* Did some */
1aa99e6b 3237 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3238 sayNO;
3239 }
3240 /* PL_reginput == locinput now */
29d1e993 3241 TRYPAREN(paren, ln, locinput);
0fe9bf95 3242 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3243 REGCP_UNWIND(lastcp);
0fe9bf95 3244 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3245 old = locinput;
3246 if (do_utf8)
3247 locinput += UTF8SKIP(locinput);
3248 else
3249 locinput++;
0fe9bf95
IZ
3250 }
3251 }
3252 else
c277df42 3253 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3254 UV c;
3255 if (c1 != -1000) {
3256 if (do_utf8)
9041c2e3 3257 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3258 else
9041c2e3 3259 c = UCHARAT(PL_reginput);
2390ecbc
PP
3260 /* If it could work, try it. */
3261 if (c == c1 || c == c2)
3262 {
3263 TRYPAREN(paren, n, PL_reginput);
3264 REGCP_UNWIND(lastcp);
3265 }
1aa99e6b 3266 }
a0d0e21e 3267 /* If it could work, try it. */
2390ecbc 3268 else if (c1 == -1000)
bbce6d69 3269 {
29d1e993 3270 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3271 REGCP_UNWIND(lastcp);
bbce6d69 3272 }
c277df42 3273 /* Couldn't or didn't -- move forward. */
a0ed51b3 3274 PL_reginput = locinput;
a0d0e21e
LW
3275 if (regrepeat(scan, 1)) {
3276 ln++;
a0ed51b3
LW
3277 locinput = PL_reginput;
3278 }
3279 else
4633a7c4 3280 sayNO;
a0d0e21e
LW
3281 }
3282 }
3283 else {
c277df42 3284 CHECKPOINT lastcp;
a0d0e21e 3285 n = regrepeat(scan, n);
a0ed51b3 3286 locinput = PL_reginput;
22c35a8c 3287 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3288 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3289 ln = n; /* why back off? */
1aeab75a
GS
3290 /* ...because $ and \Z can match before *and* after
3291 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3292 We should back off by one in this case. */
3293 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3294 ln--;
3295 }
02db2b7b 3296 REGCP_SET(lastcp);
c277df42 3297 if (paren) {
8fa7f367 3298 UV c = 0;
c277df42 3299 while (n >= ln) {
1aa99e6b
IH
3300 if (c1 != -1000) {
3301 if (do_utf8)
9041c2e3 3302 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3303 else
9041c2e3 3304 c = UCHARAT(PL_reginput);
1aa99e6b 3305 }
c277df42 3306 /* If it could work, try it. */
1aa99e6b 3307 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3308 {
29d1e993 3309 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3310 REGCP_UNWIND(lastcp);
c277df42
IZ
3311 }
3312 /* Couldn't or didn't -- back up. */
3313 n--;
dfe13c55 3314 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3315 }
a0ed51b3
LW
3316 }
3317 else {
8fa7f367 3318 UV c = 0;
c277df42 3319 while (n >= ln) {
1aa99e6b
IH
3320 if (c1 != -1000) {
3321 if (do_utf8)
9041c2e3 3322 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3323 else
9041c2e3 3324 c = UCHARAT(PL_reginput);
1aa99e6b 3325 }
c277df42 3326 /* If it could work, try it. */
1aa99e6b 3327 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3328 {
29d1e993 3329 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3330 REGCP_UNWIND(lastcp);
c277df42
IZ
3331 }
3332 /* Couldn't or didn't -- back up. */
3333 n--;
dfe13c55 3334 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3335 }
a0d0e21e
LW
3336 }
3337 }
4633a7c4 3338 sayNO;
c277df42 3339 break;
a0d0e21e 3340 case END:
0f5d15d6
IZ
3341 if (PL_reg_call_cc) {
3342 re_cc_state *cur_call_cc = PL_reg_call_cc;
3343 CURCUR *cctmp = PL_regcc;
3344 regexp *re = PL_reg_re;
3345 CHECKPOINT cp, lastcp;
3346
3347 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3348 REGCP_SET(lastcp);
0f5d15d6
IZ
3349 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3350 the caller. */
3351 PL_reginput = locinput; /* Make position available to
3352 the callcc. */
3353 cache_re(PL_reg_call_cc->re);
3354 PL_regcc = PL_reg_call_cc->cc;
3355 PL_reg_call_cc = PL_reg_call_cc->prev;
3356 if (regmatch(cur_call_cc->node)) {
3357 PL_reg_call_cc = cur_call_cc;
3358 regcpblow(cp);
3359 sayYES;
3360 }
02db2b7b 3361 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3362 regcppop();
3363 PL_reg_call_cc = cur_call_cc;
3364 PL_regcc = cctmp;
3365 PL_reg_re = re;
3366 cache_re(re);
3367
3368 DEBUG_r(
3369 PerlIO_printf(Perl_debug_log,
3370 "%*s continuation failed...\n",
3371 REPORT_CODE_OFF+PL_regindent*2, "")
3372 );
7821416a 3373 sayNO_SILENT;
0f5d15d6 3374 }
7821416a
IZ
3375 if (locinput < PL_regtill) {
3376 DEBUG_r(PerlIO_printf(Perl_debug_log,
3377 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3378 PL_colors[4],
3379 (long)(locinput - PL_reg_starttry),
3380 (long)(PL_regtill - PL_reg_starttry),
3381 PL_colors[5]));
3382 sayNO_FINAL; /* Cannot match: too short. */
3383 }
3384 PL_reginput = locinput; /* put where regtry can find it */
3385 sayYES_FINAL; /* Success! */
7e5428c5 3386 case SUCCEED:
3280af22 3387 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3388 sayYES_LOUD; /* Success! */
c277df42
IZ
3389 case SUSPEND:
3390 n = 1;
9fe1d20c 3391 PL_reginput = locinput;
9041c2e3 3392 goto do_ifmatch;
a0d0e21e 3393 case UNLESSM:
c277df42 3394 n = 0;
a0ed51b3 3395 if (scan->flags) {
efb30f32
HS
3396 s = HOPBACKc(locinput, scan->flags);
3397 if (!s)
3398 goto say_yes;
3399 PL_reginput = s;
a0ed51b3
LW
3400 }
3401 else
3402 PL_reginput = locinput;
c277df42
IZ
3403 goto do_ifmatch;
3404 case IFMATCH:
3405 n = 1;
a0ed51b3 3406 if (scan->flags) {
efb30f32
HS
3407 s = HOPBACKc(locinput, scan->flags);
3408 if (!s)
3409 goto say_no;
3410 PL_reginput = s;
a0ed51b3
LW
3411 }
3412 else
3413 PL_reginput = locinput;
3414
c277df42 3415 do_ifmatch:
c277df42
IZ
3416 inner = NEXTOPER(NEXTOPER(scan));
3417 if (regmatch(inner) != n) {
3418 say_no:
3419 if (logical) {
3420 logical = 0;
3421 sw = 0;
3422 goto do_longjump;
a0ed51b3
LW
3423 }
3424 else
c277df42
IZ
3425 sayNO;
3426 }
3427 say_yes:
3428 if (logical) {
3429 logical = 0;
3430 sw = 1;
3431 }
fe44a5e8 3432 if (OP(scan) == SUSPEND) {
3280af22 3433 locinput = PL_reginput;
565764a8 3434 nextchr = UCHARAT(locinput);
fe44a5e8 3435 }
c277df42
IZ
3436 /* FALL THROUGH. */
3437 case LONGJMP:
3438 do_longjump:
3439 next = scan + ARG(scan);
3440 if (next == scan)
3441 next = NULL;
a0d0e21e
LW
3442 break;
3443 default:
b900a521 3444 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3445 PTR2UV(scan), OP(scan));
cea2e8a9 3446 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3447 }
02db2b7b 3448 reenter:
a0d0e21e
LW
3449 scan = next;
3450 }
a687059c 3451
a0d0e21e
LW
3452 /*
3453 * We get here only if there's trouble -- normally "case END" is
3454 * the terminating point.
3455 */
cea2e8a9 3456 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3457 /*NOTREACHED*/
4633a7c4
LW
3458 sayNO;
3459
7821416a
IZ
3460yes_loud:
3461 DEBUG_r(
3462 PerlIO_printf(Perl_debug_log,
3463 "%*s %scould match...%s\n",
3464 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3465 );
3466 goto yes;
3467yes_final:
3468 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3469 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3470yes:
3471#ifdef DEBUGGING
3280af22 3472 PL_regindent--;
4633a7c4 3473#endif
02db2b7b
IZ
3474
3475#if 0 /* Breaks $^R */
3476 if (unwind)
3477 regcpblow(firstcp);
3478#endif
4633a7c4
LW
3479 return 1;
3480
3481no:
7821416a
IZ
3482 DEBUG_r(
3483 PerlIO_printf(Perl_debug_log,
3484 "%*s %sfailed...%s\n",
3485 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3486 );
3487 goto do_no;
3488no_final:
3489do_no:
02db2b7b
IZ
3490 if (unwind) {
3491 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3492
3493 switch (uw->type) {
3494 case RE_UNWIND_BRANCH:
3495 case RE_UNWIND_BRANCHJ:
3496 {
3497 re_unwind_branch_t *uwb = &(uw->branch);
3498 I32 lastparen = uwb->lastparen;
9041c2e3 3499
02db2b7b
IZ
3500 REGCP_UNWIND(uwb->lastcp);
3501 for (n = *PL_reglastparen; n > lastparen; n--)
3502 PL_regendp[n] = -1;
3503 *PL_reglastparen = n;
3504 scan = next = uwb->next;
9041c2e3
NIS
3505 if ( !scan ||
3506 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3507 ? BRANCH : BRANCHJ) ) { /* Failure */
3508 unwind = uwb->prev;
3509#ifdef DEBUGGING
3510 PL_regindent--;
3511#endif
3512 goto do_no;
3513 }
3514 /* Have more choice yet. Reuse the same uwb. */
3515 /*SUPPRESS 560*/
3516 if ((n = (uwb->type == RE_UNWIND_BRANCH
3517 ? NEXT_OFF(next) : ARG(next))))
3518 next += n;
3519 else
3520 next = NULL; /* XXXX Needn't unwinding in this case... */
3521 uwb->next = next;
3522 next = NEXTOPER(scan);
3523 if (uwb->type == RE_UNWIND_BRANCHJ)
3524 next = NEXTOPER(next);
3525 locinput = uwb->locinput;
3526 nextchr = uwb->nextchr;
3527#ifdef DEBUGGING
3528 PL_regindent = uwb->regindent;
3529#endif
3530
3531 goto reenter;
3532 }
3533 /* NOT REACHED */
3534 default:
3535 Perl_croak(aTHX_ "regexp unwind memory corruption");
3536 }
3537 /* NOT REACHED */
3538 }
4633a7c4 3539#ifdef DEBUGGING
3280af22 3540 PL_regindent--;
4633a7c4 3541#endif
a0d0e21e 3542 return 0;
a687059c
LW
3543}
3544
3545/*
3546 - regrepeat - repeatedly match something simple, report how many
3547 */
3548/*
3549 * [This routine now assumes that it will only match on things of length 1.
3550 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3551 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3552 */
76e3520e 3553STATIC I32
cea2e8a9 3554S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3555{
a0d0e21e 3556 register char *scan;
a0d0e21e 3557 register I32 c;
3280af22 3558 register char *loceol = PL_regeol;
a0ed51b3 3559 register I32 hardcount = 0;
ffc61ed2 3560 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3561
3280af22 3562 scan = PL_reginput;
c277df42 3563 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3564 loceol = scan + max;
a0d0e21e 3565 switch (OP(p)) {
22c35a8c 3566 case REG_ANY:
1aa99e6b 3567 if (do_utf8) {
ffc61ed2 3568 loceol = PL_regeol;
1aa99e6b 3569 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3570 scan += UTF8SKIP(scan);
3571 hardcount++;
3572 }
3573 } else {
3574 while (scan < loceol && *scan != '\n')
3575 scan++;
a0ed51b3
LW
3576 }
3577 break;
ffc61ed2 3578 case SANY:
3baa4c62 3579 scan = loceol;
a0ed51b3 3580 break;
bbce6d69 3581 case EXACT: /* length of string is 1 */
cd439c50 3582 c = (U8)*STRING(p);
bbce6d69 3583 while (scan < loceol && UCHARAT(scan) == c)
3584 scan++;
3585 break;
3586 case EXACTF: /* length of string is 1 */
cd439c50 3587 c = (U8)*STRING(p);
bbce6d69 3588 while (scan < loceol &&
22c35a8c 3589 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3590 scan++;
3591 break;
3592 case EXACTFL: /* length of string is 1 */
3280af22 3593 PL_reg_flags |= RF_tainted;
cd439c50 3594 c = (U8)*STRING(p);
bbce6d69 3595 while (scan < loceol &&
22c35a8c 3596 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3597 scan++;
3598 break;
3599 case ANYOF:
ffc61ed2
JH
3600 if (do_utf8) {
3601 loceol = PL_regeol;
1aa99e6b
IH
3602 while (hardcount < max && scan < loceol &&
3603 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3604 scan += UTF8SKIP(scan);
3605 hardcount++;
3606 }
3607 } else {
3608 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3609 scan++;
3610 }
a0d0e21e
LW
3611 break;
3612 case ALNUM:
1aa99e6b 3613 if (do_utf8) {
ffc61ed2 3614 loceol = PL_regeol;
8269fa76 3615 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3616 while (hardcount < max && scan < loceol &&
3568d838 3617 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3618 scan += UTF8SKIP(scan);
3619 hardcount++;
3620 }
3621 } else {
3622 while (scan < loceol && isALNUM(*scan))
3623 scan++;
a0ed51b3
LW
3624 }
3625 break;
bbce6d69 3626 case ALNUML:
3280af22 3627 PL_reg_flags |= RF_tainted;
1aa99e6b 3628 if (do_utf8) {
ffc61ed2 3629 loceol = PL_regeol;
1aa99e6b
IH
3630 while (hardcount < max && scan < loceol &&
3631 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3632 scan += UTF8SKIP(scan);
3633 hardcount++;
3634 }
3635 } else {
3636 while (scan < loceol && isALNUM_LC(*scan))
3637 scan++;
a0ed51b3
LW
3638 }
3639 break;
a0d0e21e 3640 case NALNUM:
1aa99e6b 3641 if (do_utf8) {
ffc61ed2 3642 loceol = PL_regeol;
8269fa76 3643 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3644 while (hardcount < max && scan < loceol &&
3568d838 3645 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3646 scan += UTF8SKIP(scan);
3647 hardcount++;
3648 }
3649 } else {
3650 while (scan < loceol && !isALNUM(*scan))
3651 scan++;
a0ed51b3
LW
3652 }
3653 break;
bbce6d69 3654 case NALNUML:
3280af22 3655 PL_reg_flags |= RF_tainted;
1aa99e6b 3656 if (do_utf8) {
ffc61ed2 3657 loceol = PL_regeol;
1aa99e6b
IH
3658 while (hardcount < max && scan < loceol &&
3659 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3660 scan += UTF8SKIP(scan);
3661 hardcount++;
3662 }
3663 } else {
3664 while (scan < loceol && !isALNUM_LC(*scan))
3665 scan++;
a0ed51b3
LW
3666 }
3667 break;
a0d0e21e 3668 case SPACE:
1aa99e6b 3669 if (do_utf8) {
ffc61ed2 3670 loceol = PL_regeol;
8269fa76 3671 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3672 while (hardcount < max && scan < loceol &&
3568d838
JH
3673 (*scan == ' ' ||
3674 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3675 scan += UTF8SKIP(scan);
3676 hardcount++;
3677 }
3678 } else {
3679 while (scan < loceol && isSPACE(*scan))
3680 scan++;
a0ed51b3
LW
3681 }
3682 break;
bbce6d69 3683 case SPACEL:
3280af22 3684 PL_reg_flags |= RF_tainted;
1aa99e6b 3685 if (do_utf8) {
ffc61ed2 3686 loceol = PL_regeol;
1aa99e6b 3687 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3688 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3689 scan += UTF8SKIP(scan);
3690 hardcount++;
3691 }
3692 } else {
3693 while (scan < loceol && isSPACE_LC(*scan))
3694 scan++;
a0ed51b3
LW
3695 }
3696 break;
a0d0e21e 3697 case NSPACE:
1aa99e6b 3698 if (do_utf8) {
ffc61ed2 3699 loceol = PL_regeol;
8269fa76 3700 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3701 while (hardcount < max && scan < loceol &&
3568d838
JH
3702 !(*scan == ' ' ||
3703 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3704 scan += UTF8SKIP(scan);
3705 hardcount++;
3706 }
3707 } else {
3708 while (scan < loceol && !isSPACE(*scan))
3709 scan++;
3710 break;
a0ed51b3 3711 }
bbce6d69 3712 case NSPACEL:
3280af22 3713 PL_reg_flags |= RF_tainted;
1aa99e6b 3714 if (do_utf8) {
ffc61ed2 3715 loceol = PL_regeol;
1aa99e6b 3716 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3717 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3718 scan += UTF8SKIP(scan);
3719 hardcount++;
3720 }
3721 } else {
3722 while (scan < loceol && !isSPACE_LC(*scan))
3723 scan++;
a0ed51b3
LW
3724 }
3725 break;
a0d0e21e 3726 case DIGIT:
1aa99e6b 3727 if (do_utf8) {
ffc61ed2 3728 loceol = PL_regeol;
8269fa76 3729 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3730 while (hardcount < max && scan < loceol &&
3568d838 3731 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3732 scan += UTF8SKIP(scan);
3733 hardcount++;
3734 }
3735 } else {
3736 while (scan < loceol && isDIGIT(*scan))
3737 scan++;
a0ed51b3
LW
3738 }
3739 break;
a0d0e21e 3740 case NDIGIT:
1aa99e6b 3741 if (do_utf8) {
ffc61ed2 3742 loceol = PL_regeol;
8269fa76 3743 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3744 while (hardcount < max && scan < loceol &&
3568d838 3745 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3746 scan += UTF8SKIP(scan);
3747 hardcount++;
3748 }
3749 } else {
3750 while (scan < loceol && !isDIGIT(*scan))
3751 scan++;
a0ed51b3
LW
3752 }
3753 break;
a0d0e21e
LW
3754 default: /* Called on something of 0 width. */
3755 break; /* So match right here or not at all. */
3756 }
a687059c 3757
a0ed51b3
LW
3758 if (hardcount)
3759 c = hardcount;
3760 else
3761 c = scan - PL_reginput;
3280af22 3762 PL_reginput = scan;
a687059c 3763
9041c2e3 3764 DEBUG_r(
c277df42
IZ
3765 {
3766 SV *prop = sv_newmortal();
3767
3768 regprop(prop, p);
9041c2e3
NIS
3769 PerlIO_printf(Perl_debug_log,
3770 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3771 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3772 });
9041c2e3 3773
a0d0e21e 3774 return(c);
a687059c
LW
3775}
3776
3777/*
c277df42 3778 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3779 *
c277df42
IZ
3780 * The repeater is supposed to have constant length.
3781 */
3782
76e3520e 3783STATIC I32
cea2e8a9 3784S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3785{
b7953727 3786 register char *scan = Nullch;
c277df42 3787 register char *start;
3280af22 3788 register char *loceol = PL_regeol;
a0ed51b3 3789 I32 l = 0;
708e3b05 3790 I32 count = 0, res = 1;
a0ed51b3
LW
3791
3792 if (!max)
3793 return 0;
c277df42 3794
3280af22 3795 start = PL_reginput;
1aa99e6b 3796 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3797 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3798 if (!count++) {
3799 l = 0;
3800 while (start < PL_reginput) {
3801 l++;
3802 start += UTF8SKIP(start);
3803 }
3804 *lp = l;
3805 if (l == 0)
3806 return max;
3807 }
3808 if (count == max)
3809 return count;
3810 }
3811 }
3812 else {
708e3b05 3813 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3814 if (!count++) {
3815 *lp = l = PL_reginput - start;
3816 if (max != REG_INFTY && l*max < loceol - scan)
3817 loceol = scan + l*max;
3818 if (l == 0)
3819 return max;
c277df42
IZ
3820 }
3821 }
3822 }
708e3b05 3823 if (!res)
3280af22 3824 PL_reginput = scan;
9041c2e3 3825
a0ed51b3 3826 return count;
c277df42
IZ
3827}
3828
3829/*
ffc61ed2
JH
3830- regclass_swash - prepare the utf8 swash
3831*/
3832
3833SV *
3834Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3835{
3836 SV *sw = NULL;
3837 SV *si = NULL;
3838
3839 if (PL_regdata && PL_regdata->count) {
3840 U32 n = ARG(node);
3841
3842 if (PL_regdata->what[n] == 's') {
3843 SV *rv = (SV*)PL_regdata->data[n];
3844 AV *av = (AV*)SvRV((SV*)rv);
3845 SV **a;
9041c2e3 3846
ffc61ed2
JH
3847 si = *av_fetch(av, 0, FALSE);
3848 a = av_fetch(av, 1, FALSE);
9041c2e3 3849
ffc61ed2
JH
3850 if (a)
3851 sw = *a;
3852 else if (si && doinit) {
3853 sw = swash_init("utf8", "", si, 1, 0);
3854 (void)av_store(av, 1, sw);
3855 }
3856 }
3857 }
3858
3859 if (initsvp)
3860 *initsvp = si;
3861
3862 return sw;
3863}
3864
3865/*
cb8d8820 3866 - reginclass - determine if a character falls into a character class
bbce6d69 3867 */
3868
76e3520e 3869STATIC bool
ffc61ed2 3870S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3871{
ffc61ed2 3872 char flags = ANYOF_FLAGS(n);
bbce6d69 3873 bool match = FALSE;
1aa99e6b 3874 UV c;
3568d838 3875 STRLEN len = 0;
1aa99e6b 3876
3568d838 3877 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3878
ffc61ed2
JH
3879 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3880 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3881 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3882 match = TRUE;
bbce6d69 3883 }
3568d838 3884 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3885 match = TRUE;
ffc61ed2
JH
3886 if (!match) {
3887 SV *sw = regclass_swash(n, TRUE, 0);
3888
3889 if (sw) {
3568d838 3890 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3891 match = TRUE;
3892 else if (flags & ANYOF_FOLD) {
3893 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3894
ffc61ed2
JH
3895 if (flags & ANYOF_LOCALE) {
3896 PL_reg_flags |= RF_tainted;
9041c2e3 3897 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3898 }
1aa99e6b 3899 else
9041c2e3 3900 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3901 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3902 match = TRUE;
3903 }
3904 }
bbce6d69 3905 }
3906 }
1aa99e6b 3907 if (!match && c < 256) {
ffc61ed2
JH
3908 if (ANYOF_BITMAP_TEST(n, c))
3909 match = TRUE;
3910 else if (flags & ANYOF_FOLD) {
3568d838 3911 I32 f;
a0ed51b3 3912
ffc61ed2
JH
3913 if (flags & ANYOF_LOCALE) {
3914 PL_reg_flags |= RF_tainted;
3915 f = PL_fold_locale[c];
3916 }
3917 else
3918 f = PL_fold[c];
3919 if (f != c && ANYOF_BITMAP_TEST(n, f))
3920 match = TRUE;
3921 }
3922
3923 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3924 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3925 if (
3926 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3951 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3952 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3953 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3954 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3955 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3956 ) /* How's that for a conditional? */
3957 {
3958 match = TRUE;
3959 }
a0ed51b3 3960 }
a0ed51b3
LW
3961 }
3962
a0ed51b3
LW
3963 return (flags & ANYOF_INVERT) ? !match : match;
3964}
161b471a 3965
dfe13c55 3966STATIC U8 *
cea2e8a9 3967S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3968{
1aa99e6b
IH
3969 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3970}
3971
3972STATIC U8 *
3973S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3974{
a0ed51b3 3975 if (off >= 0) {
1aa99e6b 3976 while (off-- && s < lim) {
ffc61ed2 3977 /* XXX could check well-formedness here */
a0ed51b3 3978 s += UTF8SKIP(s);
ffc61ed2 3979 }
a0ed51b3
LW
3980 }
3981 else {
3982 while (off++) {
1aa99e6b 3983 if (s > lim) {
a0ed51b3 3984 s--;
ffc61ed2 3985 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3986 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3987 s--;
ffc61ed2
JH
3988 }
3989 /* XXX could check well-formedness here */
a0ed51b3
LW
3990 }
3991 }
3992 }
3993 return s;
3994}
161b471a 3995
dfe13c55 3996STATIC U8 *
1aa99e6b 3997S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 3998{
1aa99e6b
IH
3999 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4000}
4001
4002STATIC U8 *
4003S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4004{
4005 if (off >= 0) {
1aa99e6b 4006 while (off-- && s < lim) {
ffc61ed2 4007 /* XXX could check well-formedness here */
a0ed51b3 4008 s += UTF8SKIP(s);
ffc61ed2 4009 }
a0ed51b3
LW
4010 if (off >= 0)
4011 return 0;
4012 }
4013 else {
4014 while (off++) {
1aa99e6b 4015 if (s > lim) {
a0ed51b3 4016 s--;
ffc61ed2 4017 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4018 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4019 s--;
ffc61ed2
JH
4020 }
4021 /* XXX could check well-formedness here */
a0ed51b3
LW
4022 }
4023 else
4024 break;
4025 }
4026 if (off <= 0)
4027 return 0;
4028 }
4029 return s;
4030}
51371543
GS
4031
4032#ifdef PERL_OBJECT
51371543
GS
4033#include "XSUB.h"
4034#endif
4035
4036static void
4037restore_pos(pTHXo_ void *arg)
4038{
51371543
GS
4039 if (PL_reg_eval_set) {
4040 if (PL_reg_oldsaved) {
4041 PL_reg_re->subbeg = PL_reg_oldsaved;
4042 PL_reg_re->sublen = PL_reg_oldsavedlen;
4043 RX_MATCH_COPIED_on(PL_reg_re);
4044 }
4045 PL_reg_magic->mg_len = PL_reg_oldpos;
4046 PL_reg_eval_set = 0;
4047 PL_curpm = PL_reg_oldcurpm;
4048 }
4049}