This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Casts to stop warnings in reg*.c
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
40d34c0d
SB
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
10 * a regular expression.
bb3580c7
NC
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
40d34c0d
SB
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
e6906430 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
04851bb3 81 **** 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
dfe13c55
GS
119#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
121#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
123#define HOPc(pos,off) ((char*)HOP(pos,off))
124#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 125
efb30f32 126#define HOPBACK(pos, off) ( \
e54858b0 127 (PL_reg_match_utf8) \
efb30f32
HS
128 ? reghopmaybe((U8*)pos, -off) \
129 : (pos - off >= PL_bostr) \
130 ? (U8*)(pos - off) \
131 : (U8*)NULL \
132)
133#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134
1aa99e6b
IH
135#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
137#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
139#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141
fab1f349 142#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
47a91a97 143 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((U8*)str); assert(ok); LEAVE; } } STMT_END
fab1f349
AL
144#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 148
5f80c4cf 149/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
150#define JUMPABLE(rn) ( \
151 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
152 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153 OP(rn) == PLUS || OP(rn) == MINMOD || \
154 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
155)
156
cca55fe3
JP
157#define HAS_TEXT(rn) ( \
158 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159)
e2d8ce26 160
a84d97b6
HS
161/*
162 Search for mandatory following text node; for lookahead, the text must
163 follow but for lookbehind (rn->flags != 0) we skip to the next step.
164*/
cca55fe3 165#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 166 while (JUMPABLE(rn)) \
a84d97b6 167 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 168 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
169 else if (OP(rn) == PLUS) \
170 rn = NEXTOPER(rn); \
a84d97b6
HS
171 else if (OP(rn) == IFMATCH) \
172 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 173 else rn += NEXT_OFF(rn); \
5f80c4cf 174} STMT_END
74750237 175
acfe0abc 176static void restore_pos(pTHX_ void *arg);
51371543 177
76e3520e 178STATIC CHECKPOINT
cea2e8a9 179S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 180{
8c18bf38 181 const int retval = PL_savestack_ix;
b1ce53c5 182#define REGCP_PAREN_ELEMS 4
8c18bf38 183 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
184 int p;
185
e49a9654
IH
186 if (paren_elems_to_push < 0)
187 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
188
a01268b5 189#define REGCP_OTHER_ELEMS 6
f3479639 190 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 191 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 192/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
193 SSPUSHINT(PL_regendp[p]);
194 SSPUSHINT(PL_regstartp[p]);
3280af22 195 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
196 SSPUSHINT(p);
197 }
b1ce53c5 198/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
199 SSPUSHINT(PL_regsize);
200 SSPUSHINT(*PL_reglastparen);
a01268b5 201 SSPUSHINT(*PL_reglastcloseparen);
3280af22 202 SSPUSHPTR(PL_reginput);
41123dfd
JH
203#define REGCP_FRAME_ELEMS 2
204/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
205 * are needed for the regexp context stack bookkeeping. */
206 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 207 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 208
a0d0e21e
LW
209 return retval;
210}
211
c277df42 212/* These are needed since we do not localize EVAL nodes: */
02db2b7b 213# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 214 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 215 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 216
02db2b7b 217# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 218 PerlIO_printf(Perl_debug_log, \
faccc32b 219 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 220 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 221
76e3520e 222STATIC char *
cea2e8a9 223S_regcppop(pTHX)
a0d0e21e 224{
b1ce53c5 225 I32 i;
a0d0e21e
LW
226 U32 paren = 0;
227 char *input;
b1ce53c5
JH
228
229 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 230 i = SSPOPINT;
b1ce53c5
JH
231 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 233 input = (char *) SSPOPPTR;
a01268b5 234 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
235 *PL_reglastparen = SSPOPINT;
236 PL_regsize = SSPOPINT;
b1ce53c5
JH
237
238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
228fe6e6 241 I32 tmps;
a0d0e21e 242 paren = (U32)SSPOPINT;
3280af22 243 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
244 PL_regstartp[paren] = SSPOPINT;
245 tmps = SSPOPINT;
3280af22
NIS
246 if (paren <= *PL_reglastparen)
247 PL_regendp[paren] = tmps;
c277df42 248 DEBUG_r(
c3464db5 249 PerlIO_printf(Perl_debug_log,
b900a521 250 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 251 (UV)paren, (IV)PL_regstartp[paren],
b900a521 252 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 253 (IV)PL_regendp[paren],
3280af22 254 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 255 );
a0d0e21e 256 }
c277df42 257 DEBUG_r(
eb160463 258 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 259 PerlIO_printf(Perl_debug_log,
faccc32b
JH
260 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
261 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
262 }
263 );
daf18116 264#if 1
dafc8851
JH
265 /* It would seem that the similar code in regtry()
266 * already takes care of this, and in fact it is in
267 * a better location to since this code can #if 0-ed out
268 * but the code in regtry() is needed or otherwise tests
269 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
270 * (as of patchlevel 7877) will fail. Then again,
271 * this code seems to be necessary or otherwise
272 * building DynaLoader will fail:
273 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274 * --jhi */
eb160463
GS
275 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
276 if ((I32)paren > PL_regsize)
cf93c79d
IZ
277 PL_regstartp[paren] = -1;
278 PL_regendp[paren] = -1;
a0d0e21e 279 }
dafc8851 280#endif
a0d0e21e
LW
281 return input;
282}
283
0f5d15d6 284STATIC char *
cea2e8a9 285S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 286{
8c18bf38 287 const I32 tmp = PL_savestack_ix;
0f5d15d6
IZ
288
289 PL_savestack_ix = ss;
290 regcppop();
291 PL_savestack_ix = tmp;
942e002e 292 return Nullch;
0f5d15d6
IZ
293}
294
295typedef struct re_cc_state
296{
297 I32 ss;
298 regnode *node;
299 struct re_cc_state *prev;
300 CURCUR *cc;
301 regexp *re;
302} re_cc_state;
303
02db2b7b 304#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 305
29d1e993
HS
306#define TRYPAREN(paren, n, input) { \
307 if (paren) { \
308 if (n) { \
309 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
310 PL_regendp[paren] = input - PL_bostr; \
311 } \
312 else \
313 PL_regendp[paren] = -1; \
314 } \
315 if (regmatch(next)) \
316 sayYES; \
317 if (paren && n) \
318 PL_regendp[paren] = -1; \
319}
320
321
a687059c 322/*
e50aee73 323 * pregexec and friends
a687059c
LW
324 */
325
326/*
c277df42 327 - pregexec - match a regexp against a string
a687059c 328 */
c277df42 329I32
864dbfa3 330Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 331 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
332/* strend: pointer to null at end of string */
333/* strbeg: real beginning of string */
334/* minend: end of match must be >=minend after stringarg. */
335/* nosave: For optimizations. */
336{
337 return
9041c2e3 338 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
339 nosave ? 0 : REXEC_COPY_STR);
340}
0f5d15d6
IZ
341
342STATIC void
cea2e8a9 343S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
344{
345 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
346#ifdef DEBUGGING
347 PL_regprogram = prog->program;
348#endif
349 PL_regnpar = prog->nparens;
9041c2e3
NIS
350 PL_regdata = prog->data;
351 PL_reg_re = prog;
0f5d15d6 352}
22e551b9 353
9041c2e3 354/*
cad2e5aa
JH
355 * Need to implement the following flags for reg_anch:
356 *
357 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
358 * USE_INTUIT_ML
359 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
360 * INTUIT_AUTORITATIVE_ML
361 * INTUIT_ONCE_NOML - Intuit can match in one location only.
362 * INTUIT_ONCE_ML
363 *
364 * Another flag for this function: SECOND_TIME (so that float substrs
365 * with giant delta may be not rechecked).
366 */
367
368/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
369
fdac8c4b 370/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
371 Otherwise, only SvCUR(sv) is used to get strbeg. */
372
373/* XXXX We assume that strpos is strbeg unless sv. */
374
6eb5f6b9
JH
375/* XXXX Some places assume that there is a fixed substring.
376 An update may be needed if optimizer marks as "INTUITable"
377 RExen without fixed substrings. Similarly, it is assumed that
378 lengths of all the strings are no more than minlen, thus they
379 cannot come from lookahead.
380 (Or minlen should take into account lookahead.) */
381
2c2d71f5
JH
382/* A failure to find a constant substring means that there is no need to make
383 an expensive call to REx engine, thus we celebrate a failure. Similarly,
384 finding a substring too deep into the string means that less calls to
30944b6d
IZ
385 regtry() should be needed.
386
387 REx compiler's optimizer found 4 possible hints:
388 a) Anchored substring;
389 b) Fixed substring;
390 c) Whether we are anchored (beginning-of-line or \G);
391 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 392 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
393 string which does not contradict any of them.
394 */
2c2d71f5 395
6eb5f6b9
JH
396/* Most of decisions we do here should have been done at compile time.
397 The nodes of the REx which we used for the search should have been
398 deleted from the finite automaton. */
399
cad2e5aa
JH
400char *
401Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
402 char *strend, U32 flags, re_scream_pos_data *data)
403{
b7953727 404 register I32 start_shift = 0;
cad2e5aa 405 /* Should be nonnegative! */
b7953727 406 register I32 end_shift = 0;
2c2d71f5
JH
407 register char *s;
408 register SV *check;
a1933d95 409 char *strbeg;
cad2e5aa 410 char *t;
8c18bf38 411 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 412 I32 ml_anch;
6eb5f6b9 413 register char *other_last = Nullch; /* other substr checked before this */
b7953727 414 char *check_at = Nullch; /* check substr found at this pos */
30944b6d 415#ifdef DEBUGGING
c6d79d47
AL
416 const char * const i_strpos = strpos;
417 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 418#endif
2ea5368e 419 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 420
b8d68ded
JH
421 if (prog->reganch & ROPT_UTF8) {
422 DEBUG_r(PerlIO_printf(Perl_debug_log,
423 "UTF-8 regex...\n"));
424 PL_reg_flags |= RF_utf8;
425 }
426
2a782b5b 427 DEBUG_r({
228fe6e6 428 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
429 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
430 strpos;
228fe6e6 431 const int len = PL_reg_match_utf8 ?
b8d68ded 432 strlen(s) : strend - strpos;
2a782b5b
JH
433 if (!PL_colorset)
434 reginitcolors();
b8d68ded
JH
435 if (PL_reg_match_utf8)
436 DEBUG_r(PerlIO_printf(Perl_debug_log,
437 "UTF-8 target...\n"));
2a782b5b 438 PerlIO_printf(Perl_debug_log,
5332c881 439 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
2a782b5b
JH
440 PL_colors[4],PL_colors[5],PL_colors[0],
441 prog->precomp,
442 PL_colors[1],
443 (strlen(prog->precomp) > 60 ? "..." : ""),
444 PL_colors[0],
445 (int)(len > 60 ? 60 : len),
446 s, PL_colors[1],
447 (len > 60 ? "..." : "")
448 );
449 });
cad2e5aa 450
c344f387
JH
451 /* CHR_DIST() would be more correct here but it makes things slow. */
452 if (prog->minlen > strend - strpos) {
a72c7584
JH
453 DEBUG_r(PerlIO_printf(Perl_debug_log,
454 "String too short... [re_intuit_start]\n"));
cad2e5aa 455 goto fail;
2c2d71f5 456 }
a1933d95 457 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 458 PL_regeol = strend;
33b8afdf
JH
459 if (do_utf8) {
460 if (!prog->check_utf8 && prog->check_substr)
461 to_utf8_substr(prog);
462 check = prog->check_utf8;
463 } else {
464 if (!prog->check_substr && prog->check_utf8)
465 to_byte_substr(prog);
466 check = prog->check_substr;
467 }
468 if (check == &PL_sv_undef) {
469 DEBUG_r(PerlIO_printf(Perl_debug_log,
470 "Non-utf string cannot match utf check string\n"));
471 goto fail;
472 }
2c2d71f5 473 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
474 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
475 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 476 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 477
7e25d62c
JH
478 if (!ml_anch) {
479 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
480 | ROPT_IMPLICIT)) /* not a real BOL */
fdac8c4b 481 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
482 && sv && !SvROK(sv)
483 && (strpos != strbeg)) {
484 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
485 goto fail;
486 }
487 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 488 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 489 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
490 I32 slen;
491
1aa99e6b 492 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
493 if (SvTAIL(check)) {
494 slen = SvCUR(check); /* >= 1 */
cad2e5aa 495
9041c2e3 496 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
497 || (strend - s == slen && strend[-1] != '\n')) {
498 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
499 goto fail_finish;
cad2e5aa
JH
500 }
501 /* Now should match s[0..slen-2] */
502 slen--;
fdac8c4b 503 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 504 || (slen > 1
fdac8c4b 505 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5
JH
506 report_neq:
507 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
508 goto fail_finish;
509 }
cad2e5aa 510 }
fdac8c4b 511 else if (*SvPVX_const(check) != *s
653099ff 512 || ((slen = SvCUR(check)) > 1
fdac8c4b 513 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5
JH
514 goto report_neq;
515 goto success_at_start;
7e25d62c 516 }
cad2e5aa 517 }
2c2d71f5 518 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 519 s = strpos;
2c2d71f5 520 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 521 end_shift = prog->minlen - start_shift -
653099ff 522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 523 if (!ml_anch) {
8c18bf38 524 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 525 - (SvTAIL(check) != 0);
8c18bf38 526 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
527
528 if (end_shift < eshift)
529 end_shift = eshift;
530 }
cad2e5aa 531 }
2c2d71f5 532 else { /* Can match at random position */
cad2e5aa
JH
533 ml_anch = 0;
534 s = strpos;
2c2d71f5
JH
535 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
536 /* Should be nonnegative! */
537 end_shift = prog->minlen - start_shift -
653099ff 538 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
539 }
540
2c2d71f5 541#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 542 if (end_shift < 0)
6bbae5e6 543 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
544#endif
545
2c2d71f5
JH
546 restart:
547 /* Find a possible match in the region s..strend by looking for
548 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 549 if (flags & REXEC_SCREAM) {
cad2e5aa 550 I32 p = -1; /* Internal iterator of scream. */
8c18bf38 551 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 552
2c2d71f5
JH
553 if (PL_screamfirst[BmRARE(check)] >= 0
554 || ( BmRARE(check) == '\n'
555 && (BmPREVIOUS(check) == SvCUR(check) - 1)
556 && SvTAIL(check) ))
9041c2e3 557 s = screaminstr(sv, check,
2c2d71f5 558 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 559 else
2c2d71f5 560 goto fail_finish;
88af3bc3
JH
561 /* we may be pointing at the wrong string */
562 if (s && RX_MATCH_COPIED(prog))
fdac8c4b 563 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
564 if (data)
565 *data->scream_olds = s;
566 }
f33976b4 567 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
568 s = fbm_instr((U8*)(s + start_shift),
569 (U8*)(strend - end_shift),
570 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 571 else
1aa99e6b
IH
572 s = fbm_instr(HOP3(s, start_shift, strend),
573 HOP3(strend, -end_shift, strbeg),
2c2d71f5 574 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
575
576 /* Update the count-of-usability, remove useless subpatterns,
577 unshift s. */
2c2d71f5 578
5332c881
AL
579 /* FIXME - DEBUG_EXECUTE_r if that is merged to maint. */
580 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 581 (s ? "Found" : "Did not find"),
33b8afdf 582 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 583 PL_colors[0],
7b0972df 584 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
fdac8c4b 585 SvPVX_const(check),
2c2d71f5
JH
586 PL_colors[1], (SvTAIL(check) ? "$" : ""),
587 (s ? " at offset " : "...\n") ) );
588
589 if (!s)
590 goto fail_finish;
591
6eb5f6b9
JH
592 check_at = s;
593
2c2d71f5 594 /* Finish the diagnostic message */
30944b6d 595 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
596
597 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
598 Start with the other substr.
599 XXXX no SCREAM optimization yet - and a very coarse implementation
5332c881 600 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
601 *always* match. Probably should be marked during compile...
602 Probably it is right to do no SCREAM here...
603 */
604
33b8afdf 605 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 606 /* Take into account the "other" substring. */
2c2d71f5
JH
607 /* XXXX May be hopelessly wrong for UTF... */
608 if (!other_last)
6eb5f6b9 609 other_last = strpos;
33b8afdf 610 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
611 do_other_anchored:
612 {
c6d79d47
AL
613 char * const last = HOP3c(s, -start_shift, strbeg);
614 char *last1, *last2;
2c2d71f5 615 char *s1 = s;
33b8afdf 616 SV* must;
2c2d71f5 617
2c2d71f5
JH
618 t = s - prog->check_offset_max;
619 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 620 && (!do_utf8
1aa99e6b 621 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 622 && t > strpos)))
30944b6d 623 /* EMPTY */;
2c2d71f5
JH
624 else
625 t = strpos;
1aa99e6b 626 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
627 if (t < other_last) /* These positions already checked */
628 t = other_last;
1aa99e6b 629 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
630 if (last < last1)
631 last1 = last;
632 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
633 /* On end-of-str: see comment below. */
33b8afdf
JH
634 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
635 if (must == &PL_sv_undef) {
636 s = (char*)NULL;
637 DEBUG_r(must = prog->anchored_utf8); /* for debug */
638 }
639 else
640 s = fbm_instr(
641 (unsigned char*)t,
642 HOP3(HOP3(last1, prog->anchored_offset, strend)
643 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
644 must,
645 PL_multiline ? FBMrf_MULTILINE : 0
646 );
1aa99e6b 647 DEBUG_r(PerlIO_printf(Perl_debug_log,
5332c881 648 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
649 (s ? "Found" : "Contradicts"),
650 PL_colors[0],
33b8afdf
JH
651 (int)(SvCUR(must)
652 - (SvTAIL(must)!=0)),
fdac8c4b 653 SvPVX_const(must),
33b8afdf 654 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
655 if (!s) {
656 if (last1 >= last2) {
657 DEBUG_r(PerlIO_printf(Perl_debug_log,
658 ", giving up...\n"));
659 goto fail_finish;
660 }
661 DEBUG_r(PerlIO_printf(Perl_debug_log,
662 ", trying floating at offset %ld...\n",
1aa99e6b
IH
663 (long)(HOP3c(s1, 1, strend) - i_strpos)));
664 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
665 s = HOP3c(last, 1, strend);
2c2d71f5
JH
666 goto restart;
667 }
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 670 (long)(s - i_strpos)));
1aa99e6b
IH
671 t = HOP3c(s, -prog->anchored_offset, strbeg);
672 other_last = HOP3c(s, 1, strend);
30944b6d 673 s = s1;
2c2d71f5
JH
674 if (t == strpos)
675 goto try_at_start;
2c2d71f5
JH
676 goto try_at_offset;
677 }
30944b6d 678 }
2c2d71f5
JH
679 }
680 else { /* Take into account the floating substring. */
33b8afdf
JH
681 char *last, *last1;
682 char *s1 = s;
683 SV* must;
684
685 t = HOP3c(s, -start_shift, strbeg);
686 last1 = last =
687 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
688 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
689 last = HOP3c(t, prog->float_max_offset, strend);
690 s = HOP3c(t, prog->float_min_offset, strend);
691 if (s < other_last)
692 s = other_last;
2c2d71f5 693 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
694 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
695 /* fbm_instr() takes into account exact value of end-of-str
696 if the check is SvTAIL(ed). Since false positives are OK,
697 and end-of-str is not later than strend we are OK. */
698 if (must == &PL_sv_undef) {
699 s = (char*)NULL;
700 DEBUG_r(must = prog->float_utf8); /* for debug message */
701 }
702 else
2c2d71f5 703 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
704 (unsigned char*)last + SvCUR(must)
705 - (SvTAIL(must)!=0),
706 must, PL_multiline ? FBMrf_MULTILINE : 0);
5332c881
AL
707 /* FIXME - DEBUG_EXECUTE_r if that is merged to maint */
708 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
709 (s ? "Found" : "Contradicts"),
710 PL_colors[0],
711 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
fdac8c4b 712 SvPVX_const(must),
33b8afdf
JH
713 PL_colors[1], (SvTAIL(must) ? "$" : "")));
714 if (!s) {
715 if (last1 == last) {
2c2d71f5 716 DEBUG_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
717 ", giving up...\n"));
718 goto fail_finish;
2c2d71f5 719 }
33b8afdf
JH
720 DEBUG_r(PerlIO_printf(Perl_debug_log,
721 ", trying anchored starting at offset %ld...\n",
722 (long)(s1 + 1 - i_strpos)));
723 other_last = last;
724 s = HOP3c(t, 1, strend);
725 goto restart;
726 }
727 else {
728 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
729 (long)(s - i_strpos)));
730 other_last = s; /* Fix this later. --Hugo */
731 s = s1;
732 if (t == strpos)
733 goto try_at_start;
734 goto try_at_offset;
735 }
2c2d71f5 736 }
cad2e5aa 737 }
2c2d71f5
JH
738
739 t = s - prog->check_offset_max;
2c2d71f5 740 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 741 && (!do_utf8
1aa99e6b
IH
742 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
743 && t > strpos))) {
2c2d71f5
JH
744 /* Fixed substring is found far enough so that the match
745 cannot start at strpos. */
746 try_at_offset:
cad2e5aa 747 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
748 /* Eventually fbm_*() should handle this, but often
749 anchored_offset is not 0, so this check will not be wasted. */
750 /* XXXX In the code below we prefer to look for "^" even in
751 presence of anchored substrings. And we search even
752 beyond the found float position. These pessimizations
753 are historical artefacts only. */
754 find_anchor:
2c2d71f5 755 while (t < strend - prog->minlen) {
cad2e5aa 756 if (*t == '\n') {
4ee3650e 757 if (t < check_at - prog->check_offset_min) {
33b8afdf 758 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
759 /* Since we moved from the found position,
760 we definitely contradict the found anchored
30944b6d
IZ
761 substr. Due to the above check we do not
762 contradict "check" substr.
763 Thus we can arrive here only if check substr
764 is float. Redo checking for "other"=="fixed".
765 */
9041c2e3 766 strpos = t + 1;
30944b6d
IZ
767 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
768 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
769 goto do_other_anchored;
770 }
4ee3650e
GS
771 /* We don't contradict the found floating substring. */
772 /* XXXX Why not check for STCLASS? */
cad2e5aa 773 s = t + 1;
2c2d71f5 774 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 775 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
776 goto set_useful;
777 }
4ee3650e
GS
778 /* Position contradicts check-string */
779 /* XXXX probably better to look for check-string
780 than for "\n", so one should lower the limit for t? */
781 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 782 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 783 other_last = strpos = s = t + 1;
cad2e5aa
JH
784 goto restart;
785 }
786 t++;
787 }
2c2d71f5
JH
788 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
789 PL_colors[0],PL_colors[1]));
790 goto fail_finish;
cad2e5aa 791 }
f5952150
GS
792 else {
793 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
794 PL_colors[0],PL_colors[1]));
795 }
cad2e5aa
JH
796 s = t;
797 set_useful:
33b8afdf 798 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
799 }
800 else {
f5952150 801 /* The found string does not prohibit matching at strpos,
2c2d71f5 802 - no optimization of calling REx engine can be performed,
f5952150
GS
803 unless it was an MBOL and we are not after MBOL,
804 or a future STCLASS check will fail this. */
2c2d71f5
JH
805 try_at_start:
806 /* Even in this situation we may use MBOL flag if strpos is offset
807 wrt the start of the string. */
05b4157f 808 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 809 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
810 /* May be due to an implicit anchor of m{.*foo} */
811 && !(prog->reganch & ROPT_IMPLICIT))
812 {
cad2e5aa
JH
813 t = strpos;
814 goto find_anchor;
815 }
30944b6d 816 DEBUG_r( if (ml_anch)
f5952150
GS
817 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
818 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 819 );
2c2d71f5 820 success_at_start:
30944b6d 821 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
822 && (do_utf8 ? (
823 prog->check_utf8 /* Could be deleted already */
824 && --BmUSEFUL(prog->check_utf8) < 0
825 && (prog->check_utf8 == prog->float_utf8)
826 ) : (
827 prog->check_substr /* Could be deleted already */
828 && --BmUSEFUL(prog->check_substr) < 0
829 && (prog->check_substr == prog->float_substr)
830 )))
66e933ab 831 {
cad2e5aa 832 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 833 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
834 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
835 if (do_utf8 ? prog->check_substr : prog->check_utf8)
836 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
837 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
838 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
5e39e1e5 839 check = Nullsv; /* abort */
cad2e5aa 840 s = strpos;
3cf5c195
IZ
841 /* XXXX This is a remnant of the old implementation. It
842 looks wasteful, since now INTUIT can use many
6eb5f6b9 843 other heuristics. */
cad2e5aa
JH
844 prog->reganch &= ~RE_USE_INTUIT;
845 }
846 else
847 s = strpos;
848 }
849
6eb5f6b9
JH
850 /* Last resort... */
851 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
852 if (prog->regstclass) {
853 /* minlen == 0 is possible if regstclass is \b or \B,
854 and the fixed substr is ''$.
855 Since minlen is already taken into account, s+1 is before strend;
856 accidentally, minlen >= 1 guaranties no false positives at s + 1
857 even for \b or \B. But (minlen? 1 : 0) below assumes that
858 regstclass does not come from lookahead... */
859 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
860 This leaves EXACTF only, which is dealt with in find_byclass(). */
c6d79d47 861 const U8* const str = (U8*)STRING(prog->regstclass);
ec6f298e 862 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
47a91a97
NC
863 ? CHR_DIST((U8 *)str+STR_LEN(prog->regstclass),
864 (U8 *)str)
66e933ab 865 : 1);
8c18bf38 866 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 867 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 868 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
869 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
870 cl_l, strend)
871 : strend);
6eb5f6b9
JH
872
873 t = s;
975adce1 874 cache_re(prog);
ec6f298e 875 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
876 if (!s) {
877#ifdef DEBUGGING
c05e0e2f 878 const char *what = 0;
6eb5f6b9
JH
879#endif
880 if (endpos == strend) {
881 DEBUG_r( PerlIO_printf(Perl_debug_log,
882 "Could not match STCLASS...\n") );
883 goto fail;
884 }
66e933ab
GS
885 DEBUG_r( PerlIO_printf(Perl_debug_log,
886 "This position contradicts STCLASS...\n") );
653099ff
GS
887 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
888 goto fail;
6eb5f6b9 889 /* Contradict one of substrings */
33b8afdf
JH
890 if (prog->anchored_substr || prog->anchored_utf8) {
891 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
6eb5f6b9
JH
892 DEBUG_r( what = "anchored" );
893 hop_and_restart:
1aa99e6b 894 s = HOP3c(t, 1, strend);
66e933ab
GS
895 if (s + start_shift + end_shift > strend) {
896 /* XXXX Should be taken into account earlier? */
897 DEBUG_r( PerlIO_printf(Perl_debug_log,
898 "Could not match STCLASS...\n") );
899 goto fail;
900 }
5e39e1e5
HS
901 if (!check)
902 goto giveup;
6eb5f6b9 903 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 904 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
905 what, (long)(s + start_shift - i_strpos)) );
906 goto restart;
907 }
66e933ab 908 /* Have both, check_string is floating */
6eb5f6b9
JH
909 if (t + start_shift >= check_at) /* Contradicts floating=check */
910 goto retry_floating_check;
911 /* Recheck anchored substring, but not floating... */
9041c2e3 912 s = check_at;
5e39e1e5
HS
913 if (!check)
914 goto giveup;
6eb5f6b9 915 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 916 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
917 (long)(other_last - i_strpos)) );
918 goto do_other_anchored;
919 }
60e71179
GS
920 /* Another way we could have checked stclass at the
921 current position only: */
922 if (ml_anch) {
923 s = t = t + 1;
5e39e1e5
HS
924 if (!check)
925 goto giveup;
60e71179 926 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
927 "Looking for /%s^%s/m starting at offset %ld...\n",
928 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 929 goto try_at_offset;
66e933ab 930 }
33b8afdf 931 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 932 goto fail;
6eb5f6b9
JH
933 /* Check is floating subtring. */
934 retry_floating_check:
935 t = check_at - start_shift;
936 DEBUG_r( what = "floating" );
937 goto hop_and_restart;
938 }
b7953727
JH
939 if (t != s) {
940 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 941 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
942 (long)(t - i_strpos), (long)(s - i_strpos))
943 );
944 }
945 else {
946 DEBUG_r(PerlIO_printf(Perl_debug_log,
947 "Does not contradict STCLASS...\n");
948 );
949 }
6eb5f6b9 950 }
5e39e1e5
HS
951 giveup:
952 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
953 PL_colors[4], (check ? "Guessed" : "Giving up"),
954 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 955 return s;
2c2d71f5
JH
956
957 fail_finish: /* Substring not found */
33b8afdf
JH
958 if (prog->check_substr || prog->check_utf8) /* could be removed already */
959 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 960 fail:
2c2d71f5 961 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
962 PL_colors[4],PL_colors[5]));
963 return Nullch;
964}
9661b544 965
6eb5f6b9 966/* We know what class REx starts with. Try to find this position... */
3c3eec57 967STATIC char *
8c18bf38 968S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 969{
228fe6e6 970 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 971 char *m;
d8093b23 972 STRLEN ln;
bcefe257 973 STRLEN lnc;
45bfbaf1 974 register STRLEN uskip;
d8093b23
G
975 unsigned int c1;
976 unsigned int c2;
6eb5f6b9
JH
977 char *e;
978 register I32 tmp = 1; /* Scratch variable? */
8c18bf38 979 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 980
6eb5f6b9
JH
981 /* We know what class it must start with. */
982 switch (OP(c)) {
6eb5f6b9 983 case ANYOF:
d46a7032 984 if (do_utf8) {
45bfbaf1 985 while (s + (uskip = UTF8SKIP(s)) <= strend) {
d46a7032
JH
986 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
987 !UTF8_IS_INVARIANT((U8)s[0]) ?
988 reginclass(c, (U8*)s, 0, do_utf8) :
989 REGINCLASS(c, (U8*)s)) {
990 if (tmp && (norun || regtry(prog, s)))
991 goto got_it;
992 else
993 tmp = doevery;
994 }
995 else
996 tmp = 1;
45bfbaf1 997 s += uskip;
d46a7032
JH
998 }
999 }
1000 else {
1001 while (s < strend) {
1002 STRLEN skip = 1;
1003
1004 if (REGINCLASS(c, (U8*)s) ||
1005 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1006 /* The assignment of 2 is intentional:
1007 * for the folded sharp s, the skip is 2. */
1008 (skip = SHARP_S_SKIP))) {
1009 if (tmp && (norun || regtry(prog, s)))
1010 goto got_it;
1011 else
1012 tmp = doevery;
1013 }
1014 else
1015 tmp = 1;
1016 s += skip;
1017 }
a0d0e21e 1018 }
6eb5f6b9 1019 break;
f33976b4
DB
1020 case CANY:
1021 while (s < strend) {
1022 if (tmp && (norun || regtry(prog, s)))
1023 goto got_it;
1024 else
1025 tmp = doevery;
1026 s++;
1027 }
1028 break;
6eb5f6b9 1029 case EXACTF:
bcefe257
NIS
1030 m = STRING(c);
1031 ln = STR_LEN(c); /* length to match in octets/bytes */
1032 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1033 if (UTF) {
a2a2844f 1034 STRLEN ulen1, ulen2;
bcefe257 1035 U8 *sm = (U8 *) m;
a2a469f9
JH
1036 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1037 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
1038
1039 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1040 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1041
a2a469f9 1042 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
872c91ae 1043 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
a2a469f9 1044 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
872c91ae 1045 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
bcefe257
NIS
1046 lnc = 0;
1047 while (sm < ((U8 *) m + ln)) {
1048 lnc++;
1049 sm += UTF8SKIP(sm);
1050 }
1aa99e6b
IH
1051 }
1052 else {
1053 c1 = *(U8*)m;
1054 c2 = PL_fold[c1];
1055 }
6eb5f6b9
JH
1056 goto do_exactf;
1057 case EXACTFL:
bcefe257
NIS
1058 m = STRING(c);
1059 ln = STR_LEN(c);
1060 lnc = (I32) ln;
d8093b23 1061 c1 = *(U8*)m;
6eb5f6b9
JH
1062 c2 = PL_fold_locale[c1];
1063 do_exactf:
dc904a75 1064 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1065
6eb5f6b9
JH
1066 if (norun && e < s)
1067 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1068
60a8b682
JH
1069 /* The idea in the EXACTF* cases is to first find the
1070 * first character of the EXACTF* node and then, if
1071 * necessary, case-insensitively compare the full
1072 * text of the node. The c1 and c2 are the first
1073 * characters (though in Unicode it gets a bit
1074 * more complicated because there are more cases
7f16dd3d
JH
1075 * than just upper and lower: one needs to use
1076 * the so-called folding case for case-insensitive
1077 * matching (called "loose matching" in Unicode).
1078 * ibcmp_utf8() will do just that. */
60a8b682 1079
1aa99e6b 1080 if (do_utf8) {
575cac57 1081 UV c, f;
a2a469f9 1082 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1083 STRLEN len, foldlen;
d7f013c8 1084
09091399 1085 if (c1 == c2) {
bcefe257
NIS
1086 /* Upper and lower of 1st char are equal -
1087 * probably not a "letter". */
1aa99e6b 1088 while (s <= e) {
a2a469f9 1089 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1090 ckWARN(WARN_UTF8) ?
1091 0 : UTF8_ALLOW_ANY);
80aecb99
JH
1092 if ( c == c1
1093 && (ln == len ||
66423254 1094 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1095 m, (char **)0, ln, (bool)UTF))
55da9344 1096 && (norun || regtry(prog, s)) )
1aa99e6b 1097 goto got_it;
80aecb99 1098 else {
228fe6e6 1099 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1100 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1101 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1102 if ( f != c
1103 && (f == c1 || f == c2)
1104 && (ln == foldlen ||
66423254
JH
1105 !ibcmp_utf8((char *) foldbuf,
1106 (char **)0, foldlen, do_utf8,
d07ddd77 1107 m,
eb160463 1108 (char **)0, ln, (bool)UTF))
80aecb99
JH
1109 && (norun || regtry(prog, s)) )
1110 goto got_it;
1111 }
1aa99e6b
IH
1112 s += len;
1113 }
09091399
JH
1114 }
1115 else {
1aa99e6b 1116 while (s <= e) {
a2a469f9 1117 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1118 ckWARN(WARN_UTF8) ?
1119 0 : UTF8_ALLOW_ANY);
80aecb99 1120
60a8b682 1121 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1122 * Note that not all the possible combinations
1123 * are handled here: some of them are handled
1124 * by the standard folding rules, and some of
1125 * them (the character class or ANYOF cases)
1126 * are handled during compiletime in
1127 * regexec.c:S_regclass(). */
880bd946
JH
1128 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1129 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1130 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1131
1132 if ( (c == c1 || c == c2)
1133 && (ln == len ||
66423254 1134 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1135 m, (char **)0, ln, (bool)UTF))
55da9344 1136 && (norun || regtry(prog, s)) )
1aa99e6b 1137 goto got_it;
80aecb99 1138 else {
228fe6e6 1139 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1140 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1141 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1142 if ( f != c
1143 && (f == c1 || f == c2)
1144 && (ln == foldlen ||
a6872d42 1145 !ibcmp_utf8((char *) foldbuf,
66423254 1146 (char **)0, foldlen, do_utf8,
d07ddd77 1147 m,
eb160463 1148 (char **)0, ln, (bool)UTF))
80aecb99
JH
1149 && (norun || regtry(prog, s)) )
1150 goto got_it;
1151 }
1aa99e6b
IH
1152 s += len;
1153 }
09091399 1154 }
1aa99e6b
IH
1155 }
1156 else {
1157 if (c1 == c2)
1158 while (s <= e) {
1159 if ( *(U8*)s == c1
1160 && (ln == 1 || !(OP(c) == EXACTF
1161 ? ibcmp(s, m, ln)
1162 : ibcmp_locale(s, m, ln)))
1163 && (norun || regtry(prog, s)) )
1164 goto got_it;
1165 s++;
1166 }
1167 else
1168 while (s <= e) {
1169 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1170 && (ln == 1 || !(OP(c) == EXACTF
1171 ? ibcmp(s, m, ln)
1172 : ibcmp_locale(s, m, ln)))
1173 && (norun || regtry(prog, s)) )
1174 goto got_it;
1175 s++;
1176 }
b3c9acc1
IZ
1177 }
1178 break;
bbce6d69 1179 case BOUNDL:
3280af22 1180 PL_reg_flags |= RF_tainted;
bbce6d69 1181 /* FALL THROUGH */
a0d0e21e 1182 case BOUND:
ffc61ed2 1183 if (do_utf8) {
12d33761 1184 if (s == PL_bostr)
ffc61ed2
JH
1185 tmp = '\n';
1186 else {
b4f7163a 1187 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1188
b4f7163a 1189 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1190 }
1191 tmp = ((OP(c) == BOUND ?
9041c2e3 1192 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
fab1f349 1193 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1194 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1195 if (tmp == !(OP(c) == BOUND ?
3568d838 1196 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1197 isALNUM_LC_utf8((U8*)s)))
1198 {
1199 tmp = !tmp;
1200 if ((norun || regtry(prog, s)))
1201 goto got_it;
1202 }
45bfbaf1 1203 s += uskip;
a687059c 1204 }
a0d0e21e 1205 }
667bb95a 1206 else {
12d33761 1207 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1208 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1209 while (s < strend) {
1210 if (tmp ==
1211 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1212 tmp = !tmp;
1213 if ((norun || regtry(prog, s)))
1214 goto got_it;
1215 }
1216 s++;
a0ed51b3 1217 }
a0ed51b3 1218 }
6eb5f6b9 1219 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1220 goto got_it;
1221 break;
bbce6d69 1222 case NBOUNDL:
3280af22 1223 PL_reg_flags |= RF_tainted;
bbce6d69 1224 /* FALL THROUGH */
a0d0e21e 1225 case NBOUND:
ffc61ed2 1226 if (do_utf8) {
12d33761 1227 if (s == PL_bostr)
ffc61ed2
JH
1228 tmp = '\n';
1229 else {
b4f7163a 1230 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1231
b4f7163a 1232 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1233 }
1234 tmp = ((OP(c) == NBOUND ?
9041c2e3 1235 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
fab1f349 1236 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1237 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1238 if (tmp == !(OP(c) == NBOUND ?
3568d838 1239 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1240 isALNUM_LC_utf8((U8*)s)))
1241 tmp = !tmp;
1242 else if ((norun || regtry(prog, s)))
1243 goto got_it;
45bfbaf1 1244 s += uskip;
ffc61ed2 1245 }
a0d0e21e 1246 }
667bb95a 1247 else {
12d33761 1248 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1249 tmp = ((OP(c) == NBOUND ?
1250 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1251 while (s < strend) {
1252 if (tmp ==
1253 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1254 tmp = !tmp;
1255 else if ((norun || regtry(prog, s)))
1256 goto got_it;
1257 s++;
1258 }
a0ed51b3 1259 }
6eb5f6b9 1260 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1261 goto got_it;
1262 break;
a0d0e21e 1263 case ALNUM:
ffc61ed2 1264 if (do_utf8) {
fab1f349 1265 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1266 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1267 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1268 if (tmp && (norun || regtry(prog, s)))
1269 goto got_it;
1270 else
1271 tmp = doevery;
1272 }
bbce6d69 1273 else
ffc61ed2 1274 tmp = 1;
45bfbaf1 1275 s += uskip;
bbce6d69 1276 }
bbce6d69 1277 }
ffc61ed2
JH
1278 else {
1279 while (s < strend) {
1280 if (isALNUM(*s)) {
1281 if (tmp && (norun || regtry(prog, s)))
1282 goto got_it;
1283 else
1284 tmp = doevery;
1285 }
a0ed51b3 1286 else
ffc61ed2
JH
1287 tmp = 1;
1288 s++;
a0ed51b3 1289 }
a0ed51b3
LW
1290 }
1291 break;
bbce6d69 1292 case ALNUML:
3280af22 1293 PL_reg_flags |= RF_tainted;
ffc61ed2 1294 if (do_utf8) {
45bfbaf1 1295 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1296 if (isALNUM_LC_utf8((U8*)s)) {
1297 if (tmp && (norun || regtry(prog, s)))
1298 goto got_it;
1299 else
1300 tmp = doevery;
1301 }
a687059c 1302 else
ffc61ed2 1303 tmp = 1;
45bfbaf1 1304 s += uskip;
a0d0e21e 1305 }
a0d0e21e 1306 }
ffc61ed2
JH
1307 else {
1308 while (s < strend) {
1309 if (isALNUM_LC(*s)) {
1310 if (tmp && (norun || regtry(prog, s)))
1311 goto got_it;
1312 else
1313 tmp = doevery;
1314 }
a0ed51b3 1315 else
ffc61ed2
JH
1316 tmp = 1;
1317 s++;
a0ed51b3 1318 }
a0ed51b3
LW
1319 }
1320 break;
a0d0e21e 1321 case NALNUM:
ffc61ed2 1322 if (do_utf8) {
fab1f349 1323 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1324 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1325 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1326 if (tmp && (norun || regtry(prog, s)))
1327 goto got_it;
1328 else
1329 tmp = doevery;
1330 }
bbce6d69 1331 else
ffc61ed2 1332 tmp = 1;
45bfbaf1 1333 s += uskip;
bbce6d69 1334 }
bbce6d69 1335 }
ffc61ed2
JH
1336 else {
1337 while (s < strend) {
1338 if (!isALNUM(*s)) {
1339 if (tmp && (norun || regtry(prog, s)))
1340 goto got_it;
1341 else
1342 tmp = doevery;
1343 }
a0ed51b3 1344 else
ffc61ed2
JH
1345 tmp = 1;
1346 s++;
a0ed51b3 1347 }
a0ed51b3
LW
1348 }
1349 break;
bbce6d69 1350 case NALNUML:
3280af22 1351 PL_reg_flags |= RF_tainted;
ffc61ed2 1352 if (do_utf8) {
45bfbaf1 1353 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1354 if (!isALNUM_LC_utf8((U8*)s)) {
1355 if (tmp && (norun || regtry(prog, s)))
1356 goto got_it;
1357 else
1358 tmp = doevery;
1359 }
a687059c 1360 else
ffc61ed2 1361 tmp = 1;
45bfbaf1 1362 s += uskip;
a687059c 1363 }
a0d0e21e 1364 }
ffc61ed2
JH
1365 else {
1366 while (s < strend) {
1367 if (!isALNUM_LC(*s)) {
1368 if (tmp && (norun || regtry(prog, s)))
1369 goto got_it;
1370 else
1371 tmp = doevery;
1372 }
a0ed51b3 1373 else
ffc61ed2
JH
1374 tmp = 1;
1375 s++;
a0ed51b3 1376 }
a0ed51b3
LW
1377 }
1378 break;
a0d0e21e 1379 case SPACE:
ffc61ed2 1380 if (do_utf8) {
fab1f349 1381 LOAD_UTF8_CHARCLASS_SPACE();
45bfbaf1 1382 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1383 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1384 if (tmp && (norun || regtry(prog, s)))
1385 goto got_it;
1386 else
1387 tmp = doevery;
1388 }
a0d0e21e 1389 else
ffc61ed2 1390 tmp = 1;
45bfbaf1 1391 s += uskip;
2304df62 1392 }
a0d0e21e 1393 }
ffc61ed2
JH
1394 else {
1395 while (s < strend) {
1396 if (isSPACE(*s)) {
1397 if (tmp && (norun || regtry(prog, s)))
1398 goto got_it;
1399 else
1400 tmp = doevery;
1401 }
a0ed51b3 1402 else
ffc61ed2
JH
1403 tmp = 1;
1404 s++;
a0ed51b3 1405 }
a0ed51b3
LW
1406 }
1407 break;
bbce6d69 1408 case SPACEL:
3280af22 1409 PL_reg_flags |= RF_tainted;
ffc61ed2 1410 if (do_utf8) {
45bfbaf1 1411 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1412 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1413 if (tmp && (norun || regtry(prog, s)))
1414 goto got_it;
1415 else
1416 tmp = doevery;
1417 }
bbce6d69 1418 else
ffc61ed2 1419 tmp = 1;
45bfbaf1 1420 s += uskip;
bbce6d69 1421 }
bbce6d69 1422 }
ffc61ed2
JH
1423 else {
1424 while (s < strend) {
1425 if (isSPACE_LC(*s)) {
1426 if (tmp && (norun || regtry(prog, s)))
1427 goto got_it;
1428 else
1429 tmp = doevery;
1430 }
a0ed51b3 1431 else
ffc61ed2
JH
1432 tmp = 1;
1433 s++;
a0ed51b3 1434 }
a0ed51b3
LW
1435 }
1436 break;
a0d0e21e 1437 case NSPACE:
ffc61ed2 1438 if (do_utf8) {
fab1f349 1439 LOAD_UTF8_CHARCLASS_SPACE();
45bfbaf1 1440 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1441 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1442 if (tmp && (norun || regtry(prog, s)))
1443 goto got_it;
1444 else
1445 tmp = doevery;
1446 }
a0d0e21e 1447 else
ffc61ed2 1448 tmp = 1;
45bfbaf1 1449 s += uskip;
a687059c 1450 }
a0d0e21e 1451 }
ffc61ed2
JH
1452 else {
1453 while (s < strend) {
1454 if (!isSPACE(*s)) {
1455 if (tmp && (norun || regtry(prog, s)))
1456 goto got_it;
1457 else
1458 tmp = doevery;
1459 }
a0ed51b3 1460 else
ffc61ed2
JH
1461 tmp = 1;
1462 s++;
a0ed51b3 1463 }
a0ed51b3
LW
1464 }
1465 break;
bbce6d69 1466 case NSPACEL:
3280af22 1467 PL_reg_flags |= RF_tainted;
ffc61ed2 1468 if (do_utf8) {
45bfbaf1 1469 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1470 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1471 if (tmp && (norun || regtry(prog, s)))
1472 goto got_it;
1473 else
1474 tmp = doevery;
1475 }
bbce6d69 1476 else
ffc61ed2 1477 tmp = 1;
45bfbaf1 1478 s += uskip;
bbce6d69 1479 }
bbce6d69 1480 }
ffc61ed2
JH
1481 else {
1482 while (s < strend) {
1483 if (!isSPACE_LC(*s)) {
1484 if (tmp && (norun || regtry(prog, s)))
1485 goto got_it;
1486 else
1487 tmp = doevery;
1488 }
a0ed51b3 1489 else
ffc61ed2
JH
1490 tmp = 1;
1491 s++;
a0ed51b3 1492 }
a0ed51b3
LW
1493 }
1494 break;
a0d0e21e 1495 case DIGIT:
ffc61ed2 1496 if (do_utf8) {
fab1f349 1497 LOAD_UTF8_CHARCLASS_DIGIT();
45bfbaf1 1498 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1499 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1500 if (tmp && (norun || regtry(prog, s)))
1501 goto got_it;
1502 else
1503 tmp = doevery;
1504 }
a0d0e21e 1505 else
ffc61ed2 1506 tmp = 1;
45bfbaf1 1507 s += uskip;
2b69d0c2 1508 }
a0d0e21e 1509 }
ffc61ed2
JH
1510 else {
1511 while (s < strend) {
1512 if (isDIGIT(*s)) {
1513 if (tmp && (norun || regtry(prog, s)))
1514 goto got_it;
1515 else
1516 tmp = doevery;
1517 }
a0ed51b3 1518 else
ffc61ed2
JH
1519 tmp = 1;
1520 s++;
a0ed51b3 1521 }
a0ed51b3
LW
1522 }
1523 break;
b8c5462f
JH
1524 case DIGITL:
1525 PL_reg_flags |= RF_tainted;
ffc61ed2 1526 if (do_utf8) {
45bfbaf1 1527 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1528 if (isDIGIT_LC_utf8((U8*)s)) {
1529 if (tmp && (norun || regtry(prog, s)))
1530 goto got_it;
1531 else
1532 tmp = doevery;
1533 }
b8c5462f 1534 else
ffc61ed2 1535 tmp = 1;
45bfbaf1 1536 s += uskip;
b8c5462f 1537 }
b8c5462f 1538 }
ffc61ed2
JH
1539 else {
1540 while (s < strend) {
1541 if (isDIGIT_LC(*s)) {
1542 if (tmp && (norun || regtry(prog, s)))
1543 goto got_it;
1544 else
1545 tmp = doevery;
1546 }
b8c5462f 1547 else
ffc61ed2
JH
1548 tmp = 1;
1549 s++;
b8c5462f 1550 }
b8c5462f
JH
1551 }
1552 break;
a0d0e21e 1553 case NDIGIT:
ffc61ed2 1554 if (do_utf8) {
fab1f349 1555 LOAD_UTF8_CHARCLASS_DIGIT();
45bfbaf1 1556 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1557 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1558 if (tmp && (norun || regtry(prog, s)))
1559 goto got_it;
1560 else
1561 tmp = doevery;
1562 }
a0d0e21e 1563 else
ffc61ed2 1564 tmp = 1;
45bfbaf1 1565 s += uskip;
a687059c 1566 }
a0d0e21e 1567 }
ffc61ed2
JH
1568 else {
1569 while (s < strend) {
1570 if (!isDIGIT(*s)) {
1571 if (tmp && (norun || regtry(prog, s)))
1572 goto got_it;
1573 else
1574 tmp = doevery;
1575 }
a0ed51b3 1576 else
ffc61ed2
JH
1577 tmp = 1;
1578 s++;
a0ed51b3 1579 }
a0ed51b3
LW
1580 }
1581 break;
b8c5462f
JH
1582 case NDIGITL:
1583 PL_reg_flags |= RF_tainted;
ffc61ed2 1584 if (do_utf8) {
45bfbaf1 1585 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1586 if (!isDIGIT_LC_utf8((U8*)s)) {
1587 if (tmp && (norun || regtry(prog, s)))
1588 goto got_it;
1589 else
1590 tmp = doevery;
1591 }
b8c5462f 1592 else
ffc61ed2 1593 tmp = 1;
45bfbaf1 1594 s += uskip;
b8c5462f 1595 }
a0ed51b3 1596 }
ffc61ed2
JH
1597 else {
1598 while (s < strend) {
1599 if (!isDIGIT_LC(*s)) {
1600 if (tmp && (norun || regtry(prog, s)))
1601 goto got_it;
1602 else
1603 tmp = doevery;
1604 }
cf93c79d 1605 else
ffc61ed2
JH
1606 tmp = 1;
1607 s++;
b8c5462f 1608 }
b8c5462f
JH
1609 }
1610 break;
b3c9acc1 1611 default:
3c3eec57
GS
1612 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1613 break;
d6a28714 1614 }
6eb5f6b9
JH
1615 return 0;
1616 got_it:
1617 return s;
1618}
1619
1620/*
1621 - regexec_flags - match a regexp against a string
1622 */
1623I32
1624Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1625 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1626/* strend: pointer to null at end of string */
1627/* strbeg: real beginning of string */
1628/* minend: end of match must be >=minend after stringarg. */
1629/* data: May be used for some additional optimizations. */
1630/* nosave: For optimizations. */
1631{
6eb5f6b9
JH
1632 register char *s;
1633 register regnode *c;
1634 register char *startpos = stringarg;
6eb5f6b9
JH
1635 I32 minlen; /* must match at least this many chars */
1636 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1637 I32 end_shift = 0; /* Same for the end. */ /* CC */
1638 I32 scream_pos = -1; /* Internal iterator of scream. */
1639 char *scream_olds;
1640 SV* oreplsv = GvSV(PL_replgv);
228fe6e6 1641 const bool do_utf8 = DO_UTF8(sv);
2a782b5b 1642#ifdef DEBUGGING
9e55ce06
JH
1643 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1644 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1645#endif
a2592645 1646 PERL_UNUSED_ARG(data);
2ea5368e 1647 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1648
1649 PL_regcc = 0;
1650
1651 cache_re(prog);
1652#ifdef DEBUGGING
aea4f609 1653 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1654#endif
1655
1656 /* Be paranoid... */
1657 if (prog == NULL || startpos == NULL) {
1658 Perl_croak(aTHX_ "NULL regexp parameter");
1659 return 0;
1660 }
1661
1662 minlen = prog->minlen;
61a36c01 1663 if (strend - startpos < minlen) {
a72c7584
JH
1664 DEBUG_r(PerlIO_printf(Perl_debug_log,
1665 "String too short [regexec_flags]...\n"));
1666 goto phooey;
1aa99e6b 1667 }
6eb5f6b9 1668
6eb5f6b9
JH
1669 /* Check validity of program. */
1670 if (UCHARAT(prog->program) != REG_MAGIC) {
1671 Perl_croak(aTHX_ "corrupted regexp program");
1672 }
1673
1674 PL_reg_flags = 0;
1675 PL_reg_eval_set = 0;
1676 PL_reg_maxiter = 0;
1677
1678 if (prog->reganch & ROPT_UTF8)
1679 PL_reg_flags |= RF_utf8;
1680
1681 /* Mark beginning of line for ^ and lookbehind. */
1682 PL_regbol = startpos;
1683 PL_bostr = strbeg;
1684 PL_reg_sv = sv;
1685
1686 /* Mark end of line for $ (and such) */
1687 PL_regeol = strend;
1688
1689 /* see how far we have to get to not match where we matched before */
1690 PL_regtill = startpos+minend;
1691
1692 /* We start without call_cc context. */
1693 PL_reg_call_cc = 0;
1694
1695 /* If there is a "must appear" string, look for it. */
1696 s = startpos;
1697
1698 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1699 MAGIC *mg;
1700
1701 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1702 PL_reg_ganch = startpos;
1703 else if (sv && SvTYPE(sv) >= SVt_PVMG
1704 && SvMAGIC(sv)
14befaf4
DM
1705 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1706 && mg->mg_len >= 0) {
6eb5f6b9
JH
1707 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1708 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1709 if (s > PL_reg_ganch)
6eb5f6b9
JH
1710 goto phooey;
1711 s = PL_reg_ganch;
1712 }
1713 }
1714 else /* pos() not defined */
1715 PL_reg_ganch = strbeg;
1716 }
1717
33b8afdf 1718 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
6eb5f6b9
JH
1719 re_scream_pos_data d;
1720
1721 d.scream_olds = &scream_olds;
1722 d.scream_pos = &scream_pos;
1723 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1724 if (!s) {
1725 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1726 goto phooey; /* not present */
3fa9c3d7 1727 }
6eb5f6b9
JH
1728 }
1729
2a782b5b 1730 DEBUG_r({
228fe6e6
AL
1731 const char * const s0 = UTF
1732 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1733 UNI_DISPLAY_REGEX)
1734 : prog->precomp;
1735 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1736 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1737 UNI_DISPLAY_REGEX) : startpos;
228fe6e6 1738 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1739 if (!PL_colorset)
1740 reginitcolors();
1741 PerlIO_printf(Perl_debug_log,
5332c881 1742 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
2a782b5b 1743 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1744 len0, len0, s0,
2a782b5b 1745 PL_colors[1],
9e55ce06 1746 len0 > 60 ? "..." : "",
2a782b5b 1747 PL_colors[0],
9e55ce06
JH
1748 (int)(len1 > 60 ? 60 : len1),
1749 s1, PL_colors[1],
1750 (len1 > 60 ? "..." : "")
2a782b5b
JH
1751 );
1752 });
6eb5f6b9
JH
1753
1754 /* Simplest case: anchored match need be tried only once. */
1755 /* [unless only anchor is BOL and multiline is set] */
1756 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1757 if (s == startpos && regtry(prog, startpos))
1758 goto got_it;
1759 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1760 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1761 {
1762 char *end;
1763
1764 if (minlen)
1765 dontbother = minlen - 1;
1aa99e6b 1766 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1767 /* for multiline we only have to try after newlines */
33b8afdf 1768 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1769 if (s == startpos)
1770 goto after_try;
1771 while (1) {
1772 if (regtry(prog, s))
1773 goto got_it;
1774 after_try:
1775 if (s >= end)
1776 goto phooey;
1777 if (prog->reganch & RE_USE_INTUIT) {
1778 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1779 if (!s)
1780 goto phooey;
1781 }
1782 else
1783 s++;
1784 }
1785 } else {
1786 if (s > startpos)
1787 s--;
1788 while (s < end) {
1789 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1790 if (regtry(prog, s))
1791 goto got_it;
1792 }
1793 }
1794 }
1795 }
1796 goto phooey;
1797 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1798 if (regtry(prog, PL_reg_ganch))
1799 goto got_it;
1800 goto phooey;
1801 }
1802
1803 /* Messy cases: unanchored match. */
33b8afdf 1804 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1805 /* we have /x+whatever/ */
1806 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1807 char ch;
bf93d4cc
GS
1808#ifdef DEBUGGING
1809 int did_match = 0;
1810#endif
33b8afdf
JH
1811 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1812 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
fdac8c4b 1813 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1814
1aa99e6b 1815 if (do_utf8) {
6eb5f6b9
JH
1816 while (s < strend) {
1817 if (*s == ch) {
bf93d4cc 1818 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1819 if (regtry(prog, s)) goto got_it;
1820 s += UTF8SKIP(s);
1821 while (s < strend && *s == ch)
1822 s += UTF8SKIP(s);
1823 }
1824 s += UTF8SKIP(s);
1825 }
1826 }
1827 else {
1828 while (s < strend) {
1829 if (*s == ch) {
bf93d4cc 1830 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1831 if (regtry(prog, s)) goto got_it;
1832 s++;
1833 while (s < strend && *s == ch)
1834 s++;
1835 }
1836 s++;
1837 }
1838 }
b7953727 1839 DEBUG_r(if (!did_match)
bf93d4cc 1840 PerlIO_printf(Perl_debug_log,
b7953727
JH
1841 "Did not find anchored character...\n")
1842 );
6eb5f6b9 1843 }
33b8afdf
JH
1844 else if (prog->anchored_substr != Nullsv
1845 || prog->anchored_utf8 != Nullsv
1846 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1847 && prog->float_max_offset < strend - s)) {
1848 SV *must;
1849 I32 back_max;
1850 I32 back_min;
1851 char *last;
6eb5f6b9 1852 char *last1; /* Last position checked before */
bf93d4cc
GS
1853#ifdef DEBUGGING
1854 int did_match = 0;
1855#endif
33b8afdf
JH
1856 if (prog->anchored_substr || prog->anchored_utf8) {
1857 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1858 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1859 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1860 back_max = back_min = prog->anchored_offset;
1861 } else {
1862 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1863 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1865 back_max = prog->float_max_offset;
1866 back_min = prog->float_min_offset;
1867 }
1868 if (must == &PL_sv_undef)
1869 /* could not downgrade utf8 check substring, so must fail */
1870 goto phooey;
1871
1872 last = HOP3c(strend, /* Cannot start after this */
1873 -(I32)(CHR_SVLEN(must)
1874 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1875
1876 if (s > PL_bostr)
1877 last1 = HOPc(s, -1);
1878 else
1879 last1 = s - 1; /* bogus */
1880
5332c881 1881 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1882 check_substr==must. */
1883 scream_pos = -1;
1884 dontbother = end_shift;
1885 strend = HOPc(strend, -dontbother);
1886 while ( (s <= last) &&
9041c2e3 1887 ((flags & REXEC_SCREAM)
1aa99e6b 1888 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1889 end_shift, &scream_pos, 0))
1aa99e6b 1890 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1891 (unsigned char*)strend, must,
6eb5f6b9 1892 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
88af3bc3
JH
1893 /* we may be pointing at the wrong string */
1894 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
fdac8c4b 1895 s = strbeg + (s - SvPVX_const(sv));
bf93d4cc 1896 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1897 if (HOPc(s, -back_max) > last1) {
1898 last1 = HOPc(s, -back_min);
1899 s = HOPc(s, -back_max);
1900 }
1901 else {
1902 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1903
1904 last1 = HOPc(s, -back_min);
1905 s = t;
1906 }
1aa99e6b 1907 if (do_utf8) {
6eb5f6b9
JH
1908 while (s <= last1) {
1909 if (regtry(prog, s))
1910 goto got_it;
1911 s += UTF8SKIP(s);
1912 }
1913 }
1914 else {
1915 while (s <= last1) {
1916 if (regtry(prog, s))
1917 goto got_it;
1918 s++;
1919 }
1920 }
1921 }
b7953727
JH
1922 DEBUG_r(if (!did_match)
1923 PerlIO_printf(Perl_debug_log,
5332c881 1924 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1925 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1926 ? "anchored" : "floating"),
1927 PL_colors[0],
1928 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
fdac8c4b 1929 SvPVX_const(must),
b7953727
JH
1930 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1931 );
6eb5f6b9
JH
1932 goto phooey;
1933 }
155aba94 1934 else if ((c = prog->regstclass)) {
e19fe81c
JH
1935 if (minlen) {
1936 I32 op = (U8)OP(prog->regstclass);
66e933ab 1937 /* don't bother with what can't match */
e19fe81c
JH
1938 if (PL_regkind[op] != EXACT && op != CANY)
1939 strend = HOPc(strend, -(minlen - 1));
1940 }
ffc61ed2
JH
1941 DEBUG_r({
1942 SV *prop = sv_newmortal();
71a0dd65
NC
1943 const char *s0;
1944 const char *s1;
9e55ce06
JH
1945 int len0;
1946 int len1;
1947
ffc61ed2 1948 regprop(prop, c);
9e55ce06 1949 s0 = UTF ?
fdac8c4b 1950 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1951 UNI_DISPLAY_REGEX) :
71a0dd65 1952 SvPVX_const(prop);
9e55ce06
JH
1953 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1954 s1 = UTF ?
c728cb41 1955 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1956 len1 = UTF ? SvCUR(dsv1) : strend - s;
1957 PerlIO_printf(Perl_debug_log,
5332c881 1958 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1959 len0, len0, s0,
1960 len1, len1, s1);
ffc61ed2 1961 });
ec6f298e 1962 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1963 goto got_it;
bf93d4cc 1964 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1965 }
1966 else {
1967 dontbother = 0;
33b8afdf
JH
1968 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1969 /* Trim the end. */
d6a28714 1970 char *last;
33b8afdf
JH
1971 SV* float_real;
1972
1973 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1974 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1975 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1976
1977 if (flags & REXEC_SCREAM) {
33b8afdf 1978 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1979 end_shift, &scream_pos, 1); /* last one */
1980 if (!last)
ffc61ed2 1981 last = scream_olds; /* Only one occurrence. */
88af3bc3
JH
1982 /* we may be pointing at the wrong string */
1983 else if (RX_MATCH_COPIED(prog))
fdac8c4b 1984 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1985 }
d6a28714
JH
1986 else {
1987 STRLEN len;
71a0dd65 1988 const char * const little = SvPV_const(float_real, len);
d6a28714 1989
33b8afdf 1990 if (SvTAIL(float_real)) {
d6a28714
JH
1991 if (memEQ(strend - len + 1, little, len - 1))
1992 last = strend - len + 1;
1993 else if (!PL_multiline)
9041c2e3 1994 last = memEQ(strend - len, little, len)
d6a28714 1995 ? strend - len : Nullch;
b8c5462f 1996 else
d6a28714
JH
1997 goto find_last;
1998 } else {
1999 find_last:
9041c2e3 2000 if (len)
d6a28714 2001 last = rninstr(s, strend, little, little + len);
b8c5462f 2002 else
5332c881 2003 last = strend; /* matching "$" */
b8c5462f 2004 }
b8c5462f 2005 }
bf93d4cc
GS
2006 if (last == NULL) {
2007 DEBUG_r(PerlIO_printf(Perl_debug_log,
2008 "%sCan't trim the tail, match fails (should not happen)%s\n",
2009 PL_colors[4],PL_colors[5]));
2010 goto phooey; /* Should not happen! */
2011 }
d6a28714
JH
2012 dontbother = strend - last + prog->float_min_offset;
2013 }
2014 if (minlen && (dontbother < minlen))
2015 dontbother = minlen - 1;
2016 strend -= dontbother; /* this one's always in bytes! */
2017 /* We don't know much -- general case. */
1aa99e6b 2018 if (do_utf8) {
d6a28714
JH
2019 for (;;) {
2020 if (regtry(prog, s))
2021 goto got_it;
2022 if (s >= strend)
2023 break;
b8c5462f 2024 s += UTF8SKIP(s);
d6a28714
JH
2025 };
2026 }
2027 else {
2028 do {
2029 if (regtry(prog, s))
2030 goto got_it;
2031 } while (s++ < strend);
2032 }
2033 }
2034
2035 /* Failure. */
2036 goto phooey;
2037
2038got_it:
2039 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2040
2041 if (PL_reg_eval_set) {
2042 /* Preserve the current value of $^R */
2043 if (oreplsv != GvSV(PL_replgv))
2044 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2045 restored, the value remains
2046 the same. */
acfe0abc 2047 restore_pos(aTHX_ 0);
d6a28714
JH
2048 }
2049
2050 /* make sure $`, $&, $', and $digit will work later */
2051 if ( !(flags & REXEC_NOT_FIRST) ) {
2052 if (RX_MATCH_COPIED(prog)) {
2053 Safefree(prog->subbeg);
2054 RX_MATCH_COPIED_off(prog);
2055 }
2056 if (flags & REXEC_COPY_STR) {
2057 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2058
2059 s = savepvn(strbeg, i);
2060 prog->subbeg = s;
2061 prog->sublen = i;
2062 RX_MATCH_COPIED_on(prog);
2063 }
2064 else {
2065 prog->subbeg = strbeg;
2066 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2067 }
2068 }
9041c2e3 2069
d6a28714
JH
2070 return 1;
2071
2072phooey:
bf93d4cc
GS
2073 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2074 PL_colors[4],PL_colors[5]));
d6a28714 2075 if (PL_reg_eval_set)
acfe0abc 2076 restore_pos(aTHX_ 0);
d6a28714
JH
2077 return 0;
2078}
2079
2080/*
2081 - regtry - try match at specific point
2082 */
2083STATIC I32 /* 0 failure, 1 success */
2084S_regtry(pTHX_ regexp *prog, char *startpos)
2085{
d6a28714
JH
2086 register I32 i;
2087 register I32 *sp;
2088 register I32 *ep;
2089 CHECKPOINT lastcp;
2090
02db2b7b
IZ
2091#ifdef DEBUGGING
2092 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2093#endif
d6a28714
JH
2094 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2095 MAGIC *mg;
2096
2097 PL_reg_eval_set = RS_init;
2098 DEBUG_r(DEBUG_s(
b900a521
JH
2099 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2100 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2101 ));
e8347627 2102 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2103 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2104 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2105 SAVETMPS;
2106 /* Apparently this is not needed, judging by wantarray. */
e8347627 2107 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2108 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2109
2110 if (PL_reg_sv) {
2111 /* Make $_ available to executed code. */
2112 if (PL_reg_sv != DEFSV) {
4d1ff10f 2113 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
2114 SAVESPTR(DEFSV);
2115 DEFSV = PL_reg_sv;
b8c5462f 2116 }
d6a28714 2117
9041c2e3 2118 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2119 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2120 /* prepare for quick setting of pos */
14befaf4
DM
2121 sv_magic(PL_reg_sv, (SV*)0,
2122 PERL_MAGIC_regex_global, Nullch, 0);
2123 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2124 mg->mg_len = -1;
b8c5462f 2125 }
d6a28714
JH
2126 PL_reg_magic = mg;
2127 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2128 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2129 }
09687e5a 2130 if (!PL_reg_curpm) {
cd7a8267 2131 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2132#ifdef USE_ITHREADS
2133 {
2134 SV* repointer = newSViv(0);
577e12cc 2135 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2136 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2137 av_push(PL_regex_padav,repointer);
2138 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2139 PL_regex_pad = AvARRAY(PL_regex_padav);
2140 }
2141#endif
2142 }
aaa362c4 2143 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2144 PL_reg_oldcurpm = PL_curpm;
2145 PL_curpm = PL_reg_curpm;
2146 if (RX_MATCH_COPIED(prog)) {
2147 /* Here is a serious problem: we cannot rewrite subbeg,
2148 since it may be needed if this match fails. Thus
2149 $` inside (?{}) could fail... */
2150 PL_reg_oldsaved = prog->subbeg;
2151 PL_reg_oldsavedlen = prog->sublen;
2152 RX_MATCH_COPIED_off(prog);
2153 }
2154 else
2155 PL_reg_oldsaved = Nullch;
2156 prog->subbeg = PL_bostr;
2157 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2158 }
2159 prog->startp[0] = startpos - PL_bostr;
2160 PL_reginput = startpos;
2161 PL_regstartp = prog->startp;
2162 PL_regendp = prog->endp;
2163 PL_reglastparen = &prog->lastparen;
a01268b5 2164 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2165 prog->lastparen = 0;
4f4e7967 2166 prog->lastcloseparen = 0;
d6a28714
JH
2167 PL_regsize = 0;
2168 DEBUG_r(PL_reg_starttry = startpos);
2169 if (PL_reg_start_tmpl <= prog->nparens) {
2170 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2171 if(PL_reg_start_tmp)
2172 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2173 else
cd7a8267 2174 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2175 }
2176
2177 /* XXXX What this code is doing here?!!! There should be no need
2178 to do this again and again, PL_reglastparen should take care of
3dd2943c 2179 this! --ilya*/
dafc8851
JH
2180
2181 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2182 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2183 * PL_reglastparen), is not needed at all by the test suite
2184 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2185 * enough, for building DynaLoader, or otherwise this
2186 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2187 * will happen. Meanwhile, this code *is* needed for the
2188 * above-mentioned test suite tests to succeed. The common theme
2189 * on those tests seems to be returning null fields from matches.
2190 * --jhi */
dafc8851 2191#if 1
d6a28714
JH
2192 sp = prog->startp;
2193 ep = prog->endp;
2194 if (prog->nparens) {
eb160463 2195 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2196 *++sp = -1;
2197 *++ep = -1;
2198 }
2199 }
dafc8851 2200#endif
02db2b7b 2201 REGCP_SET(lastcp);
d6a28714
JH
2202 if (regmatch(prog->program + 1)) {
2203 prog->endp[0] = PL_reginput - PL_bostr;
2204 return 1;
2205 }
02db2b7b 2206 REGCP_UNWIND(lastcp);
d6a28714
JH
2207 return 0;
2208}
2209
02db2b7b
IZ
2210#define RE_UNWIND_BRANCH 1
2211#define RE_UNWIND_BRANCHJ 2
2212
2213union re_unwind_t;
2214
2215typedef struct { /* XX: makes sense to enlarge it... */
2216 I32 type;
2217 I32 prev;
2218 CHECKPOINT lastcp;
2219} re_unwind_generic_t;
2220
2221typedef struct {
2222 I32 type;
2223 I32 prev;
2224 CHECKPOINT lastcp;
2225 I32 lastparen;
2226 regnode *next;
2227 char *locinput;
2228 I32 nextchr;
2229#ifdef DEBUGGING
2230 int regindent;
2231#endif
2232} re_unwind_branch_t;
2233
2234typedef union re_unwind_t {
2235 I32 type;
2236 re_unwind_generic_t generic;
2237 re_unwind_branch_t branch;
2238} re_unwind_t;
2239
8ba1375e
MJD
2240#define sayYES goto yes
2241#define sayNO goto no
e0f9d4a8 2242#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2243#define sayYES_FINAL goto yes_final
2244#define sayYES_LOUD goto yes_loud
2245#define sayNO_FINAL goto no_final
2246#define sayNO_SILENT goto do_no
2247#define saySAME(x) if (x) goto yes; else goto no
2248
6e00f9c6
HS
2249#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2250#define POSCACHE_SEEN 1 /* we know what we're caching */
2251#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2252#define CACHEsayYES STMT_START { \
2253 if (cache_offset | cache_bit) { \
2254 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2255 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2256 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 /* cache records failure, but this is success */ \
2258 DEBUG_r( \
2259 PerlIO_printf(Perl_debug_log, \
2260 "%*s (remove success from failure cache)\n", \
2261 REPORT_CODE_OFF+PL_regindent*2, "") \
2262 ); \
2263 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2264 } \
2265 } \
2266 sayYES; \
2267} STMT_END
2268#define CACHEsayNO STMT_START { \
2269 if (cache_offset | cache_bit) { \
2270 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2271 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2272 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2273 /* cache records success, but this is failure */ \
2274 DEBUG_r( \
2275 PerlIO_printf(Perl_debug_log, \
2276 "%*s (remove failure from success cache)\n", \
2277 REPORT_CODE_OFF+PL_regindent*2, "") \
2278 ); \
2279 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2280 } \
2281 } \
2282 sayNO; \
2283} STMT_END
2284
8ba1375e
MJD
2285#define REPORT_CODE_OFF 24
2286
d6a28714
JH
2287/*
2288 - regmatch - main matching routine
2289 *
2290 * Conceptually the strategy is simple: check to see whether the current
2291 * node matches, call self recursively to see whether the rest matches,
2292 * and then act accordingly. In practice we make some effort to avoid
2293 * recursion, in particular by going through "ordinary" nodes (that don't
2294 * need to know whether the rest of the match failed) by a loop instead of
2295 * by recursion.
2296 */
2297/* [lwall] I've hoisted the register declarations to the outer block in order to
2298 * maybe save a little bit of pushing and popping on the stack. It also takes
2299 * advantage of machines that use a register save mask on subroutine entry.
2300 */
2301STATIC I32 /* 0 failure, 1 success */
2302S_regmatch(pTHX_ regnode *prog)
2303{
d6a28714
JH
2304 register regnode *scan; /* Current node. */
2305 regnode *next; /* Next node. */
2306 regnode *inner; /* Next node in internal branch. */
2307 register I32 nextchr; /* renamed nextchr - nextchar colides with
2308 function of same name */
2309 register I32 n; /* no or next */
b7953727
JH
2310 register I32 ln = 0; /* len or last */
2311 register char *s = Nullch; /* operand or save */
d6a28714 2312 register char *locinput = PL_reginput;
b7953727 2313 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2314 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2315 I32 unwind = 0;
b7953727 2316#if 0
02db2b7b 2317 I32 firstcp = PL_savestack_ix;
b7953727 2318#endif
8c18bf38 2319 const register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2320#ifdef DEBUGGING
ce333219
JH
2321 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2322 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2323 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2324#endif
02db2b7b 2325
d6a28714
JH
2326#ifdef DEBUGGING
2327 PL_regindent++;
2328#endif
2329
2330 /* Note that nextchr is a byte even in UTF */
2331 nextchr = UCHARAT(locinput);
2332 scan = prog;
2333 while (scan != NULL) {
8ba1375e 2334
2a782b5b 2335 DEBUG_r( {
d6a28714 2336 SV *prop = sv_newmortal();
228fe6e6
AL
2337 const int docolor = *PL_colors[0];
2338 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2339 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2340 /* The part of the string before starttry has one color
2341 (pref0_len chars), between starttry and current
2342 position another one (pref_len - pref0_len chars),
2343 after the current position the third one.
2344 We assume that pref0_len <= pref_len, otherwise we
2345 decrease pref0_len. */
9041c2e3 2346 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2347 ? (5 + taill) - l : locinput - PL_bostr;
2348 int pref0_len;
d6a28714 2349
df1ffd02 2350 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2351 pref_len++;
2352 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2353 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2354 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2355 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2356 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2357 l--;
d6a28714
JH
2358 if (pref0_len < 0)
2359 pref0_len = 0;
2360 if (pref0_len > pref_len)
2361 pref0_len = pref_len;
2362 regprop(prop, scan);
2a782b5b 2363 {
228fe6e6 2364 const char * const s0 =
e19fe81c 2365 do_utf8 && OP(scan) != CANY ?
2a782b5b 2366 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2367 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2368 locinput - pref_len;
228fe6e6
AL
2369 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2370 const char * const s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2371 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2372 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2373 locinput - pref_len + pref0_len;
228fe6e6
AL
2374 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2375 const char * const s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2376 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2377 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2378 locinput;
228fe6e6 2379 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2380 PerlIO_printf(Perl_debug_log,
2381 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2382 (IV)(locinput - PL_bostr),
2383 PL_colors[4],
2384 len0, s0,
2385 PL_colors[5],
2386 PL_colors[2],
2387 len1, s1,
2388 PL_colors[3],
2389 (docolor ? "" : "> <"),
2390 PL_colors[0],
2391 len2, s2,
2392 PL_colors[1],
2393 15 - l - pref_len + 1,
2394 "",
2395 (IV)(scan - PL_regprogram), PL_regindent*2, "",
fdac8c4b 2396 SvPVX_const(prop));
2a782b5b
JH
2397 }
2398 });
d6a28714
JH
2399
2400 next = scan + NEXT_OFF(scan);
2401 if (next == scan)
2402 next = NULL;
2403
2404 switch (OP(scan)) {
2405 case BOL:
12d33761
HS
2406 if (locinput == PL_bostr || (PL_multiline &&
2407 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2408 {
2409 /* regtill = regbol; */
b8c5462f
JH
2410 break;
2411 }
d6a28714
JH
2412 sayNO;
2413 case MBOL:
12d33761
HS
2414 if (locinput == PL_bostr ||
2415 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2416 {
b8c5462f
JH
2417 break;
2418 }
d6a28714
JH
2419 sayNO;
2420 case SBOL:
c2a73568 2421 if (locinput == PL_bostr)
b8c5462f 2422 break;
d6a28714
JH
2423 sayNO;
2424 case GPOS:
2425 if (locinput == PL_reg_ganch)
2426 break;
2427 sayNO;
2428 case EOL:
2429 if (PL_multiline)
2430 goto meol;
2431 else
2432 goto seol;
2433 case MEOL:
2434 meol:
2435 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2436 sayNO;
b8c5462f 2437 break;
d6a28714
JH
2438 case SEOL:
2439 seol:
2440 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2441 sayNO;
d6a28714 2442 if (PL_regeol - locinput > 1)
b8c5462f 2443 sayNO;
b8c5462f 2444 break;
d6a28714
JH
2445 case EOS:
2446 if (PL_regeol != locinput)
b8c5462f 2447 sayNO;
d6a28714 2448 break;
ffc61ed2 2449 case SANY:
d6a28714 2450 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2451 sayNO;
f33976b4
DB
2452 if (do_utf8) {
2453 locinput += PL_utf8skip[nextchr];
2454 if (locinput > PL_regeol)
2455 sayNO;
2456 nextchr = UCHARAT(locinput);
2457 }
2458 else
2459 nextchr = UCHARAT(++locinput);
2460 break;
2461 case CANY:
2462 if (!nextchr && locinput >= PL_regeol)
2463 sayNO;
b8c5462f 2464 nextchr = UCHARAT(++locinput);
a0d0e21e 2465 break;
ffc61ed2 2466 case REG_ANY:
1aa99e6b
IH
2467 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2468 sayNO;
2469 if (do_utf8) {
b8c5462f 2470 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2471 if (locinput > PL_regeol)
2472 sayNO;
a0ed51b3 2473 nextchr = UCHARAT(locinput);
a0ed51b3 2474 }
1aa99e6b
IH
2475 else
2476 nextchr = UCHARAT(++locinput);
a0ed51b3 2477 break;
8c18bf38 2478
d6a28714 2479 case EXACT:
cd439c50
IZ
2480 s = STRING(scan);
2481 ln = STR_LEN(scan);
eb160463 2482 if (do_utf8 != UTF) {
bc517b45 2483 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2484 char *l = locinput;
8c18bf38 2485 const char *e = s + ln;
a72c7584 2486
5ff6fc6d
JH
2487 if (do_utf8) {
2488 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2489 while (s < e) {
8c18bf38 2490 STRLEN ulen;
1aa99e6b 2491 if (l >= PL_regeol)
5ff6fc6d
JH
2492 sayNO;
2493 if (NATIVE_TO_UNI(*(U8*)s) !=
a2a469f9 2494 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2495 ckWARN(WARN_UTF8) ?
2496 0 : UTF8_ALLOW_ANY))
5ff6fc6d 2497 sayNO;
bc517b45 2498 l += ulen;
5ff6fc6d 2499 s ++;
1aa99e6b 2500 }
5ff6fc6d
JH
2501 }
2502 else {
2503 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2504 while (s < e) {
8c18bf38 2505 STRLEN ulen;
1aa99e6b
IH
2506 if (l >= PL_regeol)
2507 sayNO;
5ff6fc6d 2508 if (NATIVE_TO_UNI(*((U8*)l)) !=
a2a469f9 2509 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2510 ckWARN(WARN_UTF8) ?
2511 0 : UTF8_ALLOW_ANY))
1aa99e6b 2512 sayNO;
bc517b45 2513 s += ulen;
a72c7584 2514 l ++;
1aa99e6b 2515 }
5ff6fc6d 2516 }
1aa99e6b
IH
2517 locinput = l;
2518 nextchr = UCHARAT(locinput);
2519 break;
2520 }
bc517b45 2521 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2522 /* Inline the first character, for speed. */
2523 if (UCHARAT(s) != nextchr)
2524 sayNO;
2525 if (PL_regeol - locinput < ln)
2526 sayNO;
2527 if (ln > 1 && memNE(s, locinput, ln))
2528 sayNO;
2529 locinput += ln;
2530 nextchr = UCHARAT(locinput);
2531 break;
2532 case EXACTFL:
b8c5462f
JH
2533 PL_reg_flags |= RF_tainted;
2534 /* FALL THROUGH */
d6a28714 2535 case EXACTF:
cd439c50
IZ
2536 s = STRING(scan);
2537 ln = STR_LEN(scan);
d6a28714 2538
d07ddd77
JH
2539 if (do_utf8 || UTF) {
2540 /* Either target or the pattern are utf8. */
d6a28714 2541 char *l = locinput;
d07ddd77 2542 char *e = PL_regeol;
bc517b45 2543
eb160463 2544 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2545 l, &e, 0, do_utf8)) {
5486206c
JH
2546 /* One more case for the sharp s:
2547 * pack("U0U*", 0xDF) =~ /ss/i,
2548 * the 0xC3 0x9F are the UTF-8
2549 * byte sequence for the U+00DF. */
2550 if (!(do_utf8 &&
2551 toLOWER(s[0]) == 's' &&
2552 ln >= 2 &&
2553 toLOWER(s[1]) == 's' &&
2554 (U8)l[0] == 0xC3 &&
2555 e - l >= 2 &&
2556 (U8)l[1] == 0x9F))
2557 sayNO;
2558 }
d07ddd77
JH
2559 locinput = e;
2560 nextchr = UCHARAT(locinput);
2561 break;
a0ed51b3 2562 }
d6a28714 2563
bc517b45
JH
2564 /* Neither the target and the pattern are utf8. */
2565
d6a28714
JH
2566 /* Inline the first character, for speed. */
2567 if (UCHARAT(s) != nextchr &&
2568 UCHARAT(s) != ((OP(scan) == EXACTF)
2569 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2570 sayNO;
d6a28714 2571 if (PL_regeol - locinput < ln)
b8c5462f 2572 sayNO;
d6a28714
JH
2573 if (ln > 1 && (OP(scan) == EXACTF
2574 ? ibcmp(s, locinput, ln)
2575 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2576 sayNO;
d6a28714
JH
2577 locinput += ln;
2578 nextchr = UCHARAT(locinput);
a0d0e21e 2579 break;
d6a28714 2580 case ANYOF:
ffc61ed2 2581 if (do_utf8) {
9e55ce06
JH
2582 STRLEN inclasslen = PL_regeol - locinput;
2583
ba7b4546 2584 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2585 sayNO_ANYOF;
ffc61ed2
JH
2586 if (locinput >= PL_regeol)
2587 sayNO;
0f0076b4 2588 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2589 nextchr = UCHARAT(locinput);
e0f9d4a8 2590 break;
ffc61ed2
JH
2591 }
2592 else {
2593 if (nextchr < 0)
2594 nextchr = UCHARAT(locinput);
7d3e948e 2595 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2596 sayNO_ANYOF;
ffc61ed2
JH
2597 if (!nextchr && locinput >= PL_regeol)
2598 sayNO;
2599 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2600 break;
2601 }
2602 no_anyof:
2603 /* If we might have the case of the German sharp s
2604 * in a casefolding Unicode character class. */
2605
ebc501f0
JH
2606 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2607 locinput += SHARP_S_SKIP;
e0f9d4a8 2608 nextchr = UCHARAT(locinput);
ffc61ed2 2609 }
e0f9d4a8
JH
2610 else
2611 sayNO;
b8c5462f 2612 break;
d6a28714 2613 case ALNUML:
b8c5462f
JH
2614 PL_reg_flags |= RF_tainted;
2615 /* FALL THROUGH */
d6a28714 2616 case ALNUM:
b8c5462f 2617 if (!nextchr)
4633a7c4 2618 sayNO;
ffc61ed2 2619 if (do_utf8) {
fab1f349 2620 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2621 if (!(OP(scan) == ALNUM
3568d838 2622 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2623 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2624 {
2625 sayNO;
a0ed51b3 2626 }
b8c5462f 2627 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2628 nextchr = UCHARAT(locinput);
2629 break;
2630 }
ffc61ed2 2631 if (!(OP(scan) == ALNUM
d6a28714 2632 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2633 sayNO;
b8c5462f 2634 nextchr = UCHARAT(++locinput);
a0d0e21e 2635 break;
d6a28714 2636 case NALNUML:
b8c5462f
JH
2637 PL_reg_flags |= RF_tainted;
2638 /* FALL THROUGH */
d6a28714
JH
2639 case NALNUM:
2640 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2641 sayNO;
ffc61ed2 2642 if (do_utf8) {
fab1f349 2643 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2644 if (OP(scan) == NALNUM
3568d838 2645 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2646 : isALNUM_LC_utf8((U8*)locinput))
2647 {
b8c5462f 2648 sayNO;
d6a28714 2649 }
b8c5462f
JH
2650 locinput += PL_utf8skip[nextchr];
2651 nextchr = UCHARAT(locinput);
2652 break;
2653 }
ffc61ed2 2654 if (OP(scan) == NALNUM
d6a28714 2655 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2656 sayNO;
76e3520e 2657 nextchr = UCHARAT(++locinput);
a0d0e21e 2658 break;
d6a28714
JH
2659 case BOUNDL:
2660 case NBOUNDL:
3280af22 2661 PL_reg_flags |= RF_tainted;
bbce6d69 2662 /* FALL THROUGH */
d6a28714
JH
2663 case BOUND:
2664 case NBOUND:
2665 /* was last char in word? */
ffc61ed2 2666 if (do_utf8) {
12d33761
HS
2667 if (locinput == PL_bostr)
2668 ln = '\n';
ffc61ed2 2669 else {
8c18bf38 2670 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2671
47a91a97 2672 ln = utf8n_to_uvchr((U8 *)r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2673 }
2674 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2675 ln = isALNUM_uni(ln);
fab1f349 2676 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 2677 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2678 }
2679 else {
9041c2e3 2680 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2681 n = isALNUM_LC_utf8((U8*)locinput);
2682 }
a0ed51b3 2683 }
d6a28714 2684 else {
12d33761
HS
2685 ln = (locinput != PL_bostr) ?
2686 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2687 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2688 ln = isALNUM(ln);
2689 n = isALNUM(nextchr);
2690 }
2691 else {
2692 ln = isALNUM_LC(ln);
2693 n = isALNUM_LC(nextchr);
2694 }
d6a28714 2695 }
ffc61ed2
JH
2696 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2697 OP(scan) == BOUNDL))
2698 sayNO;
a0ed51b3 2699 break;
d6a28714 2700 case SPACEL:
3280af22 2701 PL_reg_flags |= RF_tainted;
bbce6d69 2702 /* FALL THROUGH */
d6a28714 2703 case SPACE:
9442cb0e 2704 if (!nextchr)
4633a7c4 2705 sayNO;
1aa99e6b 2706 if (do_utf8) {
fd400ab9 2707 if (UTF8_IS_CONTINUED(nextchr)) {
fab1f349 2708 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 2709 if (!(OP(scan) == SPACE
3568d838 2710 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2711 : isSPACE_LC_utf8((U8*)locinput)))
2712 {
2713 sayNO;
2714 }
2715 locinput += PL_utf8skip[nextchr];
2716 nextchr = UCHARAT(locinput);
2717 break;
d6a28714 2718 }
ffc61ed2
JH
2719 if (!(OP(scan) == SPACE
2720 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2721 sayNO;
2722 nextchr = UCHARAT(++locinput);
2723 }
2724 else {
2725 if (!(OP(scan) == SPACE
2726 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2727 sayNO;
2728 nextchr = UCHARAT(++locinput);
a0ed51b3 2729 }
a0ed51b3 2730 break;
d6a28714 2731 case NSPACEL:
3280af22 2732 PL_reg_flags |= RF_tainted;
bbce6d69 2733 /* FALL THROUGH */
d6a28714 2734 case NSPACE:
9442cb0e 2735 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2736 sayNO;
1aa99e6b 2737 if (do_utf8) {
fab1f349 2738 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 2739 if (OP(scan) == NSPACE
3568d838 2740 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2741 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2742 {
2743 sayNO;
2744 }
2745 locinput += PL_utf8skip[nextchr];
2746 nextchr = UCHARAT(locinput);
2747 break;
a0ed51b3 2748 }
ffc61ed2 2749 if (OP(scan) == NSPACE
d6a28714 2750 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2751 sayNO;
76e3520e 2752 nextchr = UCHARAT(++locinput);
a0d0e21e 2753 break;
d6a28714 2754 case DIGITL:
a0ed51b3
LW
2755 PL_reg_flags |= RF_tainted;
2756 /* FALL THROUGH */
d6a28714 2757 case DIGIT:
9442cb0e 2758 if (!nextchr)
a0ed51b3 2759 sayNO;
1aa99e6b 2760 if (do_utf8) {
fab1f349 2761 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 2762 if (!(OP(scan) == DIGIT
3568d838 2763 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2764 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2765 {
a0ed51b3 2766 sayNO;
dfe13c55 2767 }
6f06b55f 2768 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2769 nextchr = UCHARAT(locinput);
2770 break;
2771 }
ffc61ed2 2772 if (!(OP(scan) == DIGIT
9442cb0e 2773 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2774 sayNO;
2775 nextchr = UCHARAT(++locinput);
2776 break;
d6a28714 2777 case NDIGITL:
b8c5462f
JH
2778 PL_reg_flags |= RF_tainted;
2779 /* FALL THROUGH */
d6a28714 2780 case NDIGIT:
9442cb0e 2781 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2782 sayNO;
1aa99e6b 2783 if (do_utf8) {
fab1f349 2784 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 2785 if (OP(scan) == NDIGIT
3568d838 2786 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2787 : isDIGIT_LC_utf8((U8*)locinput))
2788 {
a0ed51b3 2789 sayNO;
9442cb0e 2790 }
6f06b55f 2791 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2792 nextchr = UCHARAT(locinput);
2793 break;
2794 }
ffc61ed2 2795 if (OP(scan) == NDIGIT
9442cb0e 2796 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2797 sayNO;
2798 nextchr = UCHARAT(++locinput);
2799 break;
2800 case CLUMP:
b7c83a7e 2801 if (locinput >= PL_regeol)
a0ed51b3 2802 sayNO;
b7c83a7e 2803 if (do_utf8) {
fab1f349 2804 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
2805 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2806 sayNO;
2807 locinput += PL_utf8skip[nextchr];
2808 while (locinput < PL_regeol &&
2809 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2810 locinput += UTF8SKIP(locinput);
2811 if (locinput > PL_regeol)
2812 sayNO;
eb08e2da
JH
2813 }
2814 else
2815 locinput++;
a0ed51b3
LW
2816 nextchr = UCHARAT(locinput);
2817 break;
c8756f30 2818 case REFFL:
3280af22 2819 PL_reg_flags |= RF_tainted;
c8756f30 2820 /* FALL THROUGH */
c277df42 2821 case REF:
c8756f30 2822 case REFF:
c277df42 2823 n = ARG(scan); /* which paren pair */
cf93c79d 2824 ln = PL_regstartp[n];
2c2d71f5 2825 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 2826 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 2827 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2828 if (ln == PL_regendp[n])
a0d0e21e 2829 break;
a0ed51b3 2830
cf93c79d 2831 s = PL_bostr + ln;
1aa99e6b 2832 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2833 char *l = locinput;
8c18bf38 2834 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2835 /*
2836 * Note that we can't do the "other character" lookup trick as
2837 * in the 8-bit case (no pun intended) because in Unicode we
2838 * have to map both upper and title case to lower case.
2839 */
2840 if (OP(scan) == REFF) {
2841 while (s < e) {
8c18bf38
AL
2842 STRLEN ulen1, ulen2;
2843 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
2844 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
2845
a0ed51b3
LW
2846 if (l >= PL_regeol)
2847 sayNO;
a2a2844f
JH
2848 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2849 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2850 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2851 sayNO;
a2a2844f
JH
2852 s += ulen1;
2853 l += ulen2;
a0ed51b3
LW
2854 }
2855 }
2856 locinput = l;
2857 nextchr = UCHARAT(locinput);
2858 break;
2859 }
2860
a0d0e21e 2861 /* Inline the first character, for speed. */
76e3520e 2862 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2863 (OP(scan) == REF ||
2864 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2865 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2866 sayNO;
cf93c79d 2867 ln = PL_regendp[n] - ln;
3280af22 2868 if (locinput + ln > PL_regeol)
4633a7c4 2869 sayNO;
c8756f30
AK
2870 if (ln > 1 && (OP(scan) == REF
2871 ? memNE(s, locinput, ln)
2872 : (OP(scan) == REFF
2873 ? ibcmp(s, locinput, ln)
2874 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2875 sayNO;
a0d0e21e 2876 locinput += ln;
76e3520e 2877 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2878 break;
2879
2880 case NOTHING:
c277df42 2881 case TAIL:
a0d0e21e
LW
2882 break;
2883 case BACK:
2884 break;
c277df42
IZ
2885 case EVAL:
2886 {
2887 dSP;
533c011a 2888 OP_4tree *oop = PL_op;
3280af22 2889 COP *ocurcop = PL_curcop;
d7afa7f5 2890 PAD *old_comppad;
c277df42 2891 SV *ret;
9b8ea7f3 2892 struct regexp *oreg = PL_reg_re;
9041c2e3 2893
c277df42 2894 n = ARG(scan);
533c011a 2895 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2896 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
d7afa7f5 2897 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 2898 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2899
8e5e9ebe
RGS
2900 {
2901 SV **before = SP;
2902 CALLRUNOPS(aTHX); /* Scalar context. */
2903 SPAGAIN;
2904 if (SP == before)
075aa684 2905 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
2906 else {
2907 ret = POPs;
2908 PUTBACK;
2909 }
2910 }
2911
0f5d15d6 2912 PL_op = oop;
d7afa7f5 2913 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 2914 PL_curcop = ocurcop;
c277df42 2915 if (logical) {
0f5d15d6
IZ
2916 if (logical == 2) { /* Postponed subexpression. */
2917 regexp *re;
22c35a8c 2918 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2919 re_cc_state state;
0f5d15d6 2920 CHECKPOINT cp, lastcp;
5b7ea690 2921 int toggleutf;
5835a535 2922 register SV *sv;
0f5d15d6 2923
5835a535
JH
2924 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2925 mg = mg_find(sv, PERL_MAGIC_qr);
2926 else if (SvSMAGICAL(ret)) {
2927 if (SvGMAGICAL(ret))
2928 sv_unmagic(ret, PERL_MAGIC_qr);
2929 else
2930 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 2931 }
5835a535 2932
0f5d15d6
IZ
2933 if (mg) {
2934 re = (regexp *)mg->mg_obj;
df0003d4 2935 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2936 }
2937 else {
2938 STRLEN len;
c06c673c 2939 const char *t = SvPV_const(ret, len);
0f5d15d6 2940 PMOP pm;
8c18bf38
AL
2941 char * const oprecomp = PL_regprecomp;
2942 const I32 osize = PL_regsize;
2943 const I32 onpar = PL_regnpar;
0f5d15d6 2944
5fcd1c1b 2945 Zero(&pm, 1, PMOP);
5b7ea690 2946 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
c06c673c 2947 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 2948 if (!(SvFLAGS(ret)
5835a535
JH
2949 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2950 | SVs_GMG)))
14befaf4
DM
2951 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2952 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2953 PL_regprecomp = oprecomp;
2954 PL_regsize = osize;
2955 PL_regnpar = onpar;
2956 }
2957 DEBUG_r(
9041c2e3 2958 PerlIO_printf(Perl_debug_log,
5332c881 2959 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
2960 PL_colors[0],
2961 re->precomp,
2962 PL_colors[1],
2963 (strlen(re->precomp) > 60 ? "..." : ""))
2964 );
2965 state.node = next;
2966 state.prev = PL_reg_call_cc;
2967 state.cc = PL_regcc;
2968 state.re = PL_reg_re;
2969
2ab05381 2970 PL_regcc = 0;
9041c2e3 2971
0f5d15d6 2972 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2973 REGCP_SET(lastcp);
0f5d15d6
IZ
2974 cache_re(re);
2975 state.ss = PL_savestack_ix;
2976 *PL_reglastparen = 0;
a01268b5 2977 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2978 PL_reg_call_cc = &state;
2979 PL_reginput = locinput;
5b7ea690
JH
2980 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2981 ((re->reganch & ROPT_UTF8) != 0);
2982 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
2983
2984 /* XXXX This is too dramatic a measure... */
2985 PL_reg_maxiter = 0;
2986
0f5d15d6 2987 if (regmatch(re->program + 1)) {
2c914db6
IZ
2988 /* Even though we succeeded, we need to restore
2989 global variables, since we may be wrapped inside
2990 SUSPEND, thus the match may be not finished yet. */
2991
2992 /* XXXX Do this only if SUSPENDed? */
2993 PL_reg_call_cc = state.prev;
2994 PL_regcc = state.cc;
2995 PL_reg_re = state.re;
2996 cache_re(PL_reg_re);
5b7ea690 2997 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
2998
2999 /* XXXX This is too dramatic a measure... */
3000 PL_reg_maxiter = 0;
3001
3002 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3003 ReREFCNT_dec(re);
3004 regcpblow(cp);
3005 sayYES;
3006 }
0f5d15d6 3007 ReREFCNT_dec(re);
02db2b7b 3008 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3009 regcppop();
3010 PL_reg_call_cc = state.prev;
3011 PL_regcc = state.cc;
3012 PL_reg_re = state.re;
d3790889 3013 cache_re(PL_reg_re);
5b7ea690 3014 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3015
3016 /* XXXX This is too dramatic a measure... */
3017 PL_reg_maxiter = 0;
3018
8e514ae6 3019 logical = 0;
0f5d15d6
IZ
3020 sayNO;
3021 }
c277df42 3022 sw = SvTRUE(ret);
0f5d15d6 3023 logical = 0;
a0ed51b3 3024 }
9b8ea7f3 3025 else {
3280af22 3026 sv_setsv(save_scalar(PL_replgv), ret);
9b8ea7f3
JH
3027 cache_re(oreg);
3028 }
c277df42
IZ
3029 break;
3030 }
a0d0e21e 3031 case OPEN:
c277df42 3032 n = ARG(scan); /* which paren pair */
3280af22
NIS
3033 PL_reg_start_tmp[n] = locinput;
3034 if (n > PL_regsize)
3035 PL_regsize = n;
a0d0e21e
LW
3036 break;
3037 case CLOSE:
c277df42 3038 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3039 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3040 PL_regendp[n] = locinput - PL_bostr;
eb160463 3041 if (n > (I32)*PL_reglastparen)
3280af22 3042 *PL_reglastparen = n;
a01268b5 3043 *PL_reglastcloseparen = n;
a0d0e21e 3044 break;
c277df42
IZ
3045 case GROUPP:
3046 n = ARG(scan); /* which paren pair */
eb160463 3047 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3048 break;
3049 case IFTHEN:
2c2d71f5 3050 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3051 if (sw)
3052 next = NEXTOPER(NEXTOPER(scan));
3053 else {
3054 next = scan + ARG(scan);
3055 if (OP(next) == IFTHEN) /* Fake one. */
3056 next = NEXTOPER(NEXTOPER(next));
3057 }
3058 break;
3059 case LOGICAL:
0f5d15d6 3060 logical = scan->flags;
c277df42 3061 break;
2ab05381
IZ
3062/*******************************************************************
3063 PL_regcc contains infoblock about the innermost (...)* loop, and
3064 a pointer to the next outer infoblock.
3065
3066 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3067
3068 1) After matching X, regnode for CURLYX is processed;
3069
9041c2e3 3070 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
3071 regmatch() recursively with the starting point at WHILEM node;
3072
3073 3) Each hit of WHILEM node tries to match A and Z (in the order
3074 depending on the current iteration, min/max of {min,max} and
3075 greediness). The information about where are nodes for "A"
3076 and "Z" is read from the infoblock, as is info on how many times "A"
3077 was already matched, and greediness.
3078
3079 4) After A matches, the same WHILEM node is hit again.
3080
3081 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3082 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3083 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3084 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3085 of the external loop.
3086
3087 Currently present infoblocks form a tree with a stem formed by PL_curcc
3088 and whatever it mentions via ->next, and additional attached trees
3089 corresponding to temporarily unset infoblocks as in "5" above.
3090
9041c2e3 3091 In the following picture infoblocks for outer loop of
2ab05381
IZ
3092 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3093 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3094 infoblocks are drawn below the "reset" infoblock.
3095
3096 In fact in the picture below we do not show failed matches for Z and T
3097 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3098 more obvious *why* one needs to *temporary* unset infoblocks.]
3099
3100 Matched REx position InfoBlocks Comment
3101 (Y(A)*?Z)*?T x
3102 Y(A)*?Z)*?T x <- O
3103 Y (A)*?Z)*?T x <- O
3104 Y A)*?Z)*?T x <- O <- I
3105 YA )*?Z)*?T x <- O <- I
3106 YA A)*?Z)*?T x <- O <- I
3107 YAA )*?Z)*?T x <- O <- I
3108 YAA Z)*?T x <- O # Temporary unset I
3109 I
3110
3111 YAAZ Y(A)*?Z)*?T x <- O
3112 I
3113
3114 YAAZY (A)*?Z)*?T x <- O
3115 I
3116
3117 YAAZY A)*?Z)*?T x <- O <- I
3118 I
3119
3120 YAAZYA )*?Z)*?T x <- O <- I
3121 I
3122
3123 YAAZYA Z)*?T x <- O # Temporary unset I
3124 I,I
3125
3126 YAAZYAZ )*?T x <- O
3127 I,I
3128
3129 YAAZYAZ T x # Temporary unset O
3130 O
3131 I,I
3132
3133 YAAZYAZT x
3134 O
3135 I,I
3136 *******************************************************************/
a0d0e21e
LW
3137 case CURLYX: {
3138 CURCUR cc;
3280af22 3139 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3140 /* No need to save/restore up to this paren */
3141 I32 parenfloor = scan->flags;
c277df42
IZ
3142
3143 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3144 next += ARG(next);
3280af22
NIS
3145 cc.oldcc = PL_regcc;
3146 PL_regcc = &cc;
cb434fcc
IZ
3147 /* XXXX Probably it is better to teach regpush to support
3148 parenfloor > PL_regsize... */
eb160463 3149 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3150 parenfloor = *PL_reglastparen; /* Pessimization... */
3151 cc.parenfloor = parenfloor;
a0d0e21e
LW
3152 cc.cur = -1;
3153 cc.min = ARG1(scan);
3154 cc.max = ARG2(scan);
c277df42 3155 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3156 cc.next = next;
3157 cc.minmod = minmod;
3158 cc.lastloc = 0;
3280af22 3159 PL_reginput = locinput;
a0d0e21e
LW
3160 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3161 regcpblow(cp);
3280af22 3162 PL_regcc = cc.oldcc;
4633a7c4 3163 saySAME(n);
a0d0e21e
LW
3164 }
3165 /* NOT REACHED */
3166 case WHILEM: {
3167 /*
3168 * This is really hard to understand, because after we match
3169 * what we're trying to match, we must make sure the rest of
2c2d71f5 3170 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3171 * to go back UP the parse tree by recursing ever deeper. And
3172 * if it fails, we have to reset our parent's current state
3173 * that we can try again after backing off.
3174 */
3175
c277df42 3176 CHECKPOINT cp, lastcp;
3280af22 3177 CURCUR* cc = PL_regcc;
c277df42 3178 char *lastloc = cc->lastloc; /* Detection of 0-len. */
6e00f9c6 3179 I32 cache_offset = 0, cache_bit = 0;
c277df42 3180
4633a7c4 3181 n = cc->cur + 1; /* how many we know we matched */
3280af22 3182 PL_reginput = locinput;
a0d0e21e 3183
c277df42 3184 DEBUG_r(
9041c2e3 3185 PerlIO_printf(Perl_debug_log,
91f3b821 3186 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3187 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3188 (long)n, (long)cc->min,
2797576d 3189 (long)cc->max, PTR2UV(cc))
c277df42 3190 );
4633a7c4 3191
a0d0e21e
LW
3192 /* If degenerate scan matches "", assume scan done. */
3193
579cf2c3 3194 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3195 PL_regcc = cc->oldcc;
2ab05381
IZ
3196 if (PL_regcc)
3197 ln = PL_regcc->cur;
c277df42 3198 DEBUG_r(
c3464db5
DD
3199 PerlIO_printf(Perl_debug_log,
3200 "%*s empty match detected, try continuation...\n",
3280af22 3201 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3202 );
a0d0e21e 3203 if (regmatch(cc->next))
4633a7c4 3204 sayYES;
2ab05381
IZ
3205 if (PL_regcc)
3206 PL_regcc->cur = ln;
3280af22 3207 PL_regcc = cc;
4633a7c4 3208 sayNO;
a0d0e21e
LW
3209 }
3210
3211 /* First just match a string of min scans. */
3212
3213 if (n < cc->min) {
3214 cc->cur = n;
3215 cc->lastloc = locinput;
4633a7c4
LW
3216 if (regmatch(cc->scan))
3217 sayYES;
3218 cc->cur = n - 1;
c277df42 3219 cc->lastloc = lastloc;
4633a7c4 3220 sayNO;
a0d0e21e
LW
3221 }
3222
2c2d71f5
JH
3223 if (scan->flags) {
3224 /* Check whether we already were at this position.
3225 Postpone detection until we know the match is not
3226 *that* much linear. */
3227 if (!PL_reg_maxiter) {
3228 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3229 PL_reg_leftiter = PL_reg_maxiter;
3230 }
3231 if (PL_reg_leftiter-- == 0) {
8c18bf38 3232 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3233 if (PL_reg_poscache) {
eb160463 3234 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3235 Renew(PL_reg_poscache, size, char);
3236 PL_reg_poscache_size = size;
3237 }
3238 Zero(PL_reg_poscache, size, char);
3239 }
3240 else {
3241 PL_reg_poscache_size = size;
cd7a8267 3242 Newxz(PL_reg_poscache, size, char);
2c2d71f5
JH
3243 }
3244 DEBUG_r(
3245 PerlIO_printf(Perl_debug_log,
3246 "%sDetected a super-linear match, switching on caching%s...\n",
3247 PL_colors[4], PL_colors[5])
3248 );
3249 }
3250 if (PL_reg_leftiter < 0) {
6e00f9c6 3251 cache_offset = locinput - PL_bostr;
2c2d71f5 3252
6e00f9c6
HS
3253 cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3254 + cache_offset * (scan->flags>>4);
3255 cache_bit = cache_offset % 8;
3256 cache_offset /= 8;
3257 if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
2c2d71f5
JH
3258 DEBUG_r(
3259 PerlIO_printf(Perl_debug_log,
3260 "%*s already tried at this position...\n",
3261 REPORT_CODE_OFF+PL_regindent*2, "")
3262 );
6e00f9c6
HS
3263 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3264 /* cache records success */
91513146
JH
3265 sayYES;
3266 else
6e00f9c6 3267 /* cache records failure */
91513146 3268 sayNO_SILENT;
2c2d71f5 3269 }
6e00f9c6 3270 PL_reg_poscache[cache_offset] |= (1<<cache_bit);
2c2d71f5
JH
3271 }
3272 }
3273
a0d0e21e
LW
3274 /* Prefer next over scan for minimal matching. */
3275
3276 if (cc->minmod) {
3280af22 3277 PL_regcc = cc->oldcc;
2ab05381
IZ
3278 if (PL_regcc)
3279 ln = PL_regcc->cur;
5f05dabc 3280 cp = regcppush(cc->parenfloor);
02db2b7b 3281 REGCP_SET(lastcp);
5f05dabc 3282 if (regmatch(cc->next)) {
c277df42 3283 regcpblow(cp);
6e00f9c6 3284 CACHEsayYES; /* All done. */
5f05dabc 3285 }
02db2b7b 3286 REGCP_UNWIND(lastcp);
5f05dabc 3287 regcppop();
2ab05381
IZ
3288 if (PL_regcc)
3289 PL_regcc->cur = ln;
3280af22 3290 PL_regcc = cc;
a0d0e21e 3291
c277df42 3292 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3293 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3294 && !(PL_reg_flags & RF_warned)) {
3295 PL_reg_flags |= RF_warned;
9014280d 3296 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3297 "Complex regular subexpression recursion",
3298 REG_INFTY - 1);
c277df42 3299 }
6e00f9c6 3300 CACHEsayNO;
c277df42 3301 }
a687059c 3302
c277df42 3303 DEBUG_r(
c3464db5
DD
3304 PerlIO_printf(Perl_debug_log,
3305 "%*s trying longer...\n",
3280af22 3306 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3307 );
a0d0e21e 3308 /* Try scanning more and see if it helps. */
3280af22 3309 PL_reginput = locinput;
a0d0e21e
LW
3310 cc->cur = n;
3311 cc->lastloc = locinput;
5f05dabc 3312 cp = regcppush(cc->parenfloor);
02db2b7b 3313 REGCP_SET(lastcp);
5f05dabc 3314 if (regmatch(cc->scan)) {
c277df42 3315 regcpblow(cp);
6e00f9c6 3316 CACHEsayYES;
5f05dabc 3317 }
02db2b7b 3318 REGCP_UNWIND(lastcp);
5f05dabc 3319 regcppop();
4633a7c4 3320 cc->cur = n - 1;
c277df42 3321 cc->lastloc = lastloc;
6e00f9c6 3322 CACHEsayNO;
a0d0e21e
LW
3323 }
3324
3325 /* Prefer scan over next for maximal matching. */
3326
3327 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3328 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3329 cc->cur = n;
3330 cc->lastloc = locinput;
02db2b7b 3331 REGCP_SET(lastcp);
5f05dabc 3332 if (regmatch(cc->scan)) {
c277df42 3333 regcpblow(cp);
6e00f9c6 3334 CACHEsayYES;
5f05dabc 3335 }
02db2b7b 3336 REGCP_UNWIND(lastcp);
a0d0e21e 3337 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3338 PL_reginput = locinput;
c277df42 3339 DEBUG_r(
c3464db5
DD
3340 PerlIO_printf(Perl_debug_log,
3341 "%*s failed, try continuation...\n",
3280af22 3342 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3343 );
3344 }
9041c2e3 3345 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3346 && !(PL_reg_flags & RF_warned)) {
3280af22 3347 PL_reg_flags |= RF_warned;
9014280d 3348 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3349 "Complex regular subexpression recursion",
3350 REG_INFTY - 1);
a0d0e21e
LW
3351 }
3352
3353 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3354 PL_regcc = cc->oldcc;
2ab05381
IZ
3355 if (PL_regcc)
3356 ln = PL_regcc->cur;
a0d0e21e 3357 if (regmatch(cc->next))
6e00f9c6 3358 CACHEsayYES;
2ab05381
IZ
3359 if (PL_regcc)
3360 PL_regcc->cur = ln;
3280af22 3361 PL_regcc = cc;
4633a7c4 3362 cc->cur = n - 1;
c277df42 3363 cc->lastloc = lastloc;
6e00f9c6 3364 CACHEsayNO;
a0d0e21e
LW
3365 }
3366 /* NOT REACHED */
9041c2e3 3367 case BRANCHJ:
c277df42
IZ
3368 next = scan + ARG(scan);
3369 if (next == scan)
3370 next = NULL;
3371 inner = NEXTOPER(NEXTOPER(scan));
3372 goto do_branch;
9041c2e3 3373 case BRANCH:
c277df42
IZ
3374 inner = NEXTOPER(scan);
3375 do_branch:
3376 {
c277df42
IZ
3377 c1 = OP(scan);
3378 if (OP(next) != c1) /* No choice. */
3379 next = inner; /* Avoid recursion. */
a0d0e21e 3380 else {
8c18bf38 3381 const I32 lastparen = *PL_reglastparen;
02db2b7b
IZ
3382 I32 unwind1;
3383 re_unwind_branch_t *uw;
3384
3385 /* Put unwinding data on stack */
3386 unwind1 = SSNEWt(1,re_unwind_branch_t);
3387 uw = SSPTRt(unwind1,re_unwind_branch_t);
3388 uw->prev = unwind;
3389 unwind = unwind1;
3390 uw->type = ((c1 == BRANCH)
3391 ? RE_UNWIND_BRANCH
3392 : RE_UNWIND_BRANCHJ);
3393 uw->lastparen = lastparen;
3394 uw->next = next;
3395 uw->locinput = locinput;
3396 uw->nextchr = nextchr;
3397#ifdef DEBUGGING
3398 uw->regindent = ++PL_regindent;
3399#endif
c277df42 3400
02db2b7b
IZ
3401 REGCP_SET(uw->lastcp);
3402
3403 /* Now go into the first branch */
3404 next = inner;
a687059c 3405 }
a0d0e21e
LW
3406 }
3407 break;
3408 case MINMOD:
3409 minmod = 1;
3410 break;
c277df42
IZ
3411 case CURLYM:
3412 {
00db4c45 3413 I32 l = 0;
c277df42 3414 CHECKPOINT lastcp;
9041c2e3 3415
c277df42 3416 /* We suppose that the next guy does not need
b035a42e 3417 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3418 and has no parenths to influence future backrefs. */
3419 ln = ARG1(scan); /* min to match */
3420 n = ARG2(scan); /* max to match */
c277df42
IZ
3421 paren = scan->flags;
3422 if (paren) {
3280af22
NIS
3423 if (paren > PL_regsize)
3424 PL_regsize = paren;
eb160463 3425 if (paren > (I32)*PL_reglastparen)
3280af22 3426 *PL_reglastparen = paren;
c277df42 3427 }
dc45a647 3428 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3429 if (paren)
3430 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3431 PL_reginput = locinput;
c277df42
IZ
3432 if (minmod) {
3433 minmod = 0;
3434 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3435 sayNO;
3280af22 3436 locinput = PL_reginput;
cca55fe3 3437 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3438 regnode *text_node = next;
3439
cca55fe3 3440 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3441
cca55fe3 3442 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3443 else {
cca55fe3 3444 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3445 c1 = c2 = -1000;
3446 goto assume_ok_MM;
cca55fe3
JP
3447 }
3448 else { c1 = (U8)*STRING(text_node); }
af5decee 3449 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3450 c2 = PL_fold[c1];
af5decee 3451 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3452 c2 = PL_fold_locale[c1];
3453 else
3454 c2 = c1;
3455 }
a0ed51b3
LW
3456 }
3457 else
c277df42 3458 c1 = c2 = -1000;
cca55fe3 3459 assume_ok_MM:
02db2b7b 3460 REGCP_SET(lastcp);
b035a42e 3461 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3462 /* If it could work, try it. */
3463 if (c1 == -1000 ||
3280af22
NIS
3464 UCHARAT(PL_reginput) == c1 ||
3465 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3466 {
3467 if (paren) {
f31a99c8 3468 if (ln) {
cf93c79d
IZ
3469 PL_regstartp[paren] =
3470 HOPc(PL_reginput, -l) - PL_bostr;
3471 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3472 }
3473 else
cf93c79d 3474 PL_regendp[paren] = -1;
c277df42
IZ
3475 }
3476 if (regmatch(next))
3477 sayYES;
02db2b7b 3478 REGCP_UNWIND(lastcp);
c277df42
IZ
3479 }
3480 /* Couldn't or didn't -- move forward. */
3280af22 3481 PL_reginput = locinput;
c277df42
IZ
3482 if (regrepeat_hard(scan, 1, &l)) {
3483 ln++;
3280af22 3484 locinput = PL_reginput;
c277df42
IZ
3485 }
3486 else
3487 sayNO;
3488 }
a0ed51b3
LW
3489 }
3490 else {
c277df42 3491 n = regrepeat_hard(scan, n, &l);
3280af22 3492 locinput = PL_reginput;
c277df42 3493 DEBUG_r(
5c0ca799 3494 PerlIO_printf(Perl_debug_log,
faccc32b 3495 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3496 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3497 (IV) n, (IV)l)
c277df42
IZ
3498 );
3499 if (n >= ln) {
cca55fe3 3500 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3501 regnode *text_node = next;
3502
cca55fe3 3503 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3504
cca55fe3 3505 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3506 else {
cca55fe3 3507 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3508 c1 = c2 = -1000;
3509 goto assume_ok_REG;
cca55fe3
JP
3510 }
3511 else { c1 = (U8)*STRING(text_node); }
3512
af5decee 3513 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3514 c2 = PL_fold[c1];
af5decee 3515 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3516 c2 = PL_fold_locale[c1];
3517 else
3518 c2 = c1;
3519 }
a0ed51b3
LW
3520 }
3521 else
c277df42
IZ
3522 c1 = c2 = -1000;
3523 }
cca55fe3 3524 assume_ok_REG:
02db2b7b 3525 REGCP_SET(lastcp);
c277df42
IZ
3526 while (n >= ln) {
3527 /* If it could work, try it. */
3528 if (c1 == -1000 ||
3280af22
NIS
3529 UCHARAT(PL_reginput) == c1 ||
3530 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3531 {
3532 DEBUG_r(
c3464db5 3533 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3534 "%*s trying tail with n=%"IVdf"...\n",
3535 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3536 );
3537 if (paren) {
3538 if (n) {
cf93c79d
IZ
3539 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3540 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3541 }
a0ed51b3 3542 else
cf93c79d 3543 PL_regendp[paren] = -1;
c277df42 3544 }
a0ed51b3
LW
3545 if (regmatch(next))
3546 sayYES;
02db2b7b 3547 REGCP_UNWIND(lastcp);
a0ed51b3 3548 }
c277df42
IZ
3549 /* Couldn't or didn't -- back up. */
3550 n--;
dfe13c55 3551 locinput = HOPc(locinput, -l);
3280af22 3552 PL_reginput = locinput;
c277df42
IZ
3553 }
3554 }
3555 sayNO;
3556 break;
3557 }
3558 case CURLYN:
3559 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3560 if (paren > PL_regsize)
3561 PL_regsize = paren;
eb160463 3562 if (paren > (I32)*PL_reglastparen)
3280af22 3563 *PL_reglastparen = paren;
c277df42
IZ
3564 ln = ARG1(scan); /* min to match */
3565 n = ARG2(scan); /* max to match */
dc45a647 3566 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3567 goto repeat;
a0d0e21e 3568 case CURLY:
c277df42 3569 paren = 0;
a0d0e21e
LW
3570 ln = ARG1(scan); /* min to match */
3571 n = ARG2(scan); /* max to match */
dc45a647 3572 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3573 goto repeat;
3574 case STAR:
3575 ln = 0;
c277df42 3576 n = REG_INFTY;
a0d0e21e 3577 scan = NEXTOPER(scan);
c277df42 3578 paren = 0;
a0d0e21e
LW
3579 goto repeat;
3580 case PLUS:
c277df42
IZ
3581 ln = 1;
3582 n = REG_INFTY;
3583 scan = NEXTOPER(scan);
3584 paren = 0;
3585 repeat:
a0d0e21e
LW
3586 /*
3587 * Lookahead to avoid useless match attempts
3588 * when we know what character comes next.
3589 */
5f80c4cf
JP
3590
3591 /*
3592 * Used to only do .*x and .*?x, but now it allows
3593 * for )'s, ('s and (?{ ... })'s to be in the way
3594 * of the quantifier and the EXACT-like node. -- japhy
3595 */
3596
cca55fe3 3597 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3598 U8 *s;
3599 regnode *text_node = next;
3600
cca55fe3 3601 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3602
cca55fe3 3603 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3604 else {
cca55fe3 3605 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3606 c1 = c2 = -1000;
3607 goto assume_ok_easy;
cca55fe3
JP
3608 }
3609 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3610
3611 if (!UTF) {
3612 c2 = c1 = *s;
f65d3ee7 3613 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3614 c2 = PL_fold[c1];
f65d3ee7 3615 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3616 c2 = PL_fold_locale[c1];
1aa99e6b 3617 }
5f80c4cf 3618 else { /* UTF */
f65d3ee7 3619 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3620 STRLEN ulen1, ulen2;
a2a469f9
JH
3621 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3622 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
3623
3624 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3625 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3626
a2a469f9 3627 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
872c91ae
JH
3628 ckWARN(WARN_UTF8) ?
3629 0 : UTF8_ALLOW_ANY);
a2a469f9 3630 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
872c91ae
JH
3631 ckWARN(WARN_UTF8) ?
3632 0 : UTF8_ALLOW_ANY);
5f80c4cf
JP
3633 }
3634 else {
a2a469f9 3635 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
872c91ae
JH
3636 ckWARN(WARN_UTF8) ?
3637 0 : UTF8_ALLOW_ANY);
5f80c4cf 3638 }
1aa99e6b
IH
3639 }
3640 }
bbce6d69 3641 }
a0d0e21e 3642 else
bbce6d69 3643 c1 = c2 = -1000;
cca55fe3 3644 assume_ok_easy:
3280af22 3645 PL_reginput = locinput;
a0d0e21e 3646 if (minmod) {
c277df42 3647 CHECKPOINT lastcp;
a0d0e21e
LW
3648 minmod = 0;
3649 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3650 sayNO;
a0ed51b3 3651 locinput = PL_reginput;
02db2b7b 3652 REGCP_SET(lastcp);
0fe9bf95 3653 if (c1 != -1000) {
1aa99e6b 3654 char *e; /* Should not check after this */
0fe9bf95 3655 char *old = locinput;
b2f2f093 3656 int count = 0;
0fe9bf95 3657
1aa99e6b 3658 if (n == REG_INFTY) {
0fe9bf95 3659 e = PL_regeol - 1;
1aa99e6b
IH
3660 if (do_utf8)
3661 while (UTF8_IS_CONTINUATION(*(U8*)e))
3662 e--;
3663 }
3664 else if (do_utf8) {
3665 int m = n - ln;
3666 for (e = locinput;
3667 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3668 e += UTF8SKIP(e);
3669 }
3670 else {
3671 e = locinput + n - ln;
3672 if (e >= PL_regeol)
3673 e = PL_regeol - 1;
3674 }
0fe9bf95
IZ
3675 while (1) {
3676 /* Find place 'next' could work */
1aa99e6b
IH
3677 if (!do_utf8) {
3678 if (c1 == c2) {
a8e8ab15
JH
3679 while (locinput <= e &&
3680 UCHARAT(locinput) != c1)
1aa99e6b
IH
3681 locinput++;
3682 } else {
9041c2e3 3683 while (locinput <= e
a8e8ab15
JH
3684 && UCHARAT(locinput) != c1
3685 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3686 locinput++;
3687 }
3688 count = locinput - old;
3689 }
3690 else {
1aa99e6b 3691 if (c1 == c2) {
8c18bf38 3692 STRLEN len;
872c91ae
JH
3693 /* count initialised to
3694 * utf8_distance(old, locinput) */
b2f2f093 3695 while (locinput <= e &&
872c91ae 3696 utf8n_to_uvchr((U8*)locinput,
a2a469f9 3697 UTF8_MAXBYTES, &len,
872c91ae 3698 ckWARN(WARN_UTF8) ?
eb160463 3699 0 : UTF8_ALLOW_ANY) != (UV)c1) {
1aa99e6b 3700 locinput += len;
b2f2f093
JH
3701 count++;
3702 }
1aa99e6b 3703 } else {
8c18bf38 3704 STRLEN len;
872c91ae
JH
3705 /* count initialised to
3706 * utf8_distance(old, locinput) */
b2f2f093 3707 while (locinput <= e) {
872c91ae 3708 UV c = utf8n_to_uvchr((U8*)locinput,
a2a469f9 3709 UTF8_MAXBYTES, &len,
872c91ae
JH
3710 ckWARN(WARN_UTF8) ?
3711 0 : UTF8_ALLOW_ANY);
eb160463 3712 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 3713 break;
b2f2f093
JH
3714 locinput += len;
3715 count++;
1aa99e6b
IH
3716 }
3717 }
0fe9bf95 3718 }
9041c2e3 3719 if (locinput > e)
0fe9bf95
IZ
3720 sayNO;
3721 /* PL_reginput == old now */
3722 if (locinput != old) {
3723 ln = 1; /* Did some */
1aa99e6b 3724 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3725 sayNO;
3726 }
3727 /* PL_reginput == locinput now */
29d1e993 3728 TRYPAREN(paren, ln, locinput);
0fe9bf95 3729 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3730 REGCP_UNWIND(lastcp);
0fe9bf95 3731 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3732 old = locinput;
3733 if (do_utf8)
3734 locinput += UTF8SKIP(locinput);
3735 else
3736 locinput++;
b2f2f093 3737 count = 1;
0fe9bf95
IZ
3738 }
3739 }
3740 else
c277df42 3741 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3742 UV c;
3743 if (c1 != -1000) {
3744 if (do_utf8)
872c91ae 3745 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3746 UTF8_MAXBYTES, 0,
872c91ae
JH
3747 ckWARN(WARN_UTF8) ?
3748 0 : UTF8_ALLOW_ANY);
1aa99e6b 3749 else
9041c2e3 3750 c = UCHARAT(PL_reginput);
2390ecbc 3751 /* If it could work, try it. */
eb160463 3752 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 3753 {
5b7ea690 3754 TRYPAREN(paren, ln, PL_reginput);
2390ecbc
PP
3755 REGCP_UNWIND(lastcp);
3756 }
1aa99e6b 3757 }
a0d0e21e 3758 /* If it could work, try it. */
2390ecbc 3759 else if (c1 == -1000)
bbce6d69 3760 {
5b7ea690 3761 TRYPAREN(paren, ln, PL_reginput);
02db2b7b 3762 REGCP_UNWIND(lastcp);
bbce6d69 3763 }
c277df42 3764 /* Couldn't or didn't -- move forward. */
a0ed51b3 3765 PL_reginput = locinput;
a0d0e21e
LW
3766 if (regrepeat(scan, 1)) {
3767 ln++;
a0ed51b3
LW
3768 locinput = PL_reginput;
3769 }
3770 else
4633a7c4 3771 sayNO;
a0d0e21e
LW
3772 }
3773 }
3774 else {
c277df42 3775 CHECKPOINT lastcp;
a0d0e21e 3776 n = regrepeat(scan, n);
a0ed51b3 3777 locinput = PL_reginput;
22c35a8c 3778 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3779 ((!PL_multiline && OP(next) != MEOL) ||
3780 OP(next) == SEOL || OP(next) == EOS))
3781 {
a0d0e21e 3782 ln = n; /* why back off? */
1aeab75a
GS
3783 /* ...because $ and \Z can match before *and* after
3784 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3785 We should back off by one in this case. */
3786 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3787 ln--;
3788 }
02db2b7b 3789 REGCP_SET(lastcp);
c277df42 3790 if (paren) {
8fa7f367 3791 UV c = 0;
c277df42 3792 while (n >= ln) {
1aa99e6b
IH
3793 if (c1 != -1000) {
3794 if (do_utf8)
872c91ae 3795 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3796 UTF8_MAXBYTES, 0,
872c91ae
JH
3797 ckWARN(WARN_UTF8) ?
3798 0 : UTF8_ALLOW_ANY);
1aa99e6b 3799 else
9041c2e3 3800 c = UCHARAT(PL_reginput);
1aa99e6b 3801 }
c277df42 3802 /* If it could work, try it. */
eb160463 3803 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 3804 {
29d1e993 3805 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3806 REGCP_UNWIND(lastcp);
c277df42
IZ
3807 }
3808 /* Couldn't or didn't -- back up. */
3809 n--;
dfe13c55 3810 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3811 }
a0ed51b3
LW
3812 }
3813 else {
8fa7f367 3814 UV c = 0;
c277df42 3815 while (n >= ln) {
1aa99e6b
IH
3816 if (c1 != -1000) {
3817 if (do_utf8)
872c91ae 3818 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3819 UTF8_MAXBYTES, 0,
872c91ae
JH
3820 ckWARN(WARN_UTF8) ?
3821 0 : UTF8_ALLOW_ANY);
1aa99e6b 3822 else
9041c2e3 3823 c = UCHARAT(PL_reginput);
1aa99e6b 3824 }
c277df42 3825 /* If it could work, try it. */
eb160463 3826 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 3827 {
29d1e993 3828 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3829 REGCP_UNWIND(lastcp);
c277df42
IZ
3830 }
3831 /* Couldn't or didn't -- back up. */
3832 n--;
dfe13c55 3833 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3834 }
a0d0e21e
LW
3835 }
3836 }
4633a7c4 3837 sayNO;
c277df42 3838 break;
a0d0e21e 3839 case END:
0f5d15d6
IZ
3840 if (PL_reg_call_cc) {
3841 re_cc_state *cur_call_cc = PL_reg_call_cc;
3842 CURCUR *cctmp = PL_regcc;
3843 regexp *re = PL_reg_re;
3844 CHECKPOINT cp, lastcp;
3845
3846 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3847 REGCP_SET(lastcp);
0f5d15d6
IZ
3848 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3849 the caller. */
3850 PL_reginput = locinput; /* Make position available to
3851 the callcc. */
3852 cache_re(PL_reg_call_cc->re);
3853 PL_regcc = PL_reg_call_cc->cc;
3854 PL_reg_call_cc = PL_reg_call_cc->prev;
3855 if (regmatch(cur_call_cc->node)) {
3856 PL_reg_call_cc = cur_call_cc;
3857 regcpblow(cp);
3858 sayYES;
3859 }
02db2b7b 3860 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3861 regcppop();
3862 PL_reg_call_cc = cur_call_cc;
3863 PL_regcc = cctmp;
3864 PL_reg_re = re;
3865 cache_re(re);
3866
3867 DEBUG_r(
3868 PerlIO_printf(Perl_debug_log,
3869 "%*s continuation failed...\n",
3870 REPORT_CODE_OFF+PL_regindent*2, "")
3871 );
7821416a 3872 sayNO_SILENT;
0f5d15d6 3873 }
7821416a
IZ
3874 if (locinput < PL_regtill) {
3875 DEBUG_r(PerlIO_printf(Perl_debug_log,
3876 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3877 PL_colors[4],
3878 (long)(locinput - PL_reg_starttry),
3879 (long)(PL_regtill - PL_reg_starttry),
3880 PL_colors[5]));
3881 sayNO_FINAL; /* Cannot match: too short. */
3882 }
3883 PL_reginput = locinput; /* put where regtry can find it */
3884 sayYES_FINAL; /* Success! */
7e5428c5 3885 case SUCCEED:
3280af22 3886 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3887 sayYES_LOUD; /* Success! */
c277df42
IZ
3888 case SUSPEND:
3889 n = 1;
9fe1d20c 3890 PL_reginput = locinput;
9041c2e3 3891 goto do_ifmatch;
a0d0e21e 3892 case UNLESSM:
c277df42 3893 n = 0;
a0ed51b3 3894 if (scan->flags) {
efb30f32
HS
3895 s = HOPBACKc(locinput, scan->flags);
3896 if (!s)
3897 goto say_yes;
3898 PL_reginput = s;
a0ed51b3
LW
3899 }
3900 else
3901 PL_reginput = locinput;
c277df42
IZ
3902 goto do_ifmatch;
3903 case IFMATCH:
3904 n = 1;
a0ed51b3 3905 if (scan->flags) {
efb30f32
HS
3906 s = HOPBACKc(locinput, scan->flags);
3907 if (!s)
3908 goto say_no;
3909 PL_reginput = s;
a0ed51b3
LW
3910 }
3911 else
3912 PL_reginput = locinput;
3913
c277df42 3914 do_ifmatch:
c277df42
IZ
3915 inner = NEXTOPER(NEXTOPER(scan));
3916 if (regmatch(inner) != n) {
3917 say_no:
3918 if (logical) {
3919 logical = 0;
3920 sw = 0;
3921 goto do_longjump;
a0ed51b3
LW
3922 }
3923 else
c277df42
IZ
3924 sayNO;
3925 }
3926 say_yes:
3927 if (logical) {
3928 logical = 0;
3929 sw = 1;
3930 }
fe44a5e8 3931 if (OP(scan) == SUSPEND) {
3280af22 3932 locinput = PL_reginput;
565764a8 3933 nextchr = UCHARAT(locinput);
fe44a5e8 3934 }
c277df42
IZ
3935 /* FALL THROUGH. */
3936 case LONGJMP:
3937 do_longjump:
3938 next = scan + ARG(scan);
3939 if (next == scan)
3940 next = NULL;
a0d0e21e
LW
3941 break;
3942 default:
b900a521 3943 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3944 PTR2UV(scan), OP(scan));
cea2e8a9 3945 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3946 }
02db2b7b 3947 reenter:
a0d0e21e
LW
3948 scan = next;
3949 }
a687059c 3950
a0d0e21e
LW
3951 /*
3952 * We get here only if there's trouble -- normally "case END" is
3953 * the terminating point.
3954 */
cea2e8a9 3955 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3956 /*NOTREACHED*/
4633a7c4
LW
3957 sayNO;
3958
7821416a
IZ
3959yes_loud:
3960 DEBUG_r(
3961 PerlIO_printf(Perl_debug_log,
3962 "%*s %scould match...%s\n",
3963 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3964 );
3965 goto yes;
3966yes_final:
3967 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3968 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3969yes:
3970#ifdef DEBUGGING
3280af22 3971 PL_regindent--;
4633a7c4 3972#endif
02db2b7b
IZ
3973
3974#if 0 /* Breaks $^R */
3975 if (unwind)
3976 regcpblow(firstcp);
3977#endif
4633a7c4
LW
3978 return 1;
3979
3980no:
7821416a
IZ
3981 DEBUG_r(
3982 PerlIO_printf(Perl_debug_log,
3983 "%*s %sfailed...%s\n",
3984 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3985 );
3986 goto do_no;
3987no_final:
3988do_no:
02db2b7b
IZ
3989 if (unwind) {
3990 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3991
3992 switch (uw->type) {
3993 case RE_UNWIND_BRANCH:
3994 case RE_UNWIND_BRANCHJ:
3995 {
3996 re_unwind_branch_t *uwb = &(uw->branch);
8c18bf38 3997 const I32 lastparen = uwb->lastparen;
9041c2e3 3998
02db2b7b
IZ
3999 REGCP_UNWIND(uwb->lastcp);
4000 for (n = *PL_reglastparen; n > lastparen; n--)
4001 PL_regendp[n] = -1;
4002 *PL_reglastparen = n;
4003 scan = next = uwb->next;
9041c2e3
NIS
4004 if ( !scan ||
4005 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4006 ? BRANCH : BRANCHJ) ) { /* Failure */
4007 unwind = uwb->prev;
4008#ifdef DEBUGGING
4009 PL_regindent--;
4010#endif
4011 goto do_no;
4012 }
4013 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4014 if ((n = (uwb->type == RE_UNWIND_BRANCH
4015 ? NEXT_OFF(next) : ARG(next))))
4016 next += n;
4017 else
4018 next = NULL; /* XXXX Needn't unwinding in this case... */
4019 uwb->next = next;
4020 next = NEXTOPER(scan);
4021 if (uwb->type == RE_UNWIND_BRANCHJ)
4022 next = NEXTOPER(next);
4023 locinput = uwb->locinput;
4024 nextchr = uwb->nextchr;
4025#ifdef DEBUGGING
4026 PL_regindent = uwb->regindent;
4027#endif
4028
4029 goto reenter;
4030 }
4031 /* NOT REACHED */
4032 default:
4033 Perl_croak(aTHX_ "regexp unwind memory corruption");
4034 }
4035 /* NOT REACHED */
4036 }
4633a7c4 4037#ifdef DEBUGGING
3280af22 4038 PL_regindent--;
4633a7c4 4039#endif
a0d0e21e 4040 return 0;
a687059c
LW
4041}
4042
4043/*
4044 - regrepeat - repeatedly match something simple, report how many
4045 */
4046/*
4047 * [This routine now assumes that it will only match on things of length 1.
4048 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4049 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4050 */
76e3520e 4051STATIC I32
8c18bf38 4052S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4053{
a0d0e21e 4054 register char *scan;
a0d0e21e 4055 register I32 c;
3280af22 4056 register char *loceol = PL_regeol;
a0ed51b3 4057 register I32 hardcount = 0;
53c4c00c 4058 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4059
3280af22 4060 scan = PL_reginput;
faf11cac
HS
4061 if (max == REG_INFTY)
4062 max = I32_MAX;
4063 else if (max < loceol - scan)
a0d0e21e 4064 loceol = scan + max;
a0d0e21e 4065 switch (OP(p)) {
22c35a8c 4066 case REG_ANY:
1aa99e6b 4067 if (do_utf8) {
ffc61ed2 4068 loceol = PL_regeol;
1aa99e6b 4069 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4070 scan += UTF8SKIP(scan);
4071 hardcount++;
4072 }
4073 } else {
4074 while (scan < loceol && *scan != '\n')
4075 scan++;
a0ed51b3
LW
4076 }
4077 break;
ffc61ed2 4078 case SANY:
def8e4ea
JH
4079 if (do_utf8) {
4080 loceol = PL_regeol;
a0804c9e 4081 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4082 scan += UTF8SKIP(scan);
4083 hardcount++;
4084 }
4085 }
4086 else
4087 scan = loceol;
a0ed51b3 4088 break;
f33976b4
DB
4089 case CANY:
4090 scan = loceol;
4091 break;
bbce6d69 4092 case EXACT: /* length of string is 1 */
cd439c50 4093 c = (U8)*STRING(p);
bbce6d69 4094 while (scan < loceol && UCHARAT(scan) == c)
4095 scan++;
4096 break;
4097 case EXACTF: /* length of string is 1 */
cd439c50 4098 c = (U8)*STRING(p);
bbce6d69 4099 while (scan < loceol &&
22c35a8c 4100 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4101 scan++;
4102 break;
4103 case EXACTFL: /* length of string is 1 */
3280af22 4104 PL_reg_flags |= RF_tainted;
cd439c50 4105 c = (U8)*STRING(p);
bbce6d69 4106 while (scan < loceol &&
22c35a8c 4107 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4108 scan++;
4109 break;
4110 case ANYOF:
ffc61ed2
JH
4111 if (do_utf8) {
4112 loceol = PL_regeol;
338501c1
JH
4113 while (hardcount < max && scan < loceol &&
4114 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4115 scan += UTF8SKIP(scan);
4116 hardcount++;
4117 }
4118 } else {
7d3e948e 4119 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4120 scan++;
4121 }
a0d0e21e
LW
4122 break;
4123 case ALNUM:
1aa99e6b 4124 if (do_utf8) {
ffc61ed2 4125 loceol = PL_regeol;
fab1f349 4126 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4127 while (hardcount < max && scan < loceol &&
3568d838 4128 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4129 scan += UTF8SKIP(scan);
4130 hardcount++;
4131 }
4132 } else {
4133 while (scan < loceol && isALNUM(*scan))
4134 scan++;
a0ed51b3
LW
4135 }
4136 break;
bbce6d69 4137 case ALNUML:
3280af22 4138 PL_reg_flags |= RF_tainted;
1aa99e6b 4139 if (do_utf8) {
ffc61ed2 4140 loceol = PL_regeol;
1aa99e6b
IH
4141 while (hardcount < max && scan < loceol &&
4142 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4143 scan += UTF8SKIP(scan);
4144 hardcount++;
4145 }
4146 } else {
4147 while (scan < loceol && isALNUM_LC(*scan))
4148 scan++;
a0ed51b3
LW
4149 }
4150 break;
a0d0e21e 4151 case NALNUM:
1aa99e6b 4152 if (do_utf8) {
ffc61ed2 4153 loceol = PL_regeol;
fab1f349 4154 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4155 while (hardcount < max && scan < loceol &&
3568d838 4156 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4157 scan += UTF8SKIP(scan);
4158 hardcount++;
4159 }
4160 } else {
4161 while (scan < loceol && !isALNUM(*scan))
4162 scan++;
a0ed51b3
LW
4163 }
4164 break;
bbce6d69 4165 case NALNUML:
3280af22 4166 PL_reg_flags |= RF_tainted;
1aa99e6b 4167 if (do_utf8) {
ffc61ed2 4168 loceol = PL_regeol;
1aa99e6b
IH
4169 while (hardcount < max && scan < loceol &&
4170 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4171 scan += UTF8SKIP(scan);
4172 hardcount++;
4173 }
4174 } else {
4175 while (scan < loceol && !isALNUM_LC(*scan))
4176 scan++;
a0ed51b3
LW
4177 }
4178 break;
a0d0e21e 4179 case SPACE:
1aa99e6b 4180 if (do_utf8) {
ffc61ed2 4181 loceol = PL_regeol;
fab1f349 4182 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4183 while (hardcount < max && scan < loceol &&
3568d838
JH
4184 (*scan == ' ' ||
4185 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4186 scan += UTF8SKIP(scan);
4187 hardcount++;
4188 }
4189 } else {
4190 while (scan < loceol && isSPACE(*scan))
4191 scan++;
a0ed51b3
LW
4192 }
4193 break;
bbce6d69 4194 case SPACEL:
3280af22 4195 PL_reg_flags |= RF_tainted;
1aa99e6b 4196 if (do_utf8) {
ffc61ed2 4197 loceol = PL_regeol;
1aa99e6b 4198 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4199 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4200 scan += UTF8SKIP(scan);
4201 hardcount++;
4202 }
4203 } else {
4204 while (scan < loceol && isSPACE_LC(*scan))
4205 scan++;
a0ed51b3
LW
4206 }
4207 break;
a0d0e21e 4208 case NSPACE:
1aa99e6b 4209 if (do_utf8) {
ffc61ed2 4210 loceol = PL_regeol;
fab1f349 4211 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4212 while (hardcount < max && scan < loceol &&
3568d838
JH
4213 !(*scan == ' ' ||
4214 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4215 scan += UTF8SKIP(scan);
4216 hardcount++;
4217 }
4218 } else {
4219 while (scan < loceol && !isSPACE(*scan))
4220 scan++;
4221 break;
a0ed51b3 4222 }
bbce6d69 4223 case NSPACEL:
3280af22 4224 PL_reg_flags |= RF_tainted;
1aa99e6b 4225 if (do_utf8) {
ffc61ed2 4226 loceol = PL_regeol;
1aa99e6b 4227 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4228 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4229 scan += UTF8SKIP(scan);
4230 hardcount++;
4231 }
4232 } else {
4233 while (scan < loceol && !isSPACE_LC(*scan))
4234 scan++;
a0ed51b3
LW
4235 }
4236 break;
a0d0e21e 4237 case DIGIT:
1aa99e6b 4238 if (do_utf8) {
ffc61ed2 4239 loceol = PL_regeol;
fab1f349 4240 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4241 while (hardcount < max && scan < loceol &&
3568d838 4242 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4243 scan += UTF8SKIP(scan);
4244 hardcount++;
4245 }
4246 } else {
4247 while (scan < loceol && isDIGIT(*scan))
4248 scan++;
a0ed51b3
LW
4249 }
4250 break;
a0d0e21e 4251 case NDIGIT:
1aa99e6b 4252 if (do_utf8) {
ffc61ed2 4253 loceol = PL_regeol;
fab1f349 4254 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4255 while (hardcount < max && scan < loceol &&
3568d838 4256 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4257 scan += UTF8SKIP(scan);
4258 hardcount++;
4259 }
4260 } else {
4261 while (scan < loceol && !isDIGIT(*scan))
4262 scan++;
a0ed51b3
LW
4263 }
4264 break;
a0d0e21e
LW
4265 default: /* Called on something of 0 width. */
4266 break; /* So match right here or not at all. */
4267 }
a687059c 4268
a0ed51b3
LW
4269 if (hardcount)
4270 c = hardcount;
4271 else
4272 c = scan - PL_reginput;
3280af22 4273 PL_reginput = scan;
a687059c 4274
9041c2e3 4275 DEBUG_r(
c277df42
IZ
4276 {
4277 SV *prop = sv_newmortal();
4278
47a91a97 4279 regprop(prop, (regnode *)p);
9041c2e3
NIS
4280 PerlIO_printf(Perl_debug_log,
4281 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
fdac8c4b 4282 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4283 });
9041c2e3 4284
a0d0e21e 4285 return(c);
a687059c
LW
4286}
4287
4288/*
c277df42 4289 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4290 *
b035a42e 4291 * The repeater is supposed to have constant non-zero length.
c277df42
IZ
4292 */
4293
76e3520e 4294STATIC I32
cea2e8a9 4295S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4296{
b7953727 4297 register char *scan = Nullch;
c277df42 4298 register char *start;
3280af22 4299 register char *loceol = PL_regeol;
a0ed51b3 4300 I32 l = 0;
708e3b05 4301 I32 count = 0, res = 1;
a0ed51b3
LW
4302
4303 if (!max)
4304 return 0;
c277df42 4305
3280af22 4306 start = PL_reginput;
53c4c00c 4307 if (PL_reg_match_utf8) {
708e3b05 4308 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4309 if (!count++) {
4310 l = 0;
4311 while (start < PL_reginput) {
4312 l++;
4313 start += UTF8SKIP(start);
4314 }
4315 *lp = l;
4316 if (l == 0)
4317 return max;
4318 }
4319 if (count == max)
4320 return count;
4321 }
4322 }
4323 else {
708e3b05 4324 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4325 if (!count++) {
4326 *lp = l = PL_reginput - start;
4327 if (max != REG_INFTY && l*max < loceol - scan)
4328 loceol = scan + l*max;
4329 if (l == 0)
4330 return max;
c277df42
IZ
4331 }
4332 }
4333 }
708e3b05 4334 if (!res)
3280af22 4335 PL_reginput = scan;
9041c2e3 4336
a0ed51b3 4337 return count;
c277df42
IZ
4338}
4339
4340/*
ffc61ed2
JH
4341- regclass_swash - prepare the utf8 swash
4342*/
4343
4344SV *
9e55ce06 4345Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4346{
9e55ce06
JH
4347 SV *sw = NULL;
4348 SV *si = NULL;
4349 SV *alt = NULL;
ffc61ed2
JH
4350
4351 if (PL_regdata && PL_regdata->count) {
8c18bf38 4352 const U32 n = ARG(node);
ffc61ed2
JH
4353
4354 if (PL_regdata->what[n] == 's') {
c6d79d47
AL
4355 SV * const rv = (SV*)PL_regdata->data[n];
4356 AV * const av = (AV*)SvRV((SV*)rv);
339a2a6a 4357 SV **const ary = AvARRAY(av);
9e55ce06 4358 SV **a, **b;
9041c2e3 4359
9e55ce06
JH
4360 /* See the end of regcomp.c:S_reglass() for
4361 * documentation of these array elements. */
4362
b11f357e
JH
4363 si = *ary;
4364 a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0;
4365 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4366
ffc61ed2
JH
4367 if (a)
4368 sw = *a;
4369 else if (si && doinit) {
4370 sw = swash_init("utf8", "", si, 1, 0);
4371 (void)av_store(av, 1, sw);
4372 }
9e55ce06
JH
4373 if (b)
4374 alt = *b;
ffc61ed2
JH
4375 }
4376 }
4377
9e55ce06
JH
4378 if (listsvp)
4379 *listsvp = si;
4380 if (altsvp)
4381 *altsvp = alt;
ffc61ed2
JH
4382
4383 return sw;
4384}
4385
4386/*
ba7b4546 4387 - reginclass - determine if a character falls into a character class
832705d4
JH
4388
4389 The n is the ANYOF regnode, the p is the target string, lenp
4390 is pointer to the maximum length of how far to go in the p
4391 (if the lenp is zero, UTF8SKIP(p) is used),
4392 do_utf8 tells whether the target string is in UTF-8.
4393
bbce6d69 4394 */
4395
76e3520e 4396STATIC bool
8c18bf38 4397S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4398{
8c18bf38 4399 const char flags = ANYOF_FLAGS(n);
bbce6d69 4400 bool match = FALSE;
cc07378b 4401 UV c = *p;
ae9ddab8 4402 STRLEN len = 0;
9e55ce06 4403 STRLEN plen;
1aa99e6b 4404
ae9ddab8 4405 if (do_utf8 && !UTF8_IS_INVARIANT(c))
47a91a97 4406 c = utf8n_to_uvchr((U8 *)p, UTF8_MAXBYTES, &len,
ae9ddab8 4407 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
bbce6d69 4408
0f0076b4 4409 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4410 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4411 if (lenp)
4412 *lenp = 0;
ffc61ed2 4413 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4414 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4415 match = TRUE;
bbce6d69 4416 }
3568d838 4417 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4418 match = TRUE;
ffc61ed2 4419 if (!match) {
9e55ce06 4420 AV *av;
47a91a97 4421 SV * const sw = regclass_swash((regnode *)n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4422
4423 if (sw) {
47a91a97 4424 if (swash_fetch(sw, (U8 *)p, do_utf8))
ffc61ed2
JH
4425 match = TRUE;
4426 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4427 if (!match && lenp && av) {
4428 I32 i;
9e55ce06 4429 for (i = 0; i <= av_len(av); i++) {
c6d79d47 4430 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4431 STRLEN len;
c6d79d47 4432 const char * const s = SvPV_const(sv, len);
9e55ce06 4433
061b10df 4434 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4435 *lenp = len;
4436 match = TRUE;
4437 break;
4438 }
4439 }
4440 }
4441 if (!match) {
a2a469f9 4442 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4443 STRLEN tmplen;
4444
47a91a97 4445 to_utf8_fold((U8 *)p, tmpbuf, &tmplen);
9e55ce06
JH
4446 if (swash_fetch(sw, tmpbuf, do_utf8))
4447 match = TRUE;
4448 }
ffc61ed2
JH
4449 }
4450 }
bbce6d69 4451 }
9e55ce06 4452 if (match && lenp && *lenp == 0)
0f0076b4 4453 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4454 }
1aa99e6b 4455 if (!match && c < 256) {
ffc61ed2
JH
4456 if (ANYOF_BITMAP_TEST(n, c))
4457 match = TRUE;
4458 else if (flags & ANYOF_FOLD) {
eb160463 4459 U8 f;
a0ed51b3 4460
ffc61ed2
JH
4461 if (flags & ANYOF_LOCALE) {
4462 PL_reg_flags |= RF_tainted;
4463 f = PL_fold_locale[c];
4464 }
4465 else
4466 f = PL_fold[c];
4467 if (f != c && ANYOF_BITMAP_TEST(n, f))
4468 match = TRUE;
4469 }
4470
4471 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4472 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4473 if (
4474 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4475 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4476 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4477 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4478 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4479 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4480 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4481 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4482 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4483 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4484 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4485 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4486 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4487 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4488 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4489 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4490 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4491 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4492 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4493 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4494 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4495 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4496 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4497 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4498 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4499 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4500 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4501 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4502 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4503 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4504 ) /* How's that for a conditional? */
4505 {
4506 match = TRUE;
4507 }
a0ed51b3 4508 }
a0ed51b3
LW
4509 }
4510
a0ed51b3
LW
4511 return (flags & ANYOF_INVERT) ? !match : match;
4512}
161b471a 4513
dfe13c55 4514STATIC U8 *
cea2e8a9 4515S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4516{
1aa99e6b
IH
4517 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4518}
4519
4520STATIC U8 *
4521S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4522{
a0ed51b3 4523 if (off >= 0) {
1aa99e6b 4524 while (off-- && s < lim) {
ffc61ed2 4525 /* XXX could check well-formedness here */
a0ed51b3 4526 s += UTF8SKIP(s);
ffc61ed2 4527 }
a0ed51b3
LW
4528 }
4529 else {
4530 while (off++) {
1aa99e6b 4531 if (s > lim) {
a0ed51b3 4532 s--;
ffc61ed2 4533 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4534 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4535 s--;
ffc61ed2
JH
4536 }
4537 /* XXX could check well-formedness here */
a0ed51b3
LW
4538 }
4539 }
4540 }
4541 return s;
4542}
161b471a 4543
dfe13c55 4544STATIC U8 *
1aa99e6b 4545S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4546{
1aa99e6b
IH
4547 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4548}
4549
4550STATIC U8 *
4551S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4552{
4553 if (off >= 0) {
1aa99e6b 4554 while (off-- && s < lim) {
ffc61ed2 4555 /* XXX could check well-formedness here */
a0ed51b3 4556 s += UTF8SKIP(s);
ffc61ed2 4557 }
a0ed51b3
LW
4558 if (off >= 0)
4559 return 0;
4560 }
4561 else {
4562 while (off++) {
1aa99e6b 4563 if (s > lim) {
a0ed51b3 4564 s--;
ffc61ed2 4565 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4566 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4567 s--;
ffc61ed2
JH
4568 }
4569 /* XXX could check well-formedness here */
a0ed51b3
LW
4570 }
4571 else
4572 break;
4573 }
4574 if (off <= 0)
4575 return 0;
4576 }
4577 return s;
4578}
51371543 4579
51371543 4580static void
acfe0abc 4581restore_pos(pTHX_ void *arg)
51371543 4582{
a2592645 4583 PERL_UNUSED_ARG(arg);
51371543
GS
4584 if (PL_reg_eval_set) {
4585 if (PL_reg_oldsaved) {
4586 PL_reg_re->subbeg = PL_reg_oldsaved;
4587 PL_reg_re->sublen = PL_reg_oldsavedlen;
4588 RX_MATCH_COPIED_on(PL_reg_re);
4589 }
4590 PL_reg_magic->mg_len = PL_reg_oldpos;
4591 PL_reg_eval_set = 0;
4592 PL_curpm = PL_reg_oldcurpm;
4593 }
4594}
33b8afdf
JH
4595
4596STATIC void
4597S_to_utf8_substr(pTHX_ register regexp *prog)
4598{
33b8afdf 4599 if (prog->float_substr && !prog->float_utf8) {
a2592645 4600 SV* sv;
04851bb3 4601 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
4602 sv_utf8_upgrade(sv);
4603 if (SvTAIL(prog->float_substr))
4604 SvTAIL_on(sv);
4605 if (prog->float_substr == prog->check_substr)
4606 prog->check_utf8 = sv;
4607 }
4608 if (prog->anchored_substr && !prog->anchored_utf8) {
a2592645 4609 SV* sv;
04851bb3 4610 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
4611 sv_utf8_upgrade(sv);
4612 if (SvTAIL(prog->anchored_substr))
4613 SvTAIL_on(sv);
4614 if (prog->anchored_substr == prog->check_substr)
4615 prog->check_utf8 = sv;
4616 }
4617}
4618
4619STATIC void
4620S_to_byte_substr(pTHX_ register regexp *prog)
4621{
33b8afdf 4622 if (prog->float_utf8 && !prog->float_substr) {
a2592645 4623 SV* sv;
04851bb3 4624 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
4625 if (sv_utf8_downgrade(sv, TRUE)) {
4626 if (SvTAIL(prog->float_utf8))
4627 SvTAIL_on(sv);
4628 } else {
4629 SvREFCNT_dec(sv);
4630 prog->float_substr = sv = &PL_sv_undef;
4631 }
4632 if (prog->float_utf8 == prog->check_utf8)
4633 prog->check_substr = sv;
4634 }
4635 if (prog->anchored_utf8 && !prog->anchored_substr) {
a2592645 4636 SV* sv;
04851bb3 4637 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
4638 if (sv_utf8_downgrade(sv, TRUE)) {
4639 if (SvTAIL(prog->anchored_utf8))
4640 SvTAIL_on(sv);
4641 } else {
4642 SvREFCNT_dec(sv);
4643 prog->anchored_substr = sv = &PL_sv_undef;
4644 }
4645 if (prog->anchored_utf8 == prog->check_utf8)
4646 prog->check_substr = sv;
4647 }
4648}
d8294a4d
NC
4649
4650/*
4651 * Local variables:
4652 * c-indentation-style: bsd
4653 * c-basic-offset: 4
4654 * indent-tabs-mode: t
4655 * End:
4656 *
4657 * ex: set ts=8 sts=4 sw=4 noet:
4658 */