This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
40d34c0d
SB
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
10 * a regular expression.
bb3580c7
NC
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
40d34c0d
SB
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
e6906430 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
04851bb3 81 **** 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
dfe13c55
GS
119#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
121#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
123#define HOPc(pos,off) ((char*)HOP(pos,off))
124#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 125
efb30f32 126#define HOPBACK(pos, off) ( \
e54858b0 127 (PL_reg_match_utf8) \
efb30f32
HS
128 ? reghopmaybe((U8*)pos, -off) \
129 : (pos - off >= PL_bostr) \
130 ? (U8*)(pos - off) \
131 : (U8*)NULL \
132)
133#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134
1aa99e6b
IH
135#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
137#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
139#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141
fab1f349
AL
142#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
143 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
144#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 148
5f80c4cf 149/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
150#define JUMPABLE(rn) ( \
151 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
152 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153 OP(rn) == PLUS || OP(rn) == MINMOD || \
154 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
155)
156
cca55fe3
JP
157#define HAS_TEXT(rn) ( \
158 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159)
e2d8ce26 160
a84d97b6
HS
161/*
162 Search for mandatory following text node; for lookahead, the text must
163 follow but for lookbehind (rn->flags != 0) we skip to the next step.
164*/
cca55fe3 165#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 166 while (JUMPABLE(rn)) \
a84d97b6 167 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 168 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
169 else if (OP(rn) == PLUS) \
170 rn = NEXTOPER(rn); \
a84d97b6
HS
171 else if (OP(rn) == IFMATCH) \
172 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 173 else rn += NEXT_OFF(rn); \
5f80c4cf 174} STMT_END
74750237 175
acfe0abc 176static void restore_pos(pTHX_ void *arg);
51371543 177
76e3520e 178STATIC CHECKPOINT
cea2e8a9 179S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 180{
8c18bf38 181 const int retval = PL_savestack_ix;
b1ce53c5 182#define REGCP_PAREN_ELEMS 4
8c18bf38 183 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
184 int p;
185
e49a9654
IH
186 if (paren_elems_to_push < 0)
187 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
188
a01268b5 189#define REGCP_OTHER_ELEMS 6
f3479639 190 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 191 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 192/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
193 SSPUSHINT(PL_regendp[p]);
194 SSPUSHINT(PL_regstartp[p]);
3280af22 195 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
196 SSPUSHINT(p);
197 }
b1ce53c5 198/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
199 SSPUSHINT(PL_regsize);
200 SSPUSHINT(*PL_reglastparen);
a01268b5 201 SSPUSHINT(*PL_reglastcloseparen);
3280af22 202 SSPUSHPTR(PL_reginput);
41123dfd
JH
203#define REGCP_FRAME_ELEMS 2
204/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
205 * are needed for the regexp context stack bookkeeping. */
206 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 207 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 208
a0d0e21e
LW
209 return retval;
210}
211
c277df42 212/* These are needed since we do not localize EVAL nodes: */
02db2b7b 213# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 214 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 215 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 216
02db2b7b 217# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 218 PerlIO_printf(Perl_debug_log, \
faccc32b 219 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 220 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 221
76e3520e 222STATIC char *
cea2e8a9 223S_regcppop(pTHX)
a0d0e21e 224{
b1ce53c5 225 I32 i;
a0d0e21e
LW
226 U32 paren = 0;
227 char *input;
b1ce53c5
JH
228
229 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 230 i = SSPOPINT;
b1ce53c5
JH
231 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 233 input = (char *) SSPOPPTR;
a01268b5 234 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
235 *PL_reglastparen = SSPOPINT;
236 PL_regsize = SSPOPINT;
b1ce53c5
JH
237
238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
228fe6e6 241 I32 tmps;
a0d0e21e 242 paren = (U32)SSPOPINT;
3280af22 243 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
244 PL_regstartp[paren] = SSPOPINT;
245 tmps = SSPOPINT;
3280af22
NIS
246 if (paren <= *PL_reglastparen)
247 PL_regendp[paren] = tmps;
c277df42 248 DEBUG_r(
c3464db5 249 PerlIO_printf(Perl_debug_log,
b900a521 250 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 251 (UV)paren, (IV)PL_regstartp[paren],
b900a521 252 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 253 (IV)PL_regendp[paren],
3280af22 254 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 255 );
a0d0e21e 256 }
c277df42 257 DEBUG_r(
eb160463 258 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 259 PerlIO_printf(Perl_debug_log,
faccc32b
JH
260 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
261 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
262 }
263 );
daf18116 264#if 1
dafc8851
JH
265 /* It would seem that the similar code in regtry()
266 * already takes care of this, and in fact it is in
267 * a better location to since this code can #if 0-ed out
268 * but the code in regtry() is needed or otherwise tests
269 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
270 * (as of patchlevel 7877) will fail. Then again,
271 * this code seems to be necessary or otherwise
272 * building DynaLoader will fail:
273 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
274 * --jhi */
eb160463
GS
275 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
276 if ((I32)paren > PL_regsize)
cf93c79d
IZ
277 PL_regstartp[paren] = -1;
278 PL_regendp[paren] = -1;
a0d0e21e 279 }
dafc8851 280#endif
a0d0e21e
LW
281 return input;
282}
283
0f5d15d6 284STATIC char *
cea2e8a9 285S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 286{
8c18bf38 287 const I32 tmp = PL_savestack_ix;
0f5d15d6
IZ
288
289 PL_savestack_ix = ss;
290 regcppop();
291 PL_savestack_ix = tmp;
942e002e 292 return Nullch;
0f5d15d6
IZ
293}
294
295typedef struct re_cc_state
296{
297 I32 ss;
298 regnode *node;
299 struct re_cc_state *prev;
300 CURCUR *cc;
301 regexp *re;
302} re_cc_state;
303
02db2b7b 304#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 305
29d1e993
HS
306#define TRYPAREN(paren, n, input) { \
307 if (paren) { \
308 if (n) { \
309 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
310 PL_regendp[paren] = input - PL_bostr; \
311 } \
312 else \
313 PL_regendp[paren] = -1; \
314 } \
315 if (regmatch(next)) \
316 sayYES; \
317 if (paren && n) \
318 PL_regendp[paren] = -1; \
319}
320
321
a687059c 322/*
e50aee73 323 * pregexec and friends
a687059c
LW
324 */
325
326/*
c277df42 327 - pregexec - match a regexp against a string
a687059c 328 */
c277df42 329I32
864dbfa3 330Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 331 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
332/* strend: pointer to null at end of string */
333/* strbeg: real beginning of string */
334/* minend: end of match must be >=minend after stringarg. */
335/* nosave: For optimizations. */
336{
337 return
9041c2e3 338 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
339 nosave ? 0 : REXEC_COPY_STR);
340}
0f5d15d6
IZ
341
342STATIC void
cea2e8a9 343S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
344{
345 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
346#ifdef DEBUGGING
347 PL_regprogram = prog->program;
348#endif
349 PL_regnpar = prog->nparens;
9041c2e3
NIS
350 PL_regdata = prog->data;
351 PL_reg_re = prog;
0f5d15d6 352}
22e551b9 353
9041c2e3 354/*
cad2e5aa
JH
355 * Need to implement the following flags for reg_anch:
356 *
357 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
358 * USE_INTUIT_ML
359 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
360 * INTUIT_AUTORITATIVE_ML
361 * INTUIT_ONCE_NOML - Intuit can match in one location only.
362 * INTUIT_ONCE_ML
363 *
364 * Another flag for this function: SECOND_TIME (so that float substrs
365 * with giant delta may be not rechecked).
366 */
367
368/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
369
fdac8c4b 370/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
371 Otherwise, only SvCUR(sv) is used to get strbeg. */
372
373/* XXXX We assume that strpos is strbeg unless sv. */
374
6eb5f6b9
JH
375/* XXXX Some places assume that there is a fixed substring.
376 An update may be needed if optimizer marks as "INTUITable"
377 RExen without fixed substrings. Similarly, it is assumed that
378 lengths of all the strings are no more than minlen, thus they
379 cannot come from lookahead.
380 (Or minlen should take into account lookahead.) */
381
2c2d71f5
JH
382/* A failure to find a constant substring means that there is no need to make
383 an expensive call to REx engine, thus we celebrate a failure. Similarly,
384 finding a substring too deep into the string means that less calls to
30944b6d
IZ
385 regtry() should be needed.
386
387 REx compiler's optimizer found 4 possible hints:
388 a) Anchored substring;
389 b) Fixed substring;
390 c) Whether we are anchored (beginning-of-line or \G);
391 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 392 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
393 string which does not contradict any of them.
394 */
2c2d71f5 395
6eb5f6b9
JH
396/* Most of decisions we do here should have been done at compile time.
397 The nodes of the REx which we used for the search should have been
398 deleted from the finite automaton. */
399
cad2e5aa
JH
400char *
401Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
402 char *strend, U32 flags, re_scream_pos_data *data)
403{
b7953727 404 register I32 start_shift = 0;
cad2e5aa 405 /* Should be nonnegative! */
b7953727 406 register I32 end_shift = 0;
2c2d71f5
JH
407 register char *s;
408 register SV *check;
a1933d95 409 char *strbeg;
cad2e5aa 410 char *t;
8c18bf38 411 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 412 I32 ml_anch;
6eb5f6b9 413 register char *other_last = Nullch; /* other substr checked before this */
b7953727 414 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
415#ifdef DEBUGGING
416 char *i_strpos = strpos;
ce333219 417 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 418#endif
2ea5368e 419 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 420
b8d68ded
JH
421 if (prog->reganch & ROPT_UTF8) {
422 DEBUG_r(PerlIO_printf(Perl_debug_log,
423 "UTF-8 regex...\n"));
424 PL_reg_flags |= RF_utf8;
425 }
426
2a782b5b 427 DEBUG_r({
228fe6e6 428 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
429 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
430 strpos;
228fe6e6 431 const int len = PL_reg_match_utf8 ?
b8d68ded 432 strlen(s) : strend - strpos;
2a782b5b
JH
433 if (!PL_colorset)
434 reginitcolors();
b8d68ded
JH
435 if (PL_reg_match_utf8)
436 DEBUG_r(PerlIO_printf(Perl_debug_log,
437 "UTF-8 target...\n"));
2a782b5b 438 PerlIO_printf(Perl_debug_log,
5332c881 439 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
2a782b5b
JH
440 PL_colors[4],PL_colors[5],PL_colors[0],
441 prog->precomp,
442 PL_colors[1],
443 (strlen(prog->precomp) > 60 ? "..." : ""),
444 PL_colors[0],
445 (int)(len > 60 ? 60 : len),
446 s, PL_colors[1],
447 (len > 60 ? "..." : "")
448 );
449 });
cad2e5aa 450
c344f387
JH
451 /* CHR_DIST() would be more correct here but it makes things slow. */
452 if (prog->minlen > strend - strpos) {
a72c7584
JH
453 DEBUG_r(PerlIO_printf(Perl_debug_log,
454 "String too short... [re_intuit_start]\n"));
cad2e5aa 455 goto fail;
2c2d71f5 456 }
a1933d95 457 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 458 PL_regeol = strend;
33b8afdf
JH
459 if (do_utf8) {
460 if (!prog->check_utf8 && prog->check_substr)
461 to_utf8_substr(prog);
462 check = prog->check_utf8;
463 } else {
464 if (!prog->check_substr && prog->check_utf8)
465 to_byte_substr(prog);
466 check = prog->check_substr;
467 }
468 if (check == &PL_sv_undef) {
469 DEBUG_r(PerlIO_printf(Perl_debug_log,
470 "Non-utf string cannot match utf check string\n"));
471 goto fail;
472 }
2c2d71f5 473 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
474 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
475 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 476 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 477
7e25d62c
JH
478 if (!ml_anch) {
479 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
480 | ROPT_IMPLICIT)) /* not a real BOL */
fdac8c4b 481 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
482 && sv && !SvROK(sv)
483 && (strpos != strbeg)) {
484 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
485 goto fail;
486 }
487 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 488 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 489 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
490 I32 slen;
491
1aa99e6b 492 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
493 if (SvTAIL(check)) {
494 slen = SvCUR(check); /* >= 1 */
cad2e5aa 495
9041c2e3 496 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
497 || (strend - s == slen && strend[-1] != '\n')) {
498 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
499 goto fail_finish;
cad2e5aa
JH
500 }
501 /* Now should match s[0..slen-2] */
502 slen--;
fdac8c4b 503 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 504 || (slen > 1
fdac8c4b 505 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5
JH
506 report_neq:
507 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
508 goto fail_finish;
509 }
cad2e5aa 510 }
fdac8c4b 511 else if (*SvPVX_const(check) != *s
653099ff 512 || ((slen = SvCUR(check)) > 1
fdac8c4b 513 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5
JH
514 goto report_neq;
515 goto success_at_start;
7e25d62c 516 }
cad2e5aa 517 }
2c2d71f5 518 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 519 s = strpos;
2c2d71f5 520 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 521 end_shift = prog->minlen - start_shift -
653099ff 522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 523 if (!ml_anch) {
8c18bf38 524 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 525 - (SvTAIL(check) != 0);
8c18bf38 526 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
527
528 if (end_shift < eshift)
529 end_shift = eshift;
530 }
cad2e5aa 531 }
2c2d71f5 532 else { /* Can match at random position */
cad2e5aa
JH
533 ml_anch = 0;
534 s = strpos;
2c2d71f5
JH
535 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
536 /* Should be nonnegative! */
537 end_shift = prog->minlen - start_shift -
653099ff 538 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
539 }
540
2c2d71f5 541#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 542 if (end_shift < 0)
6bbae5e6 543 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
544#endif
545
2c2d71f5
JH
546 restart:
547 /* Find a possible match in the region s..strend by looking for
548 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 549 if (flags & REXEC_SCREAM) {
cad2e5aa 550 I32 p = -1; /* Internal iterator of scream. */
8c18bf38 551 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 552
2c2d71f5
JH
553 if (PL_screamfirst[BmRARE(check)] >= 0
554 || ( BmRARE(check) == '\n'
555 && (BmPREVIOUS(check) == SvCUR(check) - 1)
556 && SvTAIL(check) ))
9041c2e3 557 s = screaminstr(sv, check,
2c2d71f5 558 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 559 else
2c2d71f5 560 goto fail_finish;
88af3bc3
JH
561 /* we may be pointing at the wrong string */
562 if (s && RX_MATCH_COPIED(prog))
fdac8c4b 563 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
564 if (data)
565 *data->scream_olds = s;
566 }
f33976b4 567 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
568 s = fbm_instr((U8*)(s + start_shift),
569 (U8*)(strend - end_shift),
570 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 571 else
1aa99e6b
IH
572 s = fbm_instr(HOP3(s, start_shift, strend),
573 HOP3(strend, -end_shift, strbeg),
2c2d71f5 574 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
575
576 /* Update the count-of-usability, remove useless subpatterns,
577 unshift s. */
2c2d71f5 578
5332c881
AL
579 /* FIXME - DEBUG_EXECUTE_r if that is merged to maint. */
580 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 581 (s ? "Found" : "Did not find"),
33b8afdf 582 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 583 PL_colors[0],
7b0972df 584 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
fdac8c4b 585 SvPVX_const(check),
2c2d71f5
JH
586 PL_colors[1], (SvTAIL(check) ? "$" : ""),
587 (s ? " at offset " : "...\n") ) );
588
589 if (!s)
590 goto fail_finish;
591
6eb5f6b9
JH
592 check_at = s;
593
2c2d71f5 594 /* Finish the diagnostic message */
30944b6d 595 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
596
597 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
598 Start with the other substr.
599 XXXX no SCREAM optimization yet - and a very coarse implementation
5332c881 600 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
601 *always* match. Probably should be marked during compile...
602 Probably it is right to do no SCREAM here...
603 */
604
33b8afdf 605 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 606 /* Take into account the "other" substring. */
2c2d71f5
JH
607 /* XXXX May be hopelessly wrong for UTF... */
608 if (!other_last)
6eb5f6b9 609 other_last = strpos;
33b8afdf 610 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
611 do_other_anchored:
612 {
1aa99e6b 613 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5 614 char *s1 = s;
33b8afdf 615 SV* must;
2c2d71f5 616
2c2d71f5
JH
617 t = s - prog->check_offset_max;
618 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 619 && (!do_utf8
1aa99e6b 620 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 621 && t > strpos)))
30944b6d 622 /* EMPTY */;
2c2d71f5
JH
623 else
624 t = strpos;
1aa99e6b 625 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
626 if (t < other_last) /* These positions already checked */
627 t = other_last;
1aa99e6b 628 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
629 if (last < last1)
630 last1 = last;
631 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
632 /* On end-of-str: see comment below. */
33b8afdf
JH
633 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
634 if (must == &PL_sv_undef) {
635 s = (char*)NULL;
636 DEBUG_r(must = prog->anchored_utf8); /* for debug */
637 }
638 else
639 s = fbm_instr(
640 (unsigned char*)t,
641 HOP3(HOP3(last1, prog->anchored_offset, strend)
642 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
643 must,
644 PL_multiline ? FBMrf_MULTILINE : 0
645 );
1aa99e6b 646 DEBUG_r(PerlIO_printf(Perl_debug_log,
5332c881 647 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
33b8afdf
JH
650 (int)(SvCUR(must)
651 - (SvTAIL(must)!=0)),
fdac8c4b 652 SvPVX_const(must),
33b8afdf 653 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
654 if (!s) {
655 if (last1 >= last2) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying floating at offset %ld...\n",
1aa99e6b
IH
662 (long)(HOP3c(s1, 1, strend) - i_strpos)));
663 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
664 s = HOP3c(last, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
1aa99e6b
IH
670 t = HOP3c(s, -prog->anchored_offset, strbeg);
671 other_last = HOP3c(s, 1, strend);
30944b6d 672 s = s1;
2c2d71f5
JH
673 if (t == strpos)
674 goto try_at_start;
2c2d71f5
JH
675 goto try_at_offset;
676 }
30944b6d 677 }
2c2d71f5
JH
678 }
679 else { /* Take into account the floating substring. */
33b8afdf
JH
680 char *last, *last1;
681 char *s1 = s;
682 SV* must;
683
684 t = HOP3c(s, -start_shift, strbeg);
685 last1 = last =
686 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
687 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
688 last = HOP3c(t, prog->float_max_offset, strend);
689 s = HOP3c(t, prog->float_min_offset, strend);
690 if (s < other_last)
691 s = other_last;
2c2d71f5 692 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
693 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
694 /* fbm_instr() takes into account exact value of end-of-str
695 if the check is SvTAIL(ed). Since false positives are OK,
696 and end-of-str is not later than strend we are OK. */
697 if (must == &PL_sv_undef) {
698 s = (char*)NULL;
699 DEBUG_r(must = prog->float_utf8); /* for debug message */
700 }
701 else
2c2d71f5 702 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
703 (unsigned char*)last + SvCUR(must)
704 - (SvTAIL(must)!=0),
705 must, PL_multiline ? FBMrf_MULTILINE : 0);
5332c881
AL
706 /* FIXME - DEBUG_EXECUTE_r if that is merged to maint */
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
708 (s ? "Found" : "Contradicts"),
709 PL_colors[0],
710 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
fdac8c4b 711 SvPVX_const(must),
33b8afdf
JH
712 PL_colors[1], (SvTAIL(must) ? "$" : "")));
713 if (!s) {
714 if (last1 == last) {
2c2d71f5 715 DEBUG_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
716 ", giving up...\n"));
717 goto fail_finish;
2c2d71f5 718 }
33b8afdf
JH
719 DEBUG_r(PerlIO_printf(Perl_debug_log,
720 ", trying anchored starting at offset %ld...\n",
721 (long)(s1 + 1 - i_strpos)));
722 other_last = last;
723 s = HOP3c(t, 1, strend);
724 goto restart;
725 }
726 else {
727 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
728 (long)(s - i_strpos)));
729 other_last = s; /* Fix this later. --Hugo */
730 s = s1;
731 if (t == strpos)
732 goto try_at_start;
733 goto try_at_offset;
734 }
2c2d71f5 735 }
cad2e5aa 736 }
2c2d71f5
JH
737
738 t = s - prog->check_offset_max;
2c2d71f5 739 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 740 && (!do_utf8
1aa99e6b
IH
741 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
742 && t > strpos))) {
2c2d71f5
JH
743 /* Fixed substring is found far enough so that the match
744 cannot start at strpos. */
745 try_at_offset:
cad2e5aa 746 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
747 /* Eventually fbm_*() should handle this, but often
748 anchored_offset is not 0, so this check will not be wasted. */
749 /* XXXX In the code below we prefer to look for "^" even in
750 presence of anchored substrings. And we search even
751 beyond the found float position. These pessimizations
752 are historical artefacts only. */
753 find_anchor:
2c2d71f5 754 while (t < strend - prog->minlen) {
cad2e5aa 755 if (*t == '\n') {
4ee3650e 756 if (t < check_at - prog->check_offset_min) {
33b8afdf 757 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
758 /* Since we moved from the found position,
759 we definitely contradict the found anchored
30944b6d
IZ
760 substr. Due to the above check we do not
761 contradict "check" substr.
762 Thus we can arrive here only if check substr
763 is float. Redo checking for "other"=="fixed".
764 */
9041c2e3 765 strpos = t + 1;
30944b6d
IZ
766 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
767 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
768 goto do_other_anchored;
769 }
4ee3650e
GS
770 /* We don't contradict the found floating substring. */
771 /* XXXX Why not check for STCLASS? */
cad2e5aa 772 s = t + 1;
2c2d71f5 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 774 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
775 goto set_useful;
776 }
4ee3650e
GS
777 /* Position contradicts check-string */
778 /* XXXX probably better to look for check-string
779 than for "\n", so one should lower the limit for t? */
780 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 781 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 782 other_last = strpos = s = t + 1;
cad2e5aa
JH
783 goto restart;
784 }
785 t++;
786 }
2c2d71f5
JH
787 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
788 PL_colors[0],PL_colors[1]));
789 goto fail_finish;
cad2e5aa 790 }
f5952150
GS
791 else {
792 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
793 PL_colors[0],PL_colors[1]));
794 }
cad2e5aa
JH
795 s = t;
796 set_useful:
33b8afdf 797 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
798 }
799 else {
f5952150 800 /* The found string does not prohibit matching at strpos,
2c2d71f5 801 - no optimization of calling REx engine can be performed,
f5952150
GS
802 unless it was an MBOL and we are not after MBOL,
803 or a future STCLASS check will fail this. */
2c2d71f5
JH
804 try_at_start:
805 /* Even in this situation we may use MBOL flag if strpos is offset
806 wrt the start of the string. */
05b4157f 807 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 808 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
809 /* May be due to an implicit anchor of m{.*foo} */
810 && !(prog->reganch & ROPT_IMPLICIT))
811 {
cad2e5aa
JH
812 t = strpos;
813 goto find_anchor;
814 }
30944b6d 815 DEBUG_r( if (ml_anch)
f5952150
GS
816 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
817 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 818 );
2c2d71f5 819 success_at_start:
30944b6d 820 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
821 && (do_utf8 ? (
822 prog->check_utf8 /* Could be deleted already */
823 && --BmUSEFUL(prog->check_utf8) < 0
824 && (prog->check_utf8 == prog->float_utf8)
825 ) : (
826 prog->check_substr /* Could be deleted already */
827 && --BmUSEFUL(prog->check_substr) < 0
828 && (prog->check_substr == prog->float_substr)
829 )))
66e933ab 830 {
cad2e5aa 831 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 832 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
833 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
834 if (do_utf8 ? prog->check_substr : prog->check_utf8)
835 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
836 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
837 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
5e39e1e5 838 check = Nullsv; /* abort */
cad2e5aa 839 s = strpos;
3cf5c195
IZ
840 /* XXXX This is a remnant of the old implementation. It
841 looks wasteful, since now INTUIT can use many
6eb5f6b9 842 other heuristics. */
cad2e5aa
JH
843 prog->reganch &= ~RE_USE_INTUIT;
844 }
845 else
846 s = strpos;
847 }
848
6eb5f6b9
JH
849 /* Last resort... */
850 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
851 if (prog->regstclass) {
852 /* minlen == 0 is possible if regstclass is \b or \B,
853 and the fixed substr is ''$.
854 Since minlen is already taken into account, s+1 is before strend;
855 accidentally, minlen >= 1 guaranties no false positives at s + 1
856 even for \b or \B. But (minlen? 1 : 0) below assumes that
857 regstclass does not come from lookahead... */
858 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
859 This leaves EXACTF only, which is dealt with in find_byclass(). */
ec6f298e
AL
860 const U8* str = (U8*)STRING(prog->regstclass);
861 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 862 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 863 : 1);
8c18bf38 864 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 865 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 866 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
867 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
868 cl_l, strend)
869 : strend);
6eb5f6b9
JH
870
871 t = s;
975adce1 872 cache_re(prog);
ec6f298e 873 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
874 if (!s) {
875#ifdef DEBUGGING
c05e0e2f 876 const char *what = 0;
6eb5f6b9
JH
877#endif
878 if (endpos == strend) {
879 DEBUG_r( PerlIO_printf(Perl_debug_log,
880 "Could not match STCLASS...\n") );
881 goto fail;
882 }
66e933ab
GS
883 DEBUG_r( PerlIO_printf(Perl_debug_log,
884 "This position contradicts STCLASS...\n") );
653099ff
GS
885 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
886 goto fail;
6eb5f6b9 887 /* Contradict one of substrings */
33b8afdf
JH
888 if (prog->anchored_substr || prog->anchored_utf8) {
889 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
6eb5f6b9
JH
890 DEBUG_r( what = "anchored" );
891 hop_and_restart:
1aa99e6b 892 s = HOP3c(t, 1, strend);
66e933ab
GS
893 if (s + start_shift + end_shift > strend) {
894 /* XXXX Should be taken into account earlier? */
895 DEBUG_r( PerlIO_printf(Perl_debug_log,
896 "Could not match STCLASS...\n") );
897 goto fail;
898 }
5e39e1e5
HS
899 if (!check)
900 goto giveup;
6eb5f6b9 901 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 902 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
903 what, (long)(s + start_shift - i_strpos)) );
904 goto restart;
905 }
66e933ab 906 /* Have both, check_string is floating */
6eb5f6b9
JH
907 if (t + start_shift >= check_at) /* Contradicts floating=check */
908 goto retry_floating_check;
909 /* Recheck anchored substring, but not floating... */
9041c2e3 910 s = check_at;
5e39e1e5
HS
911 if (!check)
912 goto giveup;
6eb5f6b9 913 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 914 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
915 (long)(other_last - i_strpos)) );
916 goto do_other_anchored;
917 }
60e71179
GS
918 /* Another way we could have checked stclass at the
919 current position only: */
920 if (ml_anch) {
921 s = t = t + 1;
5e39e1e5
HS
922 if (!check)
923 goto giveup;
60e71179 924 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
925 "Looking for /%s^%s/m starting at offset %ld...\n",
926 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 927 goto try_at_offset;
66e933ab 928 }
33b8afdf 929 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 930 goto fail;
6eb5f6b9
JH
931 /* Check is floating subtring. */
932 retry_floating_check:
933 t = check_at - start_shift;
934 DEBUG_r( what = "floating" );
935 goto hop_and_restart;
936 }
b7953727
JH
937 if (t != s) {
938 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 939 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
940 (long)(t - i_strpos), (long)(s - i_strpos))
941 );
942 }
943 else {
944 DEBUG_r(PerlIO_printf(Perl_debug_log,
945 "Does not contradict STCLASS...\n");
946 );
947 }
6eb5f6b9 948 }
5e39e1e5
HS
949 giveup:
950 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
951 PL_colors[4], (check ? "Guessed" : "Giving up"),
952 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 953 return s;
2c2d71f5
JH
954
955 fail_finish: /* Substring not found */
33b8afdf
JH
956 if (prog->check_substr || prog->check_utf8) /* could be removed already */
957 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 958 fail:
2c2d71f5 959 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
960 PL_colors[4],PL_colors[5]));
961 return Nullch;
962}
9661b544 963
6eb5f6b9 964/* We know what class REx starts with. Try to find this position... */
3c3eec57 965STATIC char *
8c18bf38 966S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 967{
228fe6e6 968 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 969 char *m;
d8093b23 970 STRLEN ln;
bcefe257 971 STRLEN lnc;
45bfbaf1 972 register STRLEN uskip;
d8093b23
G
973 unsigned int c1;
974 unsigned int c2;
6eb5f6b9
JH
975 char *e;
976 register I32 tmp = 1; /* Scratch variable? */
8c18bf38 977 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 978
6eb5f6b9
JH
979 /* We know what class it must start with. */
980 switch (OP(c)) {
6eb5f6b9 981 case ANYOF:
d46a7032 982 if (do_utf8) {
45bfbaf1 983 while (s + (uskip = UTF8SKIP(s)) <= strend) {
d46a7032
JH
984 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
985 !UTF8_IS_INVARIANT((U8)s[0]) ?
986 reginclass(c, (U8*)s, 0, do_utf8) :
987 REGINCLASS(c, (U8*)s)) {
988 if (tmp && (norun || regtry(prog, s)))
989 goto got_it;
990 else
991 tmp = doevery;
992 }
993 else
994 tmp = 1;
45bfbaf1 995 s += uskip;
d46a7032
JH
996 }
997 }
998 else {
999 while (s < strend) {
1000 STRLEN skip = 1;
1001
1002 if (REGINCLASS(c, (U8*)s) ||
1003 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1004 /* The assignment of 2 is intentional:
1005 * for the folded sharp s, the skip is 2. */
1006 (skip = SHARP_S_SKIP))) {
1007 if (tmp && (norun || regtry(prog, s)))
1008 goto got_it;
1009 else
1010 tmp = doevery;
1011 }
1012 else
1013 tmp = 1;
1014 s += skip;
1015 }
a0d0e21e 1016 }
6eb5f6b9 1017 break;
f33976b4
DB
1018 case CANY:
1019 while (s < strend) {
1020 if (tmp && (norun || regtry(prog, s)))
1021 goto got_it;
1022 else
1023 tmp = doevery;
1024 s++;
1025 }
1026 break;
6eb5f6b9 1027 case EXACTF:
bcefe257
NIS
1028 m = STRING(c);
1029 ln = STR_LEN(c); /* length to match in octets/bytes */
1030 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1031 if (UTF) {
a2a2844f 1032 STRLEN ulen1, ulen2;
bcefe257 1033 U8 *sm = (U8 *) m;
a2a469f9
JH
1034 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1035 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
1036
1037 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1038 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1039
a2a469f9 1040 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
872c91ae 1041 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
a2a469f9 1042 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
872c91ae 1043 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
bcefe257
NIS
1044 lnc = 0;
1045 while (sm < ((U8 *) m + ln)) {
1046 lnc++;
1047 sm += UTF8SKIP(sm);
1048 }
1aa99e6b
IH
1049 }
1050 else {
1051 c1 = *(U8*)m;
1052 c2 = PL_fold[c1];
1053 }
6eb5f6b9
JH
1054 goto do_exactf;
1055 case EXACTFL:
bcefe257
NIS
1056 m = STRING(c);
1057 ln = STR_LEN(c);
1058 lnc = (I32) ln;
d8093b23 1059 c1 = *(U8*)m;
6eb5f6b9
JH
1060 c2 = PL_fold_locale[c1];
1061 do_exactf:
dc904a75 1062 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1063
6eb5f6b9
JH
1064 if (norun && e < s)
1065 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1066
60a8b682
JH
1067 /* The idea in the EXACTF* cases is to first find the
1068 * first character of the EXACTF* node and then, if
1069 * necessary, case-insensitively compare the full
1070 * text of the node. The c1 and c2 are the first
1071 * characters (though in Unicode it gets a bit
1072 * more complicated because there are more cases
7f16dd3d
JH
1073 * than just upper and lower: one needs to use
1074 * the so-called folding case for case-insensitive
1075 * matching (called "loose matching" in Unicode).
1076 * ibcmp_utf8() will do just that. */
60a8b682 1077
1aa99e6b 1078 if (do_utf8) {
575cac57 1079 UV c, f;
a2a469f9 1080 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1081 STRLEN len, foldlen;
d7f013c8 1082
09091399 1083 if (c1 == c2) {
bcefe257
NIS
1084 /* Upper and lower of 1st char are equal -
1085 * probably not a "letter". */
1aa99e6b 1086 while (s <= e) {
a2a469f9 1087 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1088 ckWARN(WARN_UTF8) ?
1089 0 : UTF8_ALLOW_ANY);
80aecb99
JH
1090 if ( c == c1
1091 && (ln == len ||
66423254 1092 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1093 m, (char **)0, ln, (bool)UTF))
55da9344 1094 && (norun || regtry(prog, s)) )
1aa99e6b 1095 goto got_it;
80aecb99 1096 else {
228fe6e6 1097 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1098 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1099 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1100 if ( f != c
1101 && (f == c1 || f == c2)
1102 && (ln == foldlen ||
66423254
JH
1103 !ibcmp_utf8((char *) foldbuf,
1104 (char **)0, foldlen, do_utf8,
d07ddd77 1105 m,
eb160463 1106 (char **)0, ln, (bool)UTF))
80aecb99
JH
1107 && (norun || regtry(prog, s)) )
1108 goto got_it;
1109 }
1aa99e6b
IH
1110 s += len;
1111 }
09091399
JH
1112 }
1113 else {
1aa99e6b 1114 while (s <= e) {
a2a469f9 1115 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1116 ckWARN(WARN_UTF8) ?
1117 0 : UTF8_ALLOW_ANY);
80aecb99 1118
60a8b682 1119 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1120 * Note that not all the possible combinations
1121 * are handled here: some of them are handled
1122 * by the standard folding rules, and some of
1123 * them (the character class or ANYOF cases)
1124 * are handled during compiletime in
1125 * regexec.c:S_regclass(). */
880bd946
JH
1126 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1127 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1128 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1129
1130 if ( (c == c1 || c == c2)
1131 && (ln == len ||
66423254 1132 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1133 m, (char **)0, ln, (bool)UTF))
55da9344 1134 && (norun || regtry(prog, s)) )
1aa99e6b 1135 goto got_it;
80aecb99 1136 else {
228fe6e6 1137 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1138 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1139 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1140 if ( f != c
1141 && (f == c1 || f == c2)
1142 && (ln == foldlen ||
a6872d42 1143 !ibcmp_utf8((char *) foldbuf,
66423254 1144 (char **)0, foldlen, do_utf8,
d07ddd77 1145 m,
eb160463 1146 (char **)0, ln, (bool)UTF))
80aecb99
JH
1147 && (norun || regtry(prog, s)) )
1148 goto got_it;
1149 }
1aa99e6b
IH
1150 s += len;
1151 }
09091399 1152 }
1aa99e6b
IH
1153 }
1154 else {
1155 if (c1 == c2)
1156 while (s <= e) {
1157 if ( *(U8*)s == c1
1158 && (ln == 1 || !(OP(c) == EXACTF
1159 ? ibcmp(s, m, ln)
1160 : ibcmp_locale(s, m, ln)))
1161 && (norun || regtry(prog, s)) )
1162 goto got_it;
1163 s++;
1164 }
1165 else
1166 while (s <= e) {
1167 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1168 && (ln == 1 || !(OP(c) == EXACTF
1169 ? ibcmp(s, m, ln)
1170 : ibcmp_locale(s, m, ln)))
1171 && (norun || regtry(prog, s)) )
1172 goto got_it;
1173 s++;
1174 }
b3c9acc1
IZ
1175 }
1176 break;
bbce6d69 1177 case BOUNDL:
3280af22 1178 PL_reg_flags |= RF_tainted;
bbce6d69 1179 /* FALL THROUGH */
a0d0e21e 1180 case BOUND:
ffc61ed2 1181 if (do_utf8) {
12d33761 1182 if (s == PL_bostr)
ffc61ed2
JH
1183 tmp = '\n';
1184 else {
b4f7163a 1185 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1186
b4f7163a 1187 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1188 }
1189 tmp = ((OP(c) == BOUND ?
9041c2e3 1190 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
fab1f349 1191 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1192 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1193 if (tmp == !(OP(c) == BOUND ?
3568d838 1194 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1195 isALNUM_LC_utf8((U8*)s)))
1196 {
1197 tmp = !tmp;
1198 if ((norun || regtry(prog, s)))
1199 goto got_it;
1200 }
45bfbaf1 1201 s += uskip;
a687059c 1202 }
a0d0e21e 1203 }
667bb95a 1204 else {
12d33761 1205 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1206 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1207 while (s < strend) {
1208 if (tmp ==
1209 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1210 tmp = !tmp;
1211 if ((norun || regtry(prog, s)))
1212 goto got_it;
1213 }
1214 s++;
a0ed51b3 1215 }
a0ed51b3 1216 }
6eb5f6b9 1217 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1218 goto got_it;
1219 break;
bbce6d69 1220 case NBOUNDL:
3280af22 1221 PL_reg_flags |= RF_tainted;
bbce6d69 1222 /* FALL THROUGH */
a0d0e21e 1223 case NBOUND:
ffc61ed2 1224 if (do_utf8) {
12d33761 1225 if (s == PL_bostr)
ffc61ed2
JH
1226 tmp = '\n';
1227 else {
b4f7163a 1228 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1229
b4f7163a 1230 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1231 }
1232 tmp = ((OP(c) == NBOUND ?
9041c2e3 1233 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
fab1f349 1234 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1235 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1236 if (tmp == !(OP(c) == NBOUND ?
3568d838 1237 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1238 isALNUM_LC_utf8((U8*)s)))
1239 tmp = !tmp;
1240 else if ((norun || regtry(prog, s)))
1241 goto got_it;
45bfbaf1 1242 s += uskip;
ffc61ed2 1243 }
a0d0e21e 1244 }
667bb95a 1245 else {
12d33761 1246 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1247 tmp = ((OP(c) == NBOUND ?
1248 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1249 while (s < strend) {
1250 if (tmp ==
1251 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1252 tmp = !tmp;
1253 else if ((norun || regtry(prog, s)))
1254 goto got_it;
1255 s++;
1256 }
a0ed51b3 1257 }
6eb5f6b9 1258 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1259 goto got_it;
1260 break;
a0d0e21e 1261 case ALNUM:
ffc61ed2 1262 if (do_utf8) {
fab1f349 1263 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1264 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1265 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1266 if (tmp && (norun || regtry(prog, s)))
1267 goto got_it;
1268 else
1269 tmp = doevery;
1270 }
bbce6d69 1271 else
ffc61ed2 1272 tmp = 1;
45bfbaf1 1273 s += uskip;
bbce6d69 1274 }
bbce6d69 1275 }
ffc61ed2
JH
1276 else {
1277 while (s < strend) {
1278 if (isALNUM(*s)) {
1279 if (tmp && (norun || regtry(prog, s)))
1280 goto got_it;
1281 else
1282 tmp = doevery;
1283 }
a0ed51b3 1284 else
ffc61ed2
JH
1285 tmp = 1;
1286 s++;
a0ed51b3 1287 }
a0ed51b3
LW
1288 }
1289 break;
bbce6d69 1290 case ALNUML:
3280af22 1291 PL_reg_flags |= RF_tainted;
ffc61ed2 1292 if (do_utf8) {
45bfbaf1 1293 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1294 if (isALNUM_LC_utf8((U8*)s)) {
1295 if (tmp && (norun || regtry(prog, s)))
1296 goto got_it;
1297 else
1298 tmp = doevery;
1299 }
a687059c 1300 else
ffc61ed2 1301 tmp = 1;
45bfbaf1 1302 s += uskip;
a0d0e21e 1303 }
a0d0e21e 1304 }
ffc61ed2
JH
1305 else {
1306 while (s < strend) {
1307 if (isALNUM_LC(*s)) {
1308 if (tmp && (norun || regtry(prog, s)))
1309 goto got_it;
1310 else
1311 tmp = doevery;
1312 }
a0ed51b3 1313 else
ffc61ed2
JH
1314 tmp = 1;
1315 s++;
a0ed51b3 1316 }
a0ed51b3
LW
1317 }
1318 break;
a0d0e21e 1319 case NALNUM:
ffc61ed2 1320 if (do_utf8) {
fab1f349 1321 LOAD_UTF8_CHARCLASS_ALNUM();
45bfbaf1 1322 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1323 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1324 if (tmp && (norun || regtry(prog, s)))
1325 goto got_it;
1326 else
1327 tmp = doevery;
1328 }
bbce6d69 1329 else
ffc61ed2 1330 tmp = 1;
45bfbaf1 1331 s += uskip;
bbce6d69 1332 }
bbce6d69 1333 }
ffc61ed2
JH
1334 else {
1335 while (s < strend) {
1336 if (!isALNUM(*s)) {
1337 if (tmp && (norun || regtry(prog, s)))
1338 goto got_it;
1339 else
1340 tmp = doevery;
1341 }
a0ed51b3 1342 else
ffc61ed2
JH
1343 tmp = 1;
1344 s++;
a0ed51b3 1345 }
a0ed51b3
LW
1346 }
1347 break;
bbce6d69 1348 case NALNUML:
3280af22 1349 PL_reg_flags |= RF_tainted;
ffc61ed2 1350 if (do_utf8) {
45bfbaf1 1351 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1352 if (!isALNUM_LC_utf8((U8*)s)) {
1353 if (tmp && (norun || regtry(prog, s)))
1354 goto got_it;
1355 else
1356 tmp = doevery;
1357 }
a687059c 1358 else
ffc61ed2 1359 tmp = 1;
45bfbaf1 1360 s += uskip;
a687059c 1361 }
a0d0e21e 1362 }
ffc61ed2
JH
1363 else {
1364 while (s < strend) {
1365 if (!isALNUM_LC(*s)) {
1366 if (tmp && (norun || regtry(prog, s)))
1367 goto got_it;
1368 else
1369 tmp = doevery;
1370 }
a0ed51b3 1371 else
ffc61ed2
JH
1372 tmp = 1;
1373 s++;
a0ed51b3 1374 }
a0ed51b3
LW
1375 }
1376 break;
a0d0e21e 1377 case SPACE:
ffc61ed2 1378 if (do_utf8) {
fab1f349 1379 LOAD_UTF8_CHARCLASS_SPACE();
45bfbaf1 1380 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1381 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1382 if (tmp && (norun || regtry(prog, s)))
1383 goto got_it;
1384 else
1385 tmp = doevery;
1386 }
a0d0e21e 1387 else
ffc61ed2 1388 tmp = 1;
45bfbaf1 1389 s += uskip;
2304df62 1390 }
a0d0e21e 1391 }
ffc61ed2
JH
1392 else {
1393 while (s < strend) {
1394 if (isSPACE(*s)) {
1395 if (tmp && (norun || regtry(prog, s)))
1396 goto got_it;
1397 else
1398 tmp = doevery;
1399 }
a0ed51b3 1400 else
ffc61ed2
JH
1401 tmp = 1;
1402 s++;
a0ed51b3 1403 }
a0ed51b3
LW
1404 }
1405 break;
bbce6d69 1406 case SPACEL:
3280af22 1407 PL_reg_flags |= RF_tainted;
ffc61ed2 1408 if (do_utf8) {
45bfbaf1 1409 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1410 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1411 if (tmp && (norun || regtry(prog, s)))
1412 goto got_it;
1413 else
1414 tmp = doevery;
1415 }
bbce6d69 1416 else
ffc61ed2 1417 tmp = 1;
45bfbaf1 1418 s += uskip;
bbce6d69 1419 }
bbce6d69 1420 }
ffc61ed2
JH
1421 else {
1422 while (s < strend) {
1423 if (isSPACE_LC(*s)) {
1424 if (tmp && (norun || regtry(prog, s)))
1425 goto got_it;
1426 else
1427 tmp = doevery;
1428 }
a0ed51b3 1429 else
ffc61ed2
JH
1430 tmp = 1;
1431 s++;
a0ed51b3 1432 }
a0ed51b3
LW
1433 }
1434 break;
a0d0e21e 1435 case NSPACE:
ffc61ed2 1436 if (do_utf8) {
fab1f349 1437 LOAD_UTF8_CHARCLASS_SPACE();
45bfbaf1 1438 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1439 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1440 if (tmp && (norun || regtry(prog, s)))
1441 goto got_it;
1442 else
1443 tmp = doevery;
1444 }
a0d0e21e 1445 else
ffc61ed2 1446 tmp = 1;
45bfbaf1 1447 s += uskip;
a687059c 1448 }
a0d0e21e 1449 }
ffc61ed2
JH
1450 else {
1451 while (s < strend) {
1452 if (!isSPACE(*s)) {
1453 if (tmp && (norun || regtry(prog, s)))
1454 goto got_it;
1455 else
1456 tmp = doevery;
1457 }
a0ed51b3 1458 else
ffc61ed2
JH
1459 tmp = 1;
1460 s++;
a0ed51b3 1461 }
a0ed51b3
LW
1462 }
1463 break;
bbce6d69 1464 case NSPACEL:
3280af22 1465 PL_reg_flags |= RF_tainted;
ffc61ed2 1466 if (do_utf8) {
45bfbaf1 1467 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1468 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1469 if (tmp && (norun || regtry(prog, s)))
1470 goto got_it;
1471 else
1472 tmp = doevery;
1473 }
bbce6d69 1474 else
ffc61ed2 1475 tmp = 1;
45bfbaf1 1476 s += uskip;
bbce6d69 1477 }
bbce6d69 1478 }
ffc61ed2
JH
1479 else {
1480 while (s < strend) {
1481 if (!isSPACE_LC(*s)) {
1482 if (tmp && (norun || regtry(prog, s)))
1483 goto got_it;
1484 else
1485 tmp = doevery;
1486 }
a0ed51b3 1487 else
ffc61ed2
JH
1488 tmp = 1;
1489 s++;
a0ed51b3 1490 }
a0ed51b3
LW
1491 }
1492 break;
a0d0e21e 1493 case DIGIT:
ffc61ed2 1494 if (do_utf8) {
fab1f349 1495 LOAD_UTF8_CHARCLASS_DIGIT();
45bfbaf1 1496 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1497 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1498 if (tmp && (norun || regtry(prog, s)))
1499 goto got_it;
1500 else
1501 tmp = doevery;
1502 }
a0d0e21e 1503 else
ffc61ed2 1504 tmp = 1;
45bfbaf1 1505 s += uskip;
2b69d0c2 1506 }
a0d0e21e 1507 }
ffc61ed2
JH
1508 else {
1509 while (s < strend) {
1510 if (isDIGIT(*s)) {
1511 if (tmp && (norun || regtry(prog, s)))
1512 goto got_it;
1513 else
1514 tmp = doevery;
1515 }
a0ed51b3 1516 else
ffc61ed2
JH
1517 tmp = 1;
1518 s++;
a0ed51b3 1519 }
a0ed51b3
LW
1520 }
1521 break;
b8c5462f
JH
1522 case DIGITL:
1523 PL_reg_flags |= RF_tainted;
ffc61ed2 1524 if (do_utf8) {
45bfbaf1 1525 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1526 if (isDIGIT_LC_utf8((U8*)s)) {
1527 if (tmp && (norun || regtry(prog, s)))
1528 goto got_it;
1529 else
1530 tmp = doevery;
1531 }
b8c5462f 1532 else
ffc61ed2 1533 tmp = 1;
45bfbaf1 1534 s += uskip;
b8c5462f 1535 }
b8c5462f 1536 }
ffc61ed2
JH
1537 else {
1538 while (s < strend) {
1539 if (isDIGIT_LC(*s)) {
1540 if (tmp && (norun || regtry(prog, s)))
1541 goto got_it;
1542 else
1543 tmp = doevery;
1544 }
b8c5462f 1545 else
ffc61ed2
JH
1546 tmp = 1;
1547 s++;
b8c5462f 1548 }
b8c5462f
JH
1549 }
1550 break;
a0d0e21e 1551 case NDIGIT:
ffc61ed2 1552 if (do_utf8) {
fab1f349 1553 LOAD_UTF8_CHARCLASS_DIGIT();
45bfbaf1 1554 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1555 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1556 if (tmp && (norun || regtry(prog, s)))
1557 goto got_it;
1558 else
1559 tmp = doevery;
1560 }
a0d0e21e 1561 else
ffc61ed2 1562 tmp = 1;
45bfbaf1 1563 s += uskip;
a687059c 1564 }
a0d0e21e 1565 }
ffc61ed2
JH
1566 else {
1567 while (s < strend) {
1568 if (!isDIGIT(*s)) {
1569 if (tmp && (norun || regtry(prog, s)))
1570 goto got_it;
1571 else
1572 tmp = doevery;
1573 }
a0ed51b3 1574 else
ffc61ed2
JH
1575 tmp = 1;
1576 s++;
a0ed51b3 1577 }
a0ed51b3
LW
1578 }
1579 break;
b8c5462f
JH
1580 case NDIGITL:
1581 PL_reg_flags |= RF_tainted;
ffc61ed2 1582 if (do_utf8) {
45bfbaf1 1583 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1584 if (!isDIGIT_LC_utf8((U8*)s)) {
1585 if (tmp && (norun || regtry(prog, s)))
1586 goto got_it;
1587 else
1588 tmp = doevery;
1589 }
b8c5462f 1590 else
ffc61ed2 1591 tmp = 1;
45bfbaf1 1592 s += uskip;
b8c5462f 1593 }
a0ed51b3 1594 }
ffc61ed2
JH
1595 else {
1596 while (s < strend) {
1597 if (!isDIGIT_LC(*s)) {
1598 if (tmp && (norun || regtry(prog, s)))
1599 goto got_it;
1600 else
1601 tmp = doevery;
1602 }
cf93c79d 1603 else
ffc61ed2
JH
1604 tmp = 1;
1605 s++;
b8c5462f 1606 }
b8c5462f
JH
1607 }
1608 break;
b3c9acc1 1609 default:
3c3eec57
GS
1610 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1611 break;
d6a28714 1612 }
6eb5f6b9
JH
1613 return 0;
1614 got_it:
1615 return s;
1616}
1617
1618/*
1619 - regexec_flags - match a regexp against a string
1620 */
1621I32
1622Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1623 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1624/* strend: pointer to null at end of string */
1625/* strbeg: real beginning of string */
1626/* minend: end of match must be >=minend after stringarg. */
1627/* data: May be used for some additional optimizations. */
1628/* nosave: For optimizations. */
1629{
6eb5f6b9
JH
1630 register char *s;
1631 register regnode *c;
1632 register char *startpos = stringarg;
6eb5f6b9
JH
1633 I32 minlen; /* must match at least this many chars */
1634 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1635 I32 end_shift = 0; /* Same for the end. */ /* CC */
1636 I32 scream_pos = -1; /* Internal iterator of scream. */
1637 char *scream_olds;
1638 SV* oreplsv = GvSV(PL_replgv);
228fe6e6 1639 const bool do_utf8 = DO_UTF8(sv);
2a782b5b 1640#ifdef DEBUGGING
9e55ce06
JH
1641 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1642 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1643#endif
a2592645 1644 PERL_UNUSED_ARG(data);
2ea5368e 1645 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1646
1647 PL_regcc = 0;
1648
1649 cache_re(prog);
1650#ifdef DEBUGGING
aea4f609 1651 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1652#endif
1653
1654 /* Be paranoid... */
1655 if (prog == NULL || startpos == NULL) {
1656 Perl_croak(aTHX_ "NULL regexp parameter");
1657 return 0;
1658 }
1659
1660 minlen = prog->minlen;
61a36c01 1661 if (strend - startpos < minlen) {
a72c7584
JH
1662 DEBUG_r(PerlIO_printf(Perl_debug_log,
1663 "String too short [regexec_flags]...\n"));
1664 goto phooey;
1aa99e6b 1665 }
6eb5f6b9 1666
6eb5f6b9
JH
1667 /* Check validity of program. */
1668 if (UCHARAT(prog->program) != REG_MAGIC) {
1669 Perl_croak(aTHX_ "corrupted regexp program");
1670 }
1671
1672 PL_reg_flags = 0;
1673 PL_reg_eval_set = 0;
1674 PL_reg_maxiter = 0;
1675
1676 if (prog->reganch & ROPT_UTF8)
1677 PL_reg_flags |= RF_utf8;
1678
1679 /* Mark beginning of line for ^ and lookbehind. */
1680 PL_regbol = startpos;
1681 PL_bostr = strbeg;
1682 PL_reg_sv = sv;
1683
1684 /* Mark end of line for $ (and such) */
1685 PL_regeol = strend;
1686
1687 /* see how far we have to get to not match where we matched before */
1688 PL_regtill = startpos+minend;
1689
1690 /* We start without call_cc context. */
1691 PL_reg_call_cc = 0;
1692
1693 /* If there is a "must appear" string, look for it. */
1694 s = startpos;
1695
1696 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1697 MAGIC *mg;
1698
1699 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1700 PL_reg_ganch = startpos;
1701 else if (sv && SvTYPE(sv) >= SVt_PVMG
1702 && SvMAGIC(sv)
14befaf4
DM
1703 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1704 && mg->mg_len >= 0) {
6eb5f6b9
JH
1705 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1706 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1707 if (s > PL_reg_ganch)
6eb5f6b9
JH
1708 goto phooey;
1709 s = PL_reg_ganch;
1710 }
1711 }
1712 else /* pos() not defined */
1713 PL_reg_ganch = strbeg;
1714 }
1715
33b8afdf 1716 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
6eb5f6b9
JH
1717 re_scream_pos_data d;
1718
1719 d.scream_olds = &scream_olds;
1720 d.scream_pos = &scream_pos;
1721 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1722 if (!s) {
1723 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1724 goto phooey; /* not present */
3fa9c3d7 1725 }
6eb5f6b9
JH
1726 }
1727
2a782b5b 1728 DEBUG_r({
228fe6e6
AL
1729 const char * const s0 = UTF
1730 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1731 UNI_DISPLAY_REGEX)
1732 : prog->precomp;
1733 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1734 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1735 UNI_DISPLAY_REGEX) : startpos;
228fe6e6 1736 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1737 if (!PL_colorset)
1738 reginitcolors();
1739 PerlIO_printf(Perl_debug_log,
5332c881 1740 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
2a782b5b 1741 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1742 len0, len0, s0,
2a782b5b 1743 PL_colors[1],
9e55ce06 1744 len0 > 60 ? "..." : "",
2a782b5b 1745 PL_colors[0],
9e55ce06
JH
1746 (int)(len1 > 60 ? 60 : len1),
1747 s1, PL_colors[1],
1748 (len1 > 60 ? "..." : "")
2a782b5b
JH
1749 );
1750 });
6eb5f6b9
JH
1751
1752 /* Simplest case: anchored match need be tried only once. */
1753 /* [unless only anchor is BOL and multiline is set] */
1754 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1755 if (s == startpos && regtry(prog, startpos))
1756 goto got_it;
1757 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1758 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1759 {
1760 char *end;
1761
1762 if (minlen)
1763 dontbother = minlen - 1;
1aa99e6b 1764 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1765 /* for multiline we only have to try after newlines */
33b8afdf 1766 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1767 if (s == startpos)
1768 goto after_try;
1769 while (1) {
1770 if (regtry(prog, s))
1771 goto got_it;
1772 after_try:
1773 if (s >= end)
1774 goto phooey;
1775 if (prog->reganch & RE_USE_INTUIT) {
1776 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1777 if (!s)
1778 goto phooey;
1779 }
1780 else
1781 s++;
1782 }
1783 } else {
1784 if (s > startpos)
1785 s--;
1786 while (s < end) {
1787 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1788 if (regtry(prog, s))
1789 goto got_it;
1790 }
1791 }
1792 }
1793 }
1794 goto phooey;
1795 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1796 if (regtry(prog, PL_reg_ganch))
1797 goto got_it;
1798 goto phooey;
1799 }
1800
1801 /* Messy cases: unanchored match. */
33b8afdf 1802 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1803 /* we have /x+whatever/ */
1804 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1805 char ch;
bf93d4cc
GS
1806#ifdef DEBUGGING
1807 int did_match = 0;
1808#endif
33b8afdf
JH
1809 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1810 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
fdac8c4b 1811 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1812
1aa99e6b 1813 if (do_utf8) {
6eb5f6b9
JH
1814 while (s < strend) {
1815 if (*s == ch) {
bf93d4cc 1816 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1817 if (regtry(prog, s)) goto got_it;
1818 s += UTF8SKIP(s);
1819 while (s < strend && *s == ch)
1820 s += UTF8SKIP(s);
1821 }
1822 s += UTF8SKIP(s);
1823 }
1824 }
1825 else {
1826 while (s < strend) {
1827 if (*s == ch) {
bf93d4cc 1828 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1829 if (regtry(prog, s)) goto got_it;
1830 s++;
1831 while (s < strend && *s == ch)
1832 s++;
1833 }
1834 s++;
1835 }
1836 }
b7953727 1837 DEBUG_r(if (!did_match)
bf93d4cc 1838 PerlIO_printf(Perl_debug_log,
b7953727
JH
1839 "Did not find anchored character...\n")
1840 );
6eb5f6b9 1841 }
33b8afdf
JH
1842 else if (prog->anchored_substr != Nullsv
1843 || prog->anchored_utf8 != Nullsv
1844 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1845 && prog->float_max_offset < strend - s)) {
1846 SV *must;
1847 I32 back_max;
1848 I32 back_min;
1849 char *last;
6eb5f6b9 1850 char *last1; /* Last position checked before */
bf93d4cc
GS
1851#ifdef DEBUGGING
1852 int did_match = 0;
1853#endif
33b8afdf
JH
1854 if (prog->anchored_substr || prog->anchored_utf8) {
1855 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1856 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1857 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1858 back_max = back_min = prog->anchored_offset;
1859 } else {
1860 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1861 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1862 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1863 back_max = prog->float_max_offset;
1864 back_min = prog->float_min_offset;
1865 }
1866 if (must == &PL_sv_undef)
1867 /* could not downgrade utf8 check substring, so must fail */
1868 goto phooey;
1869
1870 last = HOP3c(strend, /* Cannot start after this */
1871 -(I32)(CHR_SVLEN(must)
1872 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1873
1874 if (s > PL_bostr)
1875 last1 = HOPc(s, -1);
1876 else
1877 last1 = s - 1; /* bogus */
1878
5332c881 1879 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1880 check_substr==must. */
1881 scream_pos = -1;
1882 dontbother = end_shift;
1883 strend = HOPc(strend, -dontbother);
1884 while ( (s <= last) &&
9041c2e3 1885 ((flags & REXEC_SCREAM)
1aa99e6b 1886 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1887 end_shift, &scream_pos, 0))
1aa99e6b 1888 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1889 (unsigned char*)strend, must,
6eb5f6b9 1890 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
88af3bc3
JH
1891 /* we may be pointing at the wrong string */
1892 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
fdac8c4b 1893 s = strbeg + (s - SvPVX_const(sv));
bf93d4cc 1894 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1895 if (HOPc(s, -back_max) > last1) {
1896 last1 = HOPc(s, -back_min);
1897 s = HOPc(s, -back_max);
1898 }
1899 else {
1900 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1901
1902 last1 = HOPc(s, -back_min);
1903 s = t;
1904 }
1aa99e6b 1905 if (do_utf8) {
6eb5f6b9
JH
1906 while (s <= last1) {
1907 if (regtry(prog, s))
1908 goto got_it;
1909 s += UTF8SKIP(s);
1910 }
1911 }
1912 else {
1913 while (s <= last1) {
1914 if (regtry(prog, s))
1915 goto got_it;
1916 s++;
1917 }
1918 }
1919 }
b7953727
JH
1920 DEBUG_r(if (!did_match)
1921 PerlIO_printf(Perl_debug_log,
5332c881 1922 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1923 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1924 ? "anchored" : "floating"),
1925 PL_colors[0],
1926 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
fdac8c4b 1927 SvPVX_const(must),
b7953727
JH
1928 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1929 );
6eb5f6b9
JH
1930 goto phooey;
1931 }
155aba94 1932 else if ((c = prog->regstclass)) {
e19fe81c
JH
1933 if (minlen) {
1934 I32 op = (U8)OP(prog->regstclass);
66e933ab 1935 /* don't bother with what can't match */
e19fe81c
JH
1936 if (PL_regkind[op] != EXACT && op != CANY)
1937 strend = HOPc(strend, -(minlen - 1));
1938 }
ffc61ed2
JH
1939 DEBUG_r({
1940 SV *prop = sv_newmortal();
71a0dd65
NC
1941 const char *s0;
1942 const char *s1;
9e55ce06
JH
1943 int len0;
1944 int len1;
1945
ffc61ed2 1946 regprop(prop, c);
9e55ce06 1947 s0 = UTF ?
fdac8c4b 1948 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1949 UNI_DISPLAY_REGEX) :
71a0dd65 1950 SvPVX_const(prop);
9e55ce06
JH
1951 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1952 s1 = UTF ?
c728cb41 1953 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1954 len1 = UTF ? SvCUR(dsv1) : strend - s;
1955 PerlIO_printf(Perl_debug_log,
5332c881 1956 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1957 len0, len0, s0,
1958 len1, len1, s1);
ffc61ed2 1959 });
ec6f298e 1960 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1961 goto got_it;
bf93d4cc 1962 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1963 }
1964 else {
1965 dontbother = 0;
33b8afdf
JH
1966 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1967 /* Trim the end. */
d6a28714 1968 char *last;
33b8afdf
JH
1969 SV* float_real;
1970
1971 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1972 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1973 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1974
1975 if (flags & REXEC_SCREAM) {
33b8afdf 1976 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1977 end_shift, &scream_pos, 1); /* last one */
1978 if (!last)
ffc61ed2 1979 last = scream_olds; /* Only one occurrence. */
88af3bc3
JH
1980 /* we may be pointing at the wrong string */
1981 else if (RX_MATCH_COPIED(prog))
fdac8c4b 1982 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1983 }
d6a28714
JH
1984 else {
1985 STRLEN len;
71a0dd65 1986 const char * const little = SvPV_const(float_real, len);
d6a28714 1987
33b8afdf 1988 if (SvTAIL(float_real)) {
d6a28714
JH
1989 if (memEQ(strend - len + 1, little, len - 1))
1990 last = strend - len + 1;
1991 else if (!PL_multiline)
9041c2e3 1992 last = memEQ(strend - len, little, len)
d6a28714 1993 ? strend - len : Nullch;
b8c5462f 1994 else
d6a28714
JH
1995 goto find_last;
1996 } else {
1997 find_last:
9041c2e3 1998 if (len)
d6a28714 1999 last = rninstr(s, strend, little, little + len);
b8c5462f 2000 else
5332c881 2001 last = strend; /* matching "$" */
b8c5462f 2002 }
b8c5462f 2003 }
bf93d4cc
GS
2004 if (last == NULL) {
2005 DEBUG_r(PerlIO_printf(Perl_debug_log,
2006 "%sCan't trim the tail, match fails (should not happen)%s\n",
2007 PL_colors[4],PL_colors[5]));
2008 goto phooey; /* Should not happen! */
2009 }
d6a28714
JH
2010 dontbother = strend - last + prog->float_min_offset;
2011 }
2012 if (minlen && (dontbother < minlen))
2013 dontbother = minlen - 1;
2014 strend -= dontbother; /* this one's always in bytes! */
2015 /* We don't know much -- general case. */
1aa99e6b 2016 if (do_utf8) {
d6a28714
JH
2017 for (;;) {
2018 if (regtry(prog, s))
2019 goto got_it;
2020 if (s >= strend)
2021 break;
b8c5462f 2022 s += UTF8SKIP(s);
d6a28714
JH
2023 };
2024 }
2025 else {
2026 do {
2027 if (regtry(prog, s))
2028 goto got_it;
2029 } while (s++ < strend);
2030 }
2031 }
2032
2033 /* Failure. */
2034 goto phooey;
2035
2036got_it:
2037 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2038
2039 if (PL_reg_eval_set) {
2040 /* Preserve the current value of $^R */
2041 if (oreplsv != GvSV(PL_replgv))
2042 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2043 restored, the value remains
2044 the same. */
acfe0abc 2045 restore_pos(aTHX_ 0);
d6a28714
JH
2046 }
2047
2048 /* make sure $`, $&, $', and $digit will work later */
2049 if ( !(flags & REXEC_NOT_FIRST) ) {
2050 if (RX_MATCH_COPIED(prog)) {
2051 Safefree(prog->subbeg);
2052 RX_MATCH_COPIED_off(prog);
2053 }
2054 if (flags & REXEC_COPY_STR) {
2055 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2056
2057 s = savepvn(strbeg, i);
2058 prog->subbeg = s;
2059 prog->sublen = i;
2060 RX_MATCH_COPIED_on(prog);
2061 }
2062 else {
2063 prog->subbeg = strbeg;
2064 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2065 }
2066 }
9041c2e3 2067
d6a28714
JH
2068 return 1;
2069
2070phooey:
bf93d4cc
GS
2071 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2072 PL_colors[4],PL_colors[5]));
d6a28714 2073 if (PL_reg_eval_set)
acfe0abc 2074 restore_pos(aTHX_ 0);
d6a28714
JH
2075 return 0;
2076}
2077
2078/*
2079 - regtry - try match at specific point
2080 */
2081STATIC I32 /* 0 failure, 1 success */
2082S_regtry(pTHX_ regexp *prog, char *startpos)
2083{
d6a28714
JH
2084 register I32 i;
2085 register I32 *sp;
2086 register I32 *ep;
2087 CHECKPOINT lastcp;
2088
02db2b7b
IZ
2089#ifdef DEBUGGING
2090 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2091#endif
d6a28714
JH
2092 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2093 MAGIC *mg;
2094
2095 PL_reg_eval_set = RS_init;
2096 DEBUG_r(DEBUG_s(
b900a521
JH
2097 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2098 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2099 ));
e8347627 2100 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2101 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2102 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2103 SAVETMPS;
2104 /* Apparently this is not needed, judging by wantarray. */
e8347627 2105 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2106 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2107
2108 if (PL_reg_sv) {
2109 /* Make $_ available to executed code. */
2110 if (PL_reg_sv != DEFSV) {
4d1ff10f 2111 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
2112 SAVESPTR(DEFSV);
2113 DEFSV = PL_reg_sv;
b8c5462f 2114 }
d6a28714 2115
9041c2e3 2116 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2117 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2118 /* prepare for quick setting of pos */
14befaf4
DM
2119 sv_magic(PL_reg_sv, (SV*)0,
2120 PERL_MAGIC_regex_global, Nullch, 0);
2121 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2122 mg->mg_len = -1;
b8c5462f 2123 }
d6a28714
JH
2124 PL_reg_magic = mg;
2125 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2126 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2127 }
09687e5a 2128 if (!PL_reg_curpm) {
cd7a8267 2129 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2130#ifdef USE_ITHREADS
2131 {
2132 SV* repointer = newSViv(0);
577e12cc 2133 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2134 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2135 av_push(PL_regex_padav,repointer);
2136 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2137 PL_regex_pad = AvARRAY(PL_regex_padav);
2138 }
2139#endif
2140 }
aaa362c4 2141 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2142 PL_reg_oldcurpm = PL_curpm;
2143 PL_curpm = PL_reg_curpm;
2144 if (RX_MATCH_COPIED(prog)) {
2145 /* Here is a serious problem: we cannot rewrite subbeg,
2146 since it may be needed if this match fails. Thus
2147 $` inside (?{}) could fail... */
2148 PL_reg_oldsaved = prog->subbeg;
2149 PL_reg_oldsavedlen = prog->sublen;
2150 RX_MATCH_COPIED_off(prog);
2151 }
2152 else
2153 PL_reg_oldsaved = Nullch;
2154 prog->subbeg = PL_bostr;
2155 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2156 }
2157 prog->startp[0] = startpos - PL_bostr;
2158 PL_reginput = startpos;
2159 PL_regstartp = prog->startp;
2160 PL_regendp = prog->endp;
2161 PL_reglastparen = &prog->lastparen;
a01268b5 2162 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2163 prog->lastparen = 0;
4f4e7967 2164 prog->lastcloseparen = 0;
d6a28714
JH
2165 PL_regsize = 0;
2166 DEBUG_r(PL_reg_starttry = startpos);
2167 if (PL_reg_start_tmpl <= prog->nparens) {
2168 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2169 if(PL_reg_start_tmp)
2170 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2171 else
cd7a8267 2172 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2173 }
2174
2175 /* XXXX What this code is doing here?!!! There should be no need
2176 to do this again and again, PL_reglastparen should take care of
3dd2943c 2177 this! --ilya*/
dafc8851
JH
2178
2179 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2180 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2181 * PL_reglastparen), is not needed at all by the test suite
2182 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2183 * enough, for building DynaLoader, or otherwise this
2184 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2185 * will happen. Meanwhile, this code *is* needed for the
2186 * above-mentioned test suite tests to succeed. The common theme
2187 * on those tests seems to be returning null fields from matches.
2188 * --jhi */
dafc8851 2189#if 1
d6a28714
JH
2190 sp = prog->startp;
2191 ep = prog->endp;
2192 if (prog->nparens) {
eb160463 2193 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2194 *++sp = -1;
2195 *++ep = -1;
2196 }
2197 }
dafc8851 2198#endif
02db2b7b 2199 REGCP_SET(lastcp);
d6a28714
JH
2200 if (regmatch(prog->program + 1)) {
2201 prog->endp[0] = PL_reginput - PL_bostr;
2202 return 1;
2203 }
02db2b7b 2204 REGCP_UNWIND(lastcp);
d6a28714
JH
2205 return 0;
2206}
2207
02db2b7b
IZ
2208#define RE_UNWIND_BRANCH 1
2209#define RE_UNWIND_BRANCHJ 2
2210
2211union re_unwind_t;
2212
2213typedef struct { /* XX: makes sense to enlarge it... */
2214 I32 type;
2215 I32 prev;
2216 CHECKPOINT lastcp;
2217} re_unwind_generic_t;
2218
2219typedef struct {
2220 I32 type;
2221 I32 prev;
2222 CHECKPOINT lastcp;
2223 I32 lastparen;
2224 regnode *next;
2225 char *locinput;
2226 I32 nextchr;
2227#ifdef DEBUGGING
2228 int regindent;
2229#endif
2230} re_unwind_branch_t;
2231
2232typedef union re_unwind_t {
2233 I32 type;
2234 re_unwind_generic_t generic;
2235 re_unwind_branch_t branch;
2236} re_unwind_t;
2237
8ba1375e
MJD
2238#define sayYES goto yes
2239#define sayNO goto no
e0f9d4a8 2240#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2241#define sayYES_FINAL goto yes_final
2242#define sayYES_LOUD goto yes_loud
2243#define sayNO_FINAL goto no_final
2244#define sayNO_SILENT goto do_no
2245#define saySAME(x) if (x) goto yes; else goto no
2246
6e00f9c6
HS
2247#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2248#define POSCACHE_SEEN 1 /* we know what we're caching */
2249#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2250#define CACHEsayYES STMT_START { \
2251 if (cache_offset | cache_bit) { \
2252 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2253 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2254 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2255 /* cache records failure, but this is success */ \
2256 DEBUG_r( \
2257 PerlIO_printf(Perl_debug_log, \
2258 "%*s (remove success from failure cache)\n", \
2259 REPORT_CODE_OFF+PL_regindent*2, "") \
2260 ); \
2261 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2262 } \
2263 } \
2264 sayYES; \
2265} STMT_END
2266#define CACHEsayNO STMT_START { \
2267 if (cache_offset | cache_bit) { \
2268 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2269 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2270 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2271 /* cache records success, but this is failure */ \
2272 DEBUG_r( \
2273 PerlIO_printf(Perl_debug_log, \
2274 "%*s (remove failure from success cache)\n", \
2275 REPORT_CODE_OFF+PL_regindent*2, "") \
2276 ); \
2277 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2278 } \
2279 } \
2280 sayNO; \
2281} STMT_END
2282
8ba1375e
MJD
2283#define REPORT_CODE_OFF 24
2284
d6a28714
JH
2285/*
2286 - regmatch - main matching routine
2287 *
2288 * Conceptually the strategy is simple: check to see whether the current
2289 * node matches, call self recursively to see whether the rest matches,
2290 * and then act accordingly. In practice we make some effort to avoid
2291 * recursion, in particular by going through "ordinary" nodes (that don't
2292 * need to know whether the rest of the match failed) by a loop instead of
2293 * by recursion.
2294 */
2295/* [lwall] I've hoisted the register declarations to the outer block in order to
2296 * maybe save a little bit of pushing and popping on the stack. It also takes
2297 * advantage of machines that use a register save mask on subroutine entry.
2298 */
2299STATIC I32 /* 0 failure, 1 success */
2300S_regmatch(pTHX_ regnode *prog)
2301{
d6a28714
JH
2302 register regnode *scan; /* Current node. */
2303 regnode *next; /* Next node. */
2304 regnode *inner; /* Next node in internal branch. */
2305 register I32 nextchr; /* renamed nextchr - nextchar colides with
2306 function of same name */
2307 register I32 n; /* no or next */
b7953727
JH
2308 register I32 ln = 0; /* len or last */
2309 register char *s = Nullch; /* operand or save */
d6a28714 2310 register char *locinput = PL_reginput;
b7953727 2311 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2312 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2313 I32 unwind = 0;
b7953727 2314#if 0
02db2b7b 2315 I32 firstcp = PL_savestack_ix;
b7953727 2316#endif
8c18bf38 2317 const register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2318#ifdef DEBUGGING
ce333219
JH
2319 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2320 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2321 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2322#endif
02db2b7b 2323
d6a28714
JH
2324#ifdef DEBUGGING
2325 PL_regindent++;
2326#endif
2327
2328 /* Note that nextchr is a byte even in UTF */
2329 nextchr = UCHARAT(locinput);
2330 scan = prog;
2331 while (scan != NULL) {
8ba1375e 2332
2a782b5b 2333 DEBUG_r( {
d6a28714 2334 SV *prop = sv_newmortal();
228fe6e6
AL
2335 const int docolor = *PL_colors[0];
2336 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2337 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2338 /* The part of the string before starttry has one color
2339 (pref0_len chars), between starttry and current
2340 position another one (pref_len - pref0_len chars),
2341 after the current position the third one.
2342 We assume that pref0_len <= pref_len, otherwise we
2343 decrease pref0_len. */
9041c2e3 2344 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2345 ? (5 + taill) - l : locinput - PL_bostr;
2346 int pref0_len;
d6a28714 2347
df1ffd02 2348 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2349 pref_len++;
2350 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2351 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2352 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2353 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2354 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2355 l--;
d6a28714
JH
2356 if (pref0_len < 0)
2357 pref0_len = 0;
2358 if (pref0_len > pref_len)
2359 pref0_len = pref_len;
2360 regprop(prop, scan);
2a782b5b 2361 {
228fe6e6 2362 const char * const s0 =
e19fe81c 2363 do_utf8 && OP(scan) != CANY ?
2a782b5b 2364 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2365 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2366 locinput - pref_len;
228fe6e6
AL
2367 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2368 const char * const s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2369 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2370 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2371 locinput - pref_len + pref0_len;
228fe6e6
AL
2372 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2373 const char * const s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2374 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2375 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2376 locinput;
228fe6e6 2377 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2378 PerlIO_printf(Perl_debug_log,
2379 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2380 (IV)(locinput - PL_bostr),
2381 PL_colors[4],
2382 len0, s0,
2383 PL_colors[5],
2384 PL_colors[2],
2385 len1, s1,
2386 PL_colors[3],
2387 (docolor ? "" : "> <"),
2388 PL_colors[0],
2389 len2, s2,
2390 PL_colors[1],
2391 15 - l - pref_len + 1,
2392 "",
2393 (IV)(scan - PL_regprogram), PL_regindent*2, "",
fdac8c4b 2394 SvPVX_const(prop));
2a782b5b
JH
2395 }
2396 });
d6a28714
JH
2397
2398 next = scan + NEXT_OFF(scan);
2399 if (next == scan)
2400 next = NULL;
2401
2402 switch (OP(scan)) {
2403 case BOL:
12d33761
HS
2404 if (locinput == PL_bostr || (PL_multiline &&
2405 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2406 {
2407 /* regtill = regbol; */
b8c5462f
JH
2408 break;
2409 }
d6a28714
JH
2410 sayNO;
2411 case MBOL:
12d33761
HS
2412 if (locinput == PL_bostr ||
2413 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2414 {
b8c5462f
JH
2415 break;
2416 }
d6a28714
JH
2417 sayNO;
2418 case SBOL:
c2a73568 2419 if (locinput == PL_bostr)
b8c5462f 2420 break;
d6a28714
JH
2421 sayNO;
2422 case GPOS:
2423 if (locinput == PL_reg_ganch)
2424 break;
2425 sayNO;
2426 case EOL:
2427 if (PL_multiline)
2428 goto meol;
2429 else
2430 goto seol;
2431 case MEOL:
2432 meol:
2433 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2434 sayNO;
b8c5462f 2435 break;
d6a28714
JH
2436 case SEOL:
2437 seol:
2438 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2439 sayNO;
d6a28714 2440 if (PL_regeol - locinput > 1)
b8c5462f 2441 sayNO;
b8c5462f 2442 break;
d6a28714
JH
2443 case EOS:
2444 if (PL_regeol != locinput)
b8c5462f 2445 sayNO;
d6a28714 2446 break;
ffc61ed2 2447 case SANY:
d6a28714 2448 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2449 sayNO;
f33976b4
DB
2450 if (do_utf8) {
2451 locinput += PL_utf8skip[nextchr];
2452 if (locinput > PL_regeol)
2453 sayNO;
2454 nextchr = UCHARAT(locinput);
2455 }
2456 else
2457 nextchr = UCHARAT(++locinput);
2458 break;
2459 case CANY:
2460 if (!nextchr && locinput >= PL_regeol)
2461 sayNO;
b8c5462f 2462 nextchr = UCHARAT(++locinput);
a0d0e21e 2463 break;
ffc61ed2 2464 case REG_ANY:
1aa99e6b
IH
2465 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2466 sayNO;
2467 if (do_utf8) {
b8c5462f 2468 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2469 if (locinput > PL_regeol)
2470 sayNO;
a0ed51b3 2471 nextchr = UCHARAT(locinput);
a0ed51b3 2472 }
1aa99e6b
IH
2473 else
2474 nextchr = UCHARAT(++locinput);
a0ed51b3 2475 break;
8c18bf38 2476
d6a28714 2477 case EXACT:
cd439c50
IZ
2478 s = STRING(scan);
2479 ln = STR_LEN(scan);
eb160463 2480 if (do_utf8 != UTF) {
bc517b45 2481 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2482 char *l = locinput;
8c18bf38 2483 const char *e = s + ln;
a72c7584 2484
5ff6fc6d
JH
2485 if (do_utf8) {
2486 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2487 while (s < e) {
8c18bf38 2488 STRLEN ulen;
1aa99e6b 2489 if (l >= PL_regeol)
5ff6fc6d
JH
2490 sayNO;
2491 if (NATIVE_TO_UNI(*(U8*)s) !=
a2a469f9 2492 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2493 ckWARN(WARN_UTF8) ?
2494 0 : UTF8_ALLOW_ANY))
5ff6fc6d 2495 sayNO;
bc517b45 2496 l += ulen;
5ff6fc6d 2497 s ++;
1aa99e6b 2498 }
5ff6fc6d
JH
2499 }
2500 else {
2501 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2502 while (s < e) {
8c18bf38 2503 STRLEN ulen;
1aa99e6b
IH
2504 if (l >= PL_regeol)
2505 sayNO;
5ff6fc6d 2506 if (NATIVE_TO_UNI(*((U8*)l)) !=
a2a469f9 2507 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2508 ckWARN(WARN_UTF8) ?
2509 0 : UTF8_ALLOW_ANY))
1aa99e6b 2510 sayNO;
bc517b45 2511 s += ulen;
a72c7584 2512 l ++;
1aa99e6b 2513 }
5ff6fc6d 2514 }
1aa99e6b
IH
2515 locinput = l;
2516 nextchr = UCHARAT(locinput);
2517 break;
2518 }
bc517b45 2519 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2520 /* Inline the first character, for speed. */
2521 if (UCHARAT(s) != nextchr)
2522 sayNO;
2523 if (PL_regeol - locinput < ln)
2524 sayNO;
2525 if (ln > 1 && memNE(s, locinput, ln))
2526 sayNO;
2527 locinput += ln;
2528 nextchr = UCHARAT(locinput);
2529 break;
2530 case EXACTFL:
b8c5462f
JH
2531 PL_reg_flags |= RF_tainted;
2532 /* FALL THROUGH */
d6a28714 2533 case EXACTF:
cd439c50
IZ
2534 s = STRING(scan);
2535 ln = STR_LEN(scan);
d6a28714 2536
d07ddd77
JH
2537 if (do_utf8 || UTF) {
2538 /* Either target or the pattern are utf8. */
d6a28714 2539 char *l = locinput;
d07ddd77 2540 char *e = PL_regeol;
bc517b45 2541
eb160463 2542 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2543 l, &e, 0, do_utf8)) {
5486206c
JH
2544 /* One more case for the sharp s:
2545 * pack("U0U*", 0xDF) =~ /ss/i,
2546 * the 0xC3 0x9F are the UTF-8
2547 * byte sequence for the U+00DF. */
2548 if (!(do_utf8 &&
2549 toLOWER(s[0]) == 's' &&
2550 ln >= 2 &&
2551 toLOWER(s[1]) == 's' &&
2552 (U8)l[0] == 0xC3 &&
2553 e - l >= 2 &&
2554 (U8)l[1] == 0x9F))
2555 sayNO;
2556 }
d07ddd77
JH
2557 locinput = e;
2558 nextchr = UCHARAT(locinput);
2559 break;
a0ed51b3 2560 }
d6a28714 2561
bc517b45
JH
2562 /* Neither the target and the pattern are utf8. */
2563
d6a28714
JH
2564 /* Inline the first character, for speed. */
2565 if (UCHARAT(s) != nextchr &&
2566 UCHARAT(s) != ((OP(scan) == EXACTF)
2567 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2568 sayNO;
d6a28714 2569 if (PL_regeol - locinput < ln)
b8c5462f 2570 sayNO;
d6a28714
JH
2571 if (ln > 1 && (OP(scan) == EXACTF
2572 ? ibcmp(s, locinput, ln)
2573 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2574 sayNO;
d6a28714
JH
2575 locinput += ln;
2576 nextchr = UCHARAT(locinput);
a0d0e21e 2577 break;
d6a28714 2578 case ANYOF:
ffc61ed2 2579 if (do_utf8) {
9e55ce06
JH
2580 STRLEN inclasslen = PL_regeol - locinput;
2581
ba7b4546 2582 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2583 sayNO_ANYOF;
ffc61ed2
JH
2584 if (locinput >= PL_regeol)
2585 sayNO;
0f0076b4 2586 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2587 nextchr = UCHARAT(locinput);
e0f9d4a8 2588 break;
ffc61ed2
JH
2589 }
2590 else {
2591 if (nextchr < 0)
2592 nextchr = UCHARAT(locinput);
7d3e948e 2593 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2594 sayNO_ANYOF;
ffc61ed2
JH
2595 if (!nextchr && locinput >= PL_regeol)
2596 sayNO;
2597 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2598 break;
2599 }
2600 no_anyof:
2601 /* If we might have the case of the German sharp s
2602 * in a casefolding Unicode character class. */
2603
ebc501f0
JH
2604 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2605 locinput += SHARP_S_SKIP;
e0f9d4a8 2606 nextchr = UCHARAT(locinput);
ffc61ed2 2607 }
e0f9d4a8
JH
2608 else
2609 sayNO;
b8c5462f 2610 break;
d6a28714 2611 case ALNUML:
b8c5462f
JH
2612 PL_reg_flags |= RF_tainted;
2613 /* FALL THROUGH */
d6a28714 2614 case ALNUM:
b8c5462f 2615 if (!nextchr)
4633a7c4 2616 sayNO;
ffc61ed2 2617 if (do_utf8) {
fab1f349 2618 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2619 if (!(OP(scan) == ALNUM
3568d838 2620 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2621 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2622 {
2623 sayNO;
a0ed51b3 2624 }
b8c5462f 2625 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2626 nextchr = UCHARAT(locinput);
2627 break;
2628 }
ffc61ed2 2629 if (!(OP(scan) == ALNUM
d6a28714 2630 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2631 sayNO;
b8c5462f 2632 nextchr = UCHARAT(++locinput);
a0d0e21e 2633 break;
d6a28714 2634 case NALNUML:
b8c5462f
JH
2635 PL_reg_flags |= RF_tainted;
2636 /* FALL THROUGH */
d6a28714
JH
2637 case NALNUM:
2638 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2639 sayNO;
ffc61ed2 2640 if (do_utf8) {
fab1f349 2641 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2642 if (OP(scan) == NALNUM
3568d838 2643 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2644 : isALNUM_LC_utf8((U8*)locinput))
2645 {
b8c5462f 2646 sayNO;
d6a28714 2647 }
b8c5462f
JH
2648 locinput += PL_utf8skip[nextchr];
2649 nextchr = UCHARAT(locinput);
2650 break;
2651 }
ffc61ed2 2652 if (OP(scan) == NALNUM
d6a28714 2653 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2654 sayNO;
76e3520e 2655 nextchr = UCHARAT(++locinput);
a0d0e21e 2656 break;
d6a28714
JH
2657 case BOUNDL:
2658 case NBOUNDL:
3280af22 2659 PL_reg_flags |= RF_tainted;
bbce6d69 2660 /* FALL THROUGH */
d6a28714
JH
2661 case BOUND:
2662 case NBOUND:
2663 /* was last char in word? */
ffc61ed2 2664 if (do_utf8) {
12d33761
HS
2665 if (locinput == PL_bostr)
2666 ln = '\n';
ffc61ed2 2667 else {
8c18bf38 2668 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2669
b4f7163a 2670 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2671 }
2672 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2673 ln = isALNUM_uni(ln);
fab1f349 2674 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 2675 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2676 }
2677 else {
9041c2e3 2678 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2679 n = isALNUM_LC_utf8((U8*)locinput);
2680 }
a0ed51b3 2681 }
d6a28714 2682 else {
12d33761
HS
2683 ln = (locinput != PL_bostr) ?
2684 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2685 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2686 ln = isALNUM(ln);
2687 n = isALNUM(nextchr);
2688 }
2689 else {
2690 ln = isALNUM_LC(ln);
2691 n = isALNUM_LC(nextchr);
2692 }
d6a28714 2693 }
ffc61ed2
JH
2694 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2695 OP(scan) == BOUNDL))
2696 sayNO;
a0ed51b3 2697 break;
d6a28714 2698 case SPACEL:
3280af22 2699 PL_reg_flags |= RF_tainted;
bbce6d69 2700 /* FALL THROUGH */
d6a28714 2701 case SPACE:
9442cb0e 2702 if (!nextchr)
4633a7c4 2703 sayNO;
1aa99e6b 2704 if (do_utf8) {
fd400ab9 2705 if (UTF8_IS_CONTINUED(nextchr)) {
fab1f349 2706 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 2707 if (!(OP(scan) == SPACE
3568d838 2708 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2709 : isSPACE_LC_utf8((U8*)locinput)))
2710 {
2711 sayNO;
2712 }
2713 locinput += PL_utf8skip[nextchr];
2714 nextchr = UCHARAT(locinput);
2715 break;
d6a28714 2716 }
ffc61ed2
JH
2717 if (!(OP(scan) == SPACE
2718 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2719 sayNO;
2720 nextchr = UCHARAT(++locinput);
2721 }
2722 else {
2723 if (!(OP(scan) == SPACE
2724 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2725 sayNO;
2726 nextchr = UCHARAT(++locinput);
a0ed51b3 2727 }
a0ed51b3 2728 break;
d6a28714 2729 case NSPACEL:
3280af22 2730 PL_reg_flags |= RF_tainted;
bbce6d69 2731 /* FALL THROUGH */
d6a28714 2732 case NSPACE:
9442cb0e 2733 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2734 sayNO;
1aa99e6b 2735 if (do_utf8) {
fab1f349 2736 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 2737 if (OP(scan) == NSPACE
3568d838 2738 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2739 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2740 {
2741 sayNO;
2742 }
2743 locinput += PL_utf8skip[nextchr];
2744 nextchr = UCHARAT(locinput);
2745 break;
a0ed51b3 2746 }
ffc61ed2 2747 if (OP(scan) == NSPACE
d6a28714 2748 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2749 sayNO;
76e3520e 2750 nextchr = UCHARAT(++locinput);
a0d0e21e 2751 break;
d6a28714 2752 case DIGITL:
a0ed51b3
LW
2753 PL_reg_flags |= RF_tainted;
2754 /* FALL THROUGH */
d6a28714 2755 case DIGIT:
9442cb0e 2756 if (!nextchr)
a0ed51b3 2757 sayNO;
1aa99e6b 2758 if (do_utf8) {
fab1f349 2759 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 2760 if (!(OP(scan) == DIGIT
3568d838 2761 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2762 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2763 {
a0ed51b3 2764 sayNO;
dfe13c55 2765 }
6f06b55f 2766 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2767 nextchr = UCHARAT(locinput);
2768 break;
2769 }
ffc61ed2 2770 if (!(OP(scan) == DIGIT
9442cb0e 2771 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2772 sayNO;
2773 nextchr = UCHARAT(++locinput);
2774 break;
d6a28714 2775 case NDIGITL:
b8c5462f
JH
2776 PL_reg_flags |= RF_tainted;
2777 /* FALL THROUGH */
d6a28714 2778 case NDIGIT:
9442cb0e 2779 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2780 sayNO;
1aa99e6b 2781 if (do_utf8) {
fab1f349 2782 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 2783 if (OP(scan) == NDIGIT
3568d838 2784 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2785 : isDIGIT_LC_utf8((U8*)locinput))
2786 {
a0ed51b3 2787 sayNO;
9442cb0e 2788 }
6f06b55f 2789 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2790 nextchr = UCHARAT(locinput);
2791 break;
2792 }
ffc61ed2 2793 if (OP(scan) == NDIGIT
9442cb0e 2794 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2795 sayNO;
2796 nextchr = UCHARAT(++locinput);
2797 break;
2798 case CLUMP:
b7c83a7e 2799 if (locinput >= PL_regeol)
a0ed51b3 2800 sayNO;
b7c83a7e 2801 if (do_utf8) {
fab1f349 2802 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
2803 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2804 sayNO;
2805 locinput += PL_utf8skip[nextchr];
2806 while (locinput < PL_regeol &&
2807 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2808 locinput += UTF8SKIP(locinput);
2809 if (locinput > PL_regeol)
2810 sayNO;
eb08e2da
JH
2811 }
2812 else
2813 locinput++;
a0ed51b3
LW
2814 nextchr = UCHARAT(locinput);
2815 break;
c8756f30 2816 case REFFL:
3280af22 2817 PL_reg_flags |= RF_tainted;
c8756f30 2818 /* FALL THROUGH */
c277df42 2819 case REF:
c8756f30 2820 case REFF:
c277df42 2821 n = ARG(scan); /* which paren pair */
cf93c79d 2822 ln = PL_regstartp[n];
2c2d71f5 2823 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 2824 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 2825 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2826 if (ln == PL_regendp[n])
a0d0e21e 2827 break;
a0ed51b3 2828
cf93c79d 2829 s = PL_bostr + ln;
1aa99e6b 2830 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2831 char *l = locinput;
8c18bf38 2832 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2833 /*
2834 * Note that we can't do the "other character" lookup trick as
2835 * in the 8-bit case (no pun intended) because in Unicode we
2836 * have to map both upper and title case to lower case.
2837 */
2838 if (OP(scan) == REFF) {
2839 while (s < e) {
8c18bf38
AL
2840 STRLEN ulen1, ulen2;
2841 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
2842 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
2843
a0ed51b3
LW
2844 if (l >= PL_regeol)
2845 sayNO;
a2a2844f
JH
2846 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2847 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2848 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2849 sayNO;
a2a2844f
JH
2850 s += ulen1;
2851 l += ulen2;
a0ed51b3
LW
2852 }
2853 }
2854 locinput = l;
2855 nextchr = UCHARAT(locinput);
2856 break;
2857 }
2858
a0d0e21e 2859 /* Inline the first character, for speed. */
76e3520e 2860 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2861 (OP(scan) == REF ||
2862 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2863 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2864 sayNO;
cf93c79d 2865 ln = PL_regendp[n] - ln;
3280af22 2866 if (locinput + ln > PL_regeol)
4633a7c4 2867 sayNO;
c8756f30
AK
2868 if (ln > 1 && (OP(scan) == REF
2869 ? memNE(s, locinput, ln)
2870 : (OP(scan) == REFF
2871 ? ibcmp(s, locinput, ln)
2872 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2873 sayNO;
a0d0e21e 2874 locinput += ln;
76e3520e 2875 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2876 break;
2877
2878 case NOTHING:
c277df42 2879 case TAIL:
a0d0e21e
LW
2880 break;
2881 case BACK:
2882 break;
c277df42
IZ
2883 case EVAL:
2884 {
2885 dSP;
533c011a 2886 OP_4tree *oop = PL_op;
3280af22 2887 COP *ocurcop = PL_curcop;
d7afa7f5 2888 PAD *old_comppad;
c277df42 2889 SV *ret;
9b8ea7f3 2890 struct regexp *oreg = PL_reg_re;
9041c2e3 2891
c277df42 2892 n = ARG(scan);
533c011a 2893 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2894 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
d7afa7f5 2895 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 2896 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2897
8e5e9ebe
RGS
2898 {
2899 SV **before = SP;
2900 CALLRUNOPS(aTHX); /* Scalar context. */
2901 SPAGAIN;
2902 if (SP == before)
075aa684 2903 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
2904 else {
2905 ret = POPs;
2906 PUTBACK;
2907 }
2908 }
2909
0f5d15d6 2910 PL_op = oop;
d7afa7f5 2911 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 2912 PL_curcop = ocurcop;
c277df42 2913 if (logical) {
0f5d15d6
IZ
2914 if (logical == 2) { /* Postponed subexpression. */
2915 regexp *re;
22c35a8c 2916 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2917 re_cc_state state;
0f5d15d6 2918 CHECKPOINT cp, lastcp;
5b7ea690 2919 int toggleutf;
5835a535 2920 register SV *sv;
0f5d15d6 2921
5835a535
JH
2922 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2923 mg = mg_find(sv, PERL_MAGIC_qr);
2924 else if (SvSMAGICAL(ret)) {
2925 if (SvGMAGICAL(ret))
2926 sv_unmagic(ret, PERL_MAGIC_qr);
2927 else
2928 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 2929 }
5835a535 2930
0f5d15d6
IZ
2931 if (mg) {
2932 re = (regexp *)mg->mg_obj;
df0003d4 2933 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2934 }
2935 else {
2936 STRLEN len;
c06c673c 2937 const char *t = SvPV_const(ret, len);
0f5d15d6 2938 PMOP pm;
8c18bf38
AL
2939 char * const oprecomp = PL_regprecomp;
2940 const I32 osize = PL_regsize;
2941 const I32 onpar = PL_regnpar;
0f5d15d6 2942
5fcd1c1b 2943 Zero(&pm, 1, PMOP);
5b7ea690 2944 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
c06c673c 2945 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 2946 if (!(SvFLAGS(ret)
5835a535
JH
2947 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2948 | SVs_GMG)))
14befaf4
DM
2949 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2950 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2951 PL_regprecomp = oprecomp;
2952 PL_regsize = osize;
2953 PL_regnpar = onpar;
2954 }
2955 DEBUG_r(
9041c2e3 2956 PerlIO_printf(Perl_debug_log,
5332c881 2957 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
2958 PL_colors[0],
2959 re->precomp,
2960 PL_colors[1],
2961 (strlen(re->precomp) > 60 ? "..." : ""))
2962 );
2963 state.node = next;
2964 state.prev = PL_reg_call_cc;
2965 state.cc = PL_regcc;
2966 state.re = PL_reg_re;
2967
2ab05381 2968 PL_regcc = 0;
9041c2e3 2969
0f5d15d6 2970 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2971 REGCP_SET(lastcp);
0f5d15d6
IZ
2972 cache_re(re);
2973 state.ss = PL_savestack_ix;
2974 *PL_reglastparen = 0;
a01268b5 2975 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2976 PL_reg_call_cc = &state;
2977 PL_reginput = locinput;
5b7ea690
JH
2978 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2979 ((re->reganch & ROPT_UTF8) != 0);
2980 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
2981
2982 /* XXXX This is too dramatic a measure... */
2983 PL_reg_maxiter = 0;
2984
0f5d15d6 2985 if (regmatch(re->program + 1)) {
2c914db6
IZ
2986 /* Even though we succeeded, we need to restore
2987 global variables, since we may be wrapped inside
2988 SUSPEND, thus the match may be not finished yet. */
2989
2990 /* XXXX Do this only if SUSPENDed? */
2991 PL_reg_call_cc = state.prev;
2992 PL_regcc = state.cc;
2993 PL_reg_re = state.re;
2994 cache_re(PL_reg_re);
5b7ea690 2995 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
2996
2997 /* XXXX This is too dramatic a measure... */
2998 PL_reg_maxiter = 0;
2999
3000 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3001 ReREFCNT_dec(re);
3002 regcpblow(cp);
3003 sayYES;
3004 }
0f5d15d6 3005 ReREFCNT_dec(re);
02db2b7b 3006 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3007 regcppop();
3008 PL_reg_call_cc = state.prev;
3009 PL_regcc = state.cc;
3010 PL_reg_re = state.re;
d3790889 3011 cache_re(PL_reg_re);
5b7ea690 3012 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3013
3014 /* XXXX This is too dramatic a measure... */
3015 PL_reg_maxiter = 0;
3016
8e514ae6 3017 logical = 0;
0f5d15d6
IZ
3018 sayNO;
3019 }
c277df42 3020 sw = SvTRUE(ret);
0f5d15d6 3021 logical = 0;
a0ed51b3 3022 }
9b8ea7f3 3023 else {
3280af22 3024 sv_setsv(save_scalar(PL_replgv), ret);
9b8ea7f3
JH
3025 cache_re(oreg);
3026 }
c277df42
IZ
3027 break;
3028 }
a0d0e21e 3029 case OPEN:
c277df42 3030 n = ARG(scan); /* which paren pair */
3280af22
NIS
3031 PL_reg_start_tmp[n] = locinput;
3032 if (n > PL_regsize)
3033 PL_regsize = n;
a0d0e21e
LW
3034 break;
3035 case CLOSE:
c277df42 3036 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3037 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3038 PL_regendp[n] = locinput - PL_bostr;
eb160463 3039 if (n > (I32)*PL_reglastparen)
3280af22 3040 *PL_reglastparen = n;
a01268b5 3041 *PL_reglastcloseparen = n;
a0d0e21e 3042 break;
c277df42
IZ
3043 case GROUPP:
3044 n = ARG(scan); /* which paren pair */
eb160463 3045 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3046 break;
3047 case IFTHEN:
2c2d71f5 3048 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3049 if (sw)
3050 next = NEXTOPER(NEXTOPER(scan));
3051 else {
3052 next = scan + ARG(scan);
3053 if (OP(next) == IFTHEN) /* Fake one. */
3054 next = NEXTOPER(NEXTOPER(next));
3055 }
3056 break;
3057 case LOGICAL:
0f5d15d6 3058 logical = scan->flags;
c277df42 3059 break;
2ab05381
IZ
3060/*******************************************************************
3061 PL_regcc contains infoblock about the innermost (...)* loop, and
3062 a pointer to the next outer infoblock.
3063
3064 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3065
3066 1) After matching X, regnode for CURLYX is processed;
3067
9041c2e3 3068 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
3069 regmatch() recursively with the starting point at WHILEM node;
3070
3071 3) Each hit of WHILEM node tries to match A and Z (in the order
3072 depending on the current iteration, min/max of {min,max} and
3073 greediness). The information about where are nodes for "A"
3074 and "Z" is read from the infoblock, as is info on how many times "A"
3075 was already matched, and greediness.
3076
3077 4) After A matches, the same WHILEM node is hit again.
3078
3079 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3080 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3081 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3082 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3083 of the external loop.
3084
3085 Currently present infoblocks form a tree with a stem formed by PL_curcc
3086 and whatever it mentions via ->next, and additional attached trees
3087 corresponding to temporarily unset infoblocks as in "5" above.
3088
9041c2e3 3089 In the following picture infoblocks for outer loop of
2ab05381
IZ
3090 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3091 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3092 infoblocks are drawn below the "reset" infoblock.
3093
3094 In fact in the picture below we do not show failed matches for Z and T
3095 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3096 more obvious *why* one needs to *temporary* unset infoblocks.]
3097
3098 Matched REx position InfoBlocks Comment
3099 (Y(A)*?Z)*?T x
3100 Y(A)*?Z)*?T x <- O
3101 Y (A)*?Z)*?T x <- O
3102 Y A)*?Z)*?T x <- O <- I
3103 YA )*?Z)*?T x <- O <- I
3104 YA A)*?Z)*?T x <- O <- I
3105 YAA )*?Z)*?T x <- O <- I
3106 YAA Z)*?T x <- O # Temporary unset I
3107 I
3108
3109 YAAZ Y(A)*?Z)*?T x <- O
3110 I
3111
3112 YAAZY (A)*?Z)*?T x <- O
3113 I
3114
3115 YAAZY A)*?Z)*?T x <- O <- I
3116 I
3117
3118 YAAZYA )*?Z)*?T x <- O <- I
3119 I
3120
3121 YAAZYA Z)*?T x <- O # Temporary unset I
3122 I,I
3123
3124 YAAZYAZ )*?T x <- O
3125 I,I
3126
3127 YAAZYAZ T x # Temporary unset O
3128 O
3129 I,I
3130
3131 YAAZYAZT x
3132 O
3133 I,I
3134 *******************************************************************/
a0d0e21e
LW
3135 case CURLYX: {
3136 CURCUR cc;
3280af22 3137 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3138 /* No need to save/restore up to this paren */
3139 I32 parenfloor = scan->flags;
c277df42
IZ
3140
3141 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3142 next += ARG(next);
3280af22
NIS
3143 cc.oldcc = PL_regcc;
3144 PL_regcc = &cc;
cb434fcc
IZ
3145 /* XXXX Probably it is better to teach regpush to support
3146 parenfloor > PL_regsize... */
eb160463 3147 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3148 parenfloor = *PL_reglastparen; /* Pessimization... */
3149 cc.parenfloor = parenfloor;
a0d0e21e
LW
3150 cc.cur = -1;
3151 cc.min = ARG1(scan);
3152 cc.max = ARG2(scan);
c277df42 3153 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3154 cc.next = next;
3155 cc.minmod = minmod;
3156 cc.lastloc = 0;
3280af22 3157 PL_reginput = locinput;
a0d0e21e
LW
3158 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3159 regcpblow(cp);
3280af22 3160 PL_regcc = cc.oldcc;
4633a7c4 3161 saySAME(n);
a0d0e21e
LW
3162 }
3163 /* NOT REACHED */
3164 case WHILEM: {
3165 /*
3166 * This is really hard to understand, because after we match
3167 * what we're trying to match, we must make sure the rest of
2c2d71f5 3168 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3169 * to go back UP the parse tree by recursing ever deeper. And
3170 * if it fails, we have to reset our parent's current state
3171 * that we can try again after backing off.
3172 */
3173
c277df42 3174 CHECKPOINT cp, lastcp;
3280af22 3175 CURCUR* cc = PL_regcc;
c277df42 3176 char *lastloc = cc->lastloc; /* Detection of 0-len. */
6e00f9c6 3177 I32 cache_offset = 0, cache_bit = 0;
c277df42 3178
4633a7c4 3179 n = cc->cur + 1; /* how many we know we matched */
3280af22 3180 PL_reginput = locinput;
a0d0e21e 3181
c277df42 3182 DEBUG_r(
9041c2e3 3183 PerlIO_printf(Perl_debug_log,
91f3b821 3184 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3185 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3186 (long)n, (long)cc->min,
2797576d 3187 (long)cc->max, PTR2UV(cc))
c277df42 3188 );
4633a7c4 3189
a0d0e21e
LW
3190 /* If degenerate scan matches "", assume scan done. */
3191
579cf2c3 3192 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3193 PL_regcc = cc->oldcc;
2ab05381
IZ
3194 if (PL_regcc)
3195 ln = PL_regcc->cur;
c277df42 3196 DEBUG_r(
c3464db5
DD
3197 PerlIO_printf(Perl_debug_log,
3198 "%*s empty match detected, try continuation...\n",
3280af22 3199 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3200 );
a0d0e21e 3201 if (regmatch(cc->next))
4633a7c4 3202 sayYES;
2ab05381
IZ
3203 if (PL_regcc)
3204 PL_regcc->cur = ln;
3280af22 3205 PL_regcc = cc;
4633a7c4 3206 sayNO;
a0d0e21e
LW
3207 }
3208
3209 /* First just match a string of min scans. */
3210
3211 if (n < cc->min) {
3212 cc->cur = n;
3213 cc->lastloc = locinput;
4633a7c4
LW
3214 if (regmatch(cc->scan))
3215 sayYES;
3216 cc->cur = n - 1;
c277df42 3217 cc->lastloc = lastloc;
4633a7c4 3218 sayNO;
a0d0e21e
LW
3219 }
3220
2c2d71f5
JH
3221 if (scan->flags) {
3222 /* Check whether we already were at this position.
3223 Postpone detection until we know the match is not
3224 *that* much linear. */
3225 if (!PL_reg_maxiter) {
3226 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3227 PL_reg_leftiter = PL_reg_maxiter;
3228 }
3229 if (PL_reg_leftiter-- == 0) {
8c18bf38 3230 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3231 if (PL_reg_poscache) {
eb160463 3232 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3233 Renew(PL_reg_poscache, size, char);
3234 PL_reg_poscache_size = size;
3235 }
3236 Zero(PL_reg_poscache, size, char);
3237 }
3238 else {
3239 PL_reg_poscache_size = size;
cd7a8267 3240 Newxz(PL_reg_poscache, size, char);
2c2d71f5
JH
3241 }
3242 DEBUG_r(
3243 PerlIO_printf(Perl_debug_log,
3244 "%sDetected a super-linear match, switching on caching%s...\n",
3245 PL_colors[4], PL_colors[5])
3246 );
3247 }
3248 if (PL_reg_leftiter < 0) {
6e00f9c6 3249 cache_offset = locinput - PL_bostr;
2c2d71f5 3250
6e00f9c6
HS
3251 cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3252 + cache_offset * (scan->flags>>4);
3253 cache_bit = cache_offset % 8;
3254 cache_offset /= 8;
3255 if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
2c2d71f5
JH
3256 DEBUG_r(
3257 PerlIO_printf(Perl_debug_log,
3258 "%*s already tried at this position...\n",
3259 REPORT_CODE_OFF+PL_regindent*2, "")
3260 );
6e00f9c6
HS
3261 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3262 /* cache records success */
91513146
JH
3263 sayYES;
3264 else
6e00f9c6 3265 /* cache records failure */
91513146 3266 sayNO_SILENT;
2c2d71f5 3267 }
6e00f9c6 3268 PL_reg_poscache[cache_offset] |= (1<<cache_bit);
2c2d71f5
JH
3269 }
3270 }
3271
a0d0e21e
LW
3272 /* Prefer next over scan for minimal matching. */
3273
3274 if (cc->minmod) {
3280af22 3275 PL_regcc = cc->oldcc;
2ab05381
IZ
3276 if (PL_regcc)
3277 ln = PL_regcc->cur;
5f05dabc 3278 cp = regcppush(cc->parenfloor);
02db2b7b 3279 REGCP_SET(lastcp);
5f05dabc 3280 if (regmatch(cc->next)) {
c277df42 3281 regcpblow(cp);
6e00f9c6 3282 CACHEsayYES; /* All done. */
5f05dabc 3283 }
02db2b7b 3284 REGCP_UNWIND(lastcp);
5f05dabc 3285 regcppop();
2ab05381
IZ
3286 if (PL_regcc)
3287 PL_regcc->cur = ln;
3280af22 3288 PL_regcc = cc;
a0d0e21e 3289
c277df42 3290 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3291 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3292 && !(PL_reg_flags & RF_warned)) {
3293 PL_reg_flags |= RF_warned;
9014280d 3294 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3295 "Complex regular subexpression recursion",
3296 REG_INFTY - 1);
c277df42 3297 }
6e00f9c6 3298 CACHEsayNO;
c277df42 3299 }
a687059c 3300
c277df42 3301 DEBUG_r(
c3464db5
DD
3302 PerlIO_printf(Perl_debug_log,
3303 "%*s trying longer...\n",
3280af22 3304 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3305 );
a0d0e21e 3306 /* Try scanning more and see if it helps. */
3280af22 3307 PL_reginput = locinput;
a0d0e21e
LW
3308 cc->cur = n;
3309 cc->lastloc = locinput;
5f05dabc 3310 cp = regcppush(cc->parenfloor);
02db2b7b 3311 REGCP_SET(lastcp);
5f05dabc 3312 if (regmatch(cc->scan)) {
c277df42 3313 regcpblow(cp);
6e00f9c6 3314 CACHEsayYES;
5f05dabc 3315 }
02db2b7b 3316 REGCP_UNWIND(lastcp);
5f05dabc 3317 regcppop();
4633a7c4 3318 cc->cur = n - 1;
c277df42 3319 cc->lastloc = lastloc;
6e00f9c6 3320 CACHEsayNO;
a0d0e21e
LW
3321 }
3322
3323 /* Prefer scan over next for maximal matching. */
3324
3325 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3326 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3327 cc->cur = n;
3328 cc->lastloc = locinput;
02db2b7b 3329 REGCP_SET(lastcp);
5f05dabc 3330 if (regmatch(cc->scan)) {
c277df42 3331 regcpblow(cp);
6e00f9c6 3332 CACHEsayYES;
5f05dabc 3333 }
02db2b7b 3334 REGCP_UNWIND(lastcp);
a0d0e21e 3335 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3336 PL_reginput = locinput;
c277df42 3337 DEBUG_r(
c3464db5
DD
3338 PerlIO_printf(Perl_debug_log,
3339 "%*s failed, try continuation...\n",
3280af22 3340 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3341 );
3342 }
9041c2e3 3343 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3344 && !(PL_reg_flags & RF_warned)) {
3280af22 3345 PL_reg_flags |= RF_warned;
9014280d 3346 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3347 "Complex regular subexpression recursion",
3348 REG_INFTY - 1);
a0d0e21e
LW
3349 }
3350
3351 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3352 PL_regcc = cc->oldcc;
2ab05381
IZ
3353 if (PL_regcc)
3354 ln = PL_regcc->cur;
a0d0e21e 3355 if (regmatch(cc->next))
6e00f9c6 3356 CACHEsayYES;
2ab05381
IZ
3357 if (PL_regcc)
3358 PL_regcc->cur = ln;
3280af22 3359 PL_regcc = cc;
4633a7c4 3360 cc->cur = n - 1;
c277df42 3361 cc->lastloc = lastloc;
6e00f9c6 3362 CACHEsayNO;
a0d0e21e
LW
3363 }
3364 /* NOT REACHED */
9041c2e3 3365 case BRANCHJ:
c277df42
IZ
3366 next = scan + ARG(scan);
3367 if (next == scan)
3368 next = NULL;
3369 inner = NEXTOPER(NEXTOPER(scan));
3370 goto do_branch;
9041c2e3 3371 case BRANCH:
c277df42
IZ
3372 inner = NEXTOPER(scan);
3373 do_branch:
3374 {
c277df42
IZ
3375 c1 = OP(scan);
3376 if (OP(next) != c1) /* No choice. */
3377 next = inner; /* Avoid recursion. */
a0d0e21e 3378 else {
8c18bf38 3379 const I32 lastparen = *PL_reglastparen;
02db2b7b
IZ
3380 I32 unwind1;
3381 re_unwind_branch_t *uw;
3382
3383 /* Put unwinding data on stack */
3384 unwind1 = SSNEWt(1,re_unwind_branch_t);
3385 uw = SSPTRt(unwind1,re_unwind_branch_t);
3386 uw->prev = unwind;
3387 unwind = unwind1;
3388 uw->type = ((c1 == BRANCH)
3389 ? RE_UNWIND_BRANCH
3390 : RE_UNWIND_BRANCHJ);
3391 uw->lastparen = lastparen;
3392 uw->next = next;
3393 uw->locinput = locinput;
3394 uw->nextchr = nextchr;
3395#ifdef DEBUGGING
3396 uw->regindent = ++PL_regindent;
3397#endif
c277df42 3398
02db2b7b
IZ
3399 REGCP_SET(uw->lastcp);
3400
3401 /* Now go into the first branch */
3402 next = inner;
a687059c 3403 }
a0d0e21e
LW
3404 }
3405 break;
3406 case MINMOD:
3407 minmod = 1;
3408 break;
c277df42
IZ
3409 case CURLYM:
3410 {
00db4c45 3411 I32 l = 0;
c277df42 3412 CHECKPOINT lastcp;
9041c2e3 3413
c277df42 3414 /* We suppose that the next guy does not need
b035a42e 3415 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3416 and has no parenths to influence future backrefs. */
3417 ln = ARG1(scan); /* min to match */
3418 n = ARG2(scan); /* max to match */
c277df42
IZ
3419 paren = scan->flags;
3420 if (paren) {
3280af22
NIS
3421 if (paren > PL_regsize)
3422 PL_regsize = paren;
eb160463 3423 if (paren > (I32)*PL_reglastparen)
3280af22 3424 *PL_reglastparen = paren;
c277df42 3425 }
dc45a647 3426 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3427 if (paren)
3428 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3429 PL_reginput = locinput;
c277df42
IZ
3430 if (minmod) {
3431 minmod = 0;
3432 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3433 sayNO;
3280af22 3434 locinput = PL_reginput;
cca55fe3 3435 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3436 regnode *text_node = next;
3437
cca55fe3 3438 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3439
cca55fe3 3440 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3441 else {
cca55fe3 3442 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3443 c1 = c2 = -1000;
3444 goto assume_ok_MM;
cca55fe3
JP
3445 }
3446 else { c1 = (U8)*STRING(text_node); }
af5decee 3447 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3448 c2 = PL_fold[c1];
af5decee 3449 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3450 c2 = PL_fold_locale[c1];
3451 else
3452 c2 = c1;
3453 }
a0ed51b3
LW
3454 }
3455 else
c277df42 3456 c1 = c2 = -1000;
cca55fe3 3457 assume_ok_MM:
02db2b7b 3458 REGCP_SET(lastcp);
b035a42e 3459 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3460 /* If it could work, try it. */
3461 if (c1 == -1000 ||
3280af22
NIS
3462 UCHARAT(PL_reginput) == c1 ||
3463 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3464 {
3465 if (paren) {
f31a99c8 3466 if (ln) {
cf93c79d
IZ
3467 PL_regstartp[paren] =
3468 HOPc(PL_reginput, -l) - PL_bostr;
3469 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3470 }
3471 else
cf93c79d 3472 PL_regendp[paren] = -1;
c277df42
IZ
3473 }
3474 if (regmatch(next))
3475 sayYES;
02db2b7b 3476 REGCP_UNWIND(lastcp);
c277df42
IZ
3477 }
3478 /* Couldn't or didn't -- move forward. */
3280af22 3479 PL_reginput = locinput;
c277df42
IZ
3480 if (regrepeat_hard(scan, 1, &l)) {
3481 ln++;
3280af22 3482 locinput = PL_reginput;
c277df42
IZ
3483 }
3484 else
3485 sayNO;
3486 }
a0ed51b3
LW
3487 }
3488 else {
c277df42 3489 n = regrepeat_hard(scan, n, &l);
3280af22 3490 locinput = PL_reginput;
c277df42 3491 DEBUG_r(
5c0ca799 3492 PerlIO_printf(Perl_debug_log,
faccc32b 3493 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3494 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3495 (IV) n, (IV)l)
c277df42
IZ
3496 );
3497 if (n >= ln) {
cca55fe3 3498 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3499 regnode *text_node = next;
3500
cca55fe3 3501 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3502
cca55fe3 3503 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3504 else {
cca55fe3 3505 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3506 c1 = c2 = -1000;
3507 goto assume_ok_REG;
cca55fe3
JP
3508 }
3509 else { c1 = (U8)*STRING(text_node); }
3510
af5decee 3511 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3512 c2 = PL_fold[c1];
af5decee 3513 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3514 c2 = PL_fold_locale[c1];
3515 else
3516 c2 = c1;
3517 }
a0ed51b3
LW
3518 }
3519 else
c277df42
IZ
3520 c1 = c2 = -1000;
3521 }
cca55fe3 3522 assume_ok_REG:
02db2b7b 3523 REGCP_SET(lastcp);
c277df42
IZ
3524 while (n >= ln) {
3525 /* If it could work, try it. */
3526 if (c1 == -1000 ||
3280af22
NIS
3527 UCHARAT(PL_reginput) == c1 ||
3528 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3529 {
3530 DEBUG_r(
c3464db5 3531 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3532 "%*s trying tail with n=%"IVdf"...\n",
3533 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3534 );
3535 if (paren) {
3536 if (n) {
cf93c79d
IZ
3537 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3538 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3539 }
a0ed51b3 3540 else
cf93c79d 3541 PL_regendp[paren] = -1;
c277df42 3542 }
a0ed51b3
LW
3543 if (regmatch(next))
3544 sayYES;
02db2b7b 3545 REGCP_UNWIND(lastcp);
a0ed51b3 3546 }
c277df42
IZ
3547 /* Couldn't or didn't -- back up. */
3548 n--;
dfe13c55 3549 locinput = HOPc(locinput, -l);
3280af22 3550 PL_reginput = locinput;
c277df42
IZ
3551 }
3552 }
3553 sayNO;
3554 break;
3555 }
3556 case CURLYN:
3557 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3558 if (paren > PL_regsize)
3559 PL_regsize = paren;
eb160463 3560 if (paren > (I32)*PL_reglastparen)
3280af22 3561 *PL_reglastparen = paren;
c277df42
IZ
3562 ln = ARG1(scan); /* min to match */
3563 n = ARG2(scan); /* max to match */
dc45a647 3564 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3565 goto repeat;
a0d0e21e 3566 case CURLY:
c277df42 3567 paren = 0;
a0d0e21e
LW
3568 ln = ARG1(scan); /* min to match */
3569 n = ARG2(scan); /* max to match */
dc45a647 3570 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3571 goto repeat;
3572 case STAR:
3573 ln = 0;
c277df42 3574 n = REG_INFTY;
a0d0e21e 3575 scan = NEXTOPER(scan);
c277df42 3576 paren = 0;
a0d0e21e
LW
3577 goto repeat;
3578 case PLUS:
c277df42
IZ
3579 ln = 1;
3580 n = REG_INFTY;
3581 scan = NEXTOPER(scan);
3582 paren = 0;
3583 repeat:
a0d0e21e
LW
3584 /*
3585 * Lookahead to avoid useless match attempts
3586 * when we know what character comes next.
3587 */
5f80c4cf
JP
3588
3589 /*
3590 * Used to only do .*x and .*?x, but now it allows
3591 * for )'s, ('s and (?{ ... })'s to be in the way
3592 * of the quantifier and the EXACT-like node. -- japhy
3593 */
3594
cca55fe3 3595 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3596 U8 *s;
3597 regnode *text_node = next;
3598
cca55fe3 3599 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3600
cca55fe3 3601 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3602 else {
cca55fe3 3603 if (PL_regkind[(U8)OP(text_node)] == REF) {
c061b110
JH
3604 c1 = c2 = -1000;
3605 goto assume_ok_easy;
cca55fe3
JP
3606 }
3607 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3608
3609 if (!UTF) {
3610 c2 = c1 = *s;
f65d3ee7 3611 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3612 c2 = PL_fold[c1];
f65d3ee7 3613 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3614 c2 = PL_fold_locale[c1];
1aa99e6b 3615 }
5f80c4cf 3616 else { /* UTF */
f65d3ee7 3617 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3618 STRLEN ulen1, ulen2;
a2a469f9
JH
3619 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3620 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
3621
3622 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3623 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3624
a2a469f9 3625 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
872c91ae
JH
3626 ckWARN(WARN_UTF8) ?
3627 0 : UTF8_ALLOW_ANY);
a2a469f9 3628 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
872c91ae
JH
3629 ckWARN(WARN_UTF8) ?
3630 0 : UTF8_ALLOW_ANY);
5f80c4cf
JP
3631 }
3632 else {
a2a469f9 3633 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
872c91ae
JH
3634 ckWARN(WARN_UTF8) ?
3635 0 : UTF8_ALLOW_ANY);
5f80c4cf 3636 }
1aa99e6b
IH
3637 }
3638 }
bbce6d69 3639 }
a0d0e21e 3640 else
bbce6d69 3641 c1 = c2 = -1000;
cca55fe3 3642 assume_ok_easy:
3280af22 3643 PL_reginput = locinput;
a0d0e21e 3644 if (minmod) {
c277df42 3645 CHECKPOINT lastcp;
a0d0e21e
LW
3646 minmod = 0;
3647 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3648 sayNO;
a0ed51b3 3649 locinput = PL_reginput;
02db2b7b 3650 REGCP_SET(lastcp);
0fe9bf95 3651 if (c1 != -1000) {
1aa99e6b 3652 char *e; /* Should not check after this */
0fe9bf95 3653 char *old = locinput;
b2f2f093 3654 int count = 0;
0fe9bf95 3655
1aa99e6b 3656 if (n == REG_INFTY) {
0fe9bf95 3657 e = PL_regeol - 1;
1aa99e6b
IH
3658 if (do_utf8)
3659 while (UTF8_IS_CONTINUATION(*(U8*)e))
3660 e--;
3661 }
3662 else if (do_utf8) {
3663 int m = n - ln;
3664 for (e = locinput;
3665 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3666 e += UTF8SKIP(e);
3667 }
3668 else {
3669 e = locinput + n - ln;
3670 if (e >= PL_regeol)
3671 e = PL_regeol - 1;
3672 }
0fe9bf95
IZ
3673 while (1) {
3674 /* Find place 'next' could work */
1aa99e6b
IH
3675 if (!do_utf8) {
3676 if (c1 == c2) {
a8e8ab15
JH
3677 while (locinput <= e &&
3678 UCHARAT(locinput) != c1)
1aa99e6b
IH
3679 locinput++;
3680 } else {
9041c2e3 3681 while (locinput <= e
a8e8ab15
JH
3682 && UCHARAT(locinput) != c1
3683 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3684 locinput++;
3685 }
3686 count = locinput - old;
3687 }
3688 else {
1aa99e6b 3689 if (c1 == c2) {
8c18bf38 3690 STRLEN len;
872c91ae
JH
3691 /* count initialised to
3692 * utf8_distance(old, locinput) */
b2f2f093 3693 while (locinput <= e &&
872c91ae 3694 utf8n_to_uvchr((U8*)locinput,
a2a469f9 3695 UTF8_MAXBYTES, &len,
872c91ae 3696 ckWARN(WARN_UTF8) ?
eb160463 3697 0 : UTF8_ALLOW_ANY) != (UV)c1) {
1aa99e6b 3698 locinput += len;
b2f2f093
JH
3699 count++;
3700 }
1aa99e6b 3701 } else {
8c18bf38 3702 STRLEN len;
872c91ae
JH
3703 /* count initialised to
3704 * utf8_distance(old, locinput) */
b2f2f093 3705 while (locinput <= e) {
872c91ae 3706 UV c = utf8n_to_uvchr((U8*)locinput,
a2a469f9 3707 UTF8_MAXBYTES, &len,
872c91ae
JH
3708 ckWARN(WARN_UTF8) ?
3709 0 : UTF8_ALLOW_ANY);
eb160463 3710 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 3711 break;
b2f2f093
JH
3712 locinput += len;
3713 count++;
1aa99e6b
IH
3714 }
3715 }
0fe9bf95 3716 }
9041c2e3 3717 if (locinput > e)
0fe9bf95
IZ
3718 sayNO;
3719 /* PL_reginput == old now */
3720 if (locinput != old) {
3721 ln = 1; /* Did some */
1aa99e6b 3722 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3723 sayNO;
3724 }
3725 /* PL_reginput == locinput now */
29d1e993 3726 TRYPAREN(paren, ln, locinput);
0fe9bf95 3727 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3728 REGCP_UNWIND(lastcp);
0fe9bf95 3729 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3730 old = locinput;
3731 if (do_utf8)
3732 locinput += UTF8SKIP(locinput);
3733 else
3734 locinput++;
b2f2f093 3735 count = 1;
0fe9bf95
IZ
3736 }
3737 }
3738 else
c277df42 3739 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3740 UV c;
3741 if (c1 != -1000) {
3742 if (do_utf8)
872c91ae 3743 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3744 UTF8_MAXBYTES, 0,
872c91ae
JH
3745 ckWARN(WARN_UTF8) ?
3746 0 : UTF8_ALLOW_ANY);
1aa99e6b 3747 else
9041c2e3 3748 c = UCHARAT(PL_reginput);
2390ecbc 3749 /* If it could work, try it. */
eb160463 3750 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 3751 {
5b7ea690 3752 TRYPAREN(paren, ln, PL_reginput);
2390ecbc
PP
3753 REGCP_UNWIND(lastcp);
3754 }
1aa99e6b 3755 }
a0d0e21e 3756 /* If it could work, try it. */
2390ecbc 3757 else if (c1 == -1000)
bbce6d69 3758 {
5b7ea690 3759 TRYPAREN(paren, ln, PL_reginput);
02db2b7b 3760 REGCP_UNWIND(lastcp);
bbce6d69 3761 }
c277df42 3762 /* Couldn't or didn't -- move forward. */
a0ed51b3 3763 PL_reginput = locinput;
a0d0e21e
LW
3764 if (regrepeat(scan, 1)) {
3765 ln++;
a0ed51b3
LW
3766 locinput = PL_reginput;
3767 }
3768 else
4633a7c4 3769 sayNO;
a0d0e21e
LW
3770 }
3771 }
3772 else {
c277df42 3773 CHECKPOINT lastcp;
a0d0e21e 3774 n = regrepeat(scan, n);
a0ed51b3 3775 locinput = PL_reginput;
22c35a8c 3776 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3777 ((!PL_multiline && OP(next) != MEOL) ||
3778 OP(next) == SEOL || OP(next) == EOS))
3779 {
a0d0e21e 3780 ln = n; /* why back off? */
1aeab75a
GS
3781 /* ...because $ and \Z can match before *and* after
3782 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3783 We should back off by one in this case. */
3784 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3785 ln--;
3786 }
02db2b7b 3787 REGCP_SET(lastcp);
c277df42 3788 if (paren) {
8fa7f367 3789 UV c = 0;
c277df42 3790 while (n >= ln) {
1aa99e6b
IH
3791 if (c1 != -1000) {
3792 if (do_utf8)
872c91ae 3793 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3794 UTF8_MAXBYTES, 0,
872c91ae
JH
3795 ckWARN(WARN_UTF8) ?
3796 0 : UTF8_ALLOW_ANY);
1aa99e6b 3797 else
9041c2e3 3798 c = UCHARAT(PL_reginput);
1aa99e6b 3799 }
c277df42 3800 /* If it could work, try it. */
eb160463 3801 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 3802 {
29d1e993 3803 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3804 REGCP_UNWIND(lastcp);
c277df42
IZ
3805 }
3806 /* Couldn't or didn't -- back up. */
3807 n--;
dfe13c55 3808 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3809 }
a0ed51b3
LW
3810 }
3811 else {
8fa7f367 3812 UV c = 0;
c277df42 3813 while (n >= ln) {
1aa99e6b
IH
3814 if (c1 != -1000) {
3815 if (do_utf8)
872c91ae 3816 c = utf8n_to_uvchr((U8*)PL_reginput,
a2a469f9 3817 UTF8_MAXBYTES, 0,
872c91ae
JH
3818 ckWARN(WARN_UTF8) ?
3819 0 : UTF8_ALLOW_ANY);
1aa99e6b 3820 else
9041c2e3 3821 c = UCHARAT(PL_reginput);
1aa99e6b 3822 }
c277df42 3823 /* If it could work, try it. */
eb160463 3824 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 3825 {
29d1e993 3826 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3827 REGCP_UNWIND(lastcp);
c277df42
IZ
3828 }
3829 /* Couldn't or didn't -- back up. */
3830 n--;
dfe13c55 3831 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3832 }
a0d0e21e
LW
3833 }
3834 }
4633a7c4 3835 sayNO;
c277df42 3836 break;
a0d0e21e 3837 case END:
0f5d15d6
IZ
3838 if (PL_reg_call_cc) {
3839 re_cc_state *cur_call_cc = PL_reg_call_cc;
3840 CURCUR *cctmp = PL_regcc;
3841 regexp *re = PL_reg_re;
3842 CHECKPOINT cp, lastcp;
3843
3844 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3845 REGCP_SET(lastcp);
0f5d15d6
IZ
3846 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3847 the caller. */
3848 PL_reginput = locinput; /* Make position available to
3849 the callcc. */
3850 cache_re(PL_reg_call_cc->re);
3851 PL_regcc = PL_reg_call_cc->cc;
3852 PL_reg_call_cc = PL_reg_call_cc->prev;
3853 if (regmatch(cur_call_cc->node)) {
3854 PL_reg_call_cc = cur_call_cc;
3855 regcpblow(cp);
3856 sayYES;
3857 }
02db2b7b 3858 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3859 regcppop();
3860 PL_reg_call_cc = cur_call_cc;
3861 PL_regcc = cctmp;
3862 PL_reg_re = re;
3863 cache_re(re);
3864
3865 DEBUG_r(
3866 PerlIO_printf(Perl_debug_log,
3867 "%*s continuation failed...\n",
3868 REPORT_CODE_OFF+PL_regindent*2, "")
3869 );
7821416a 3870 sayNO_SILENT;
0f5d15d6 3871 }
7821416a
IZ
3872 if (locinput < PL_regtill) {
3873 DEBUG_r(PerlIO_printf(Perl_debug_log,
3874 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3875 PL_colors[4],
3876 (long)(locinput - PL_reg_starttry),
3877 (long)(PL_regtill - PL_reg_starttry),
3878 PL_colors[5]));
3879 sayNO_FINAL; /* Cannot match: too short. */
3880 }
3881 PL_reginput = locinput; /* put where regtry can find it */
3882 sayYES_FINAL; /* Success! */
7e5428c5 3883 case SUCCEED:
3280af22 3884 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3885 sayYES_LOUD; /* Success! */
c277df42
IZ
3886 case SUSPEND:
3887 n = 1;
9fe1d20c 3888 PL_reginput = locinput;
9041c2e3 3889 goto do_ifmatch;
a0d0e21e 3890 case UNLESSM:
c277df42 3891 n = 0;
a0ed51b3 3892 if (scan->flags) {
efb30f32
HS
3893 s = HOPBACKc(locinput, scan->flags);
3894 if (!s)
3895 goto say_yes;
3896 PL_reginput = s;
a0ed51b3
LW
3897 }
3898 else
3899 PL_reginput = locinput;
c277df42
IZ
3900 goto do_ifmatch;
3901 case IFMATCH:
3902 n = 1;
a0ed51b3 3903 if (scan->flags) {
efb30f32
HS
3904 s = HOPBACKc(locinput, scan->flags);
3905 if (!s)
3906 goto say_no;
3907 PL_reginput = s;
a0ed51b3
LW
3908 }
3909 else
3910 PL_reginput = locinput;
3911
c277df42 3912 do_ifmatch:
c277df42
IZ
3913 inner = NEXTOPER(NEXTOPER(scan));
3914 if (regmatch(inner) != n) {
3915 say_no:
3916 if (logical) {
3917 logical = 0;
3918 sw = 0;
3919 goto do_longjump;
a0ed51b3
LW
3920 }
3921 else
c277df42
IZ
3922 sayNO;
3923 }
3924 say_yes:
3925 if (logical) {
3926 logical = 0;
3927 sw = 1;
3928 }
fe44a5e8 3929 if (OP(scan) == SUSPEND) {
3280af22 3930 locinput = PL_reginput;
565764a8 3931 nextchr = UCHARAT(locinput);
fe44a5e8 3932 }
c277df42
IZ
3933 /* FALL THROUGH. */
3934 case LONGJMP:
3935 do_longjump:
3936 next = scan + ARG(scan);
3937 if (next == scan)
3938 next = NULL;
a0d0e21e
LW
3939 break;
3940 default:
b900a521 3941 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3942 PTR2UV(scan), OP(scan));
cea2e8a9 3943 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3944 }
02db2b7b 3945 reenter:
a0d0e21e
LW
3946 scan = next;
3947 }
a687059c 3948
a0d0e21e
LW
3949 /*
3950 * We get here only if there's trouble -- normally "case END" is
3951 * the terminating point.
3952 */
cea2e8a9 3953 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3954 /*NOTREACHED*/
4633a7c4
LW
3955 sayNO;
3956
7821416a
IZ
3957yes_loud:
3958 DEBUG_r(
3959 PerlIO_printf(Perl_debug_log,
3960 "%*s %scould match...%s\n",
3961 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3962 );
3963 goto yes;
3964yes_final:
3965 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3966 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3967yes:
3968#ifdef DEBUGGING
3280af22 3969 PL_regindent--;
4633a7c4 3970#endif
02db2b7b
IZ
3971
3972#if 0 /* Breaks $^R */
3973 if (unwind)
3974 regcpblow(firstcp);
3975#endif
4633a7c4
LW
3976 return 1;
3977
3978no:
7821416a
IZ
3979 DEBUG_r(
3980 PerlIO_printf(Perl_debug_log,
3981 "%*s %sfailed...%s\n",
3982 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3983 );
3984 goto do_no;
3985no_final:
3986do_no:
02db2b7b
IZ
3987 if (unwind) {
3988 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3989
3990 switch (uw->type) {
3991 case RE_UNWIND_BRANCH:
3992 case RE_UNWIND_BRANCHJ:
3993 {
3994 re_unwind_branch_t *uwb = &(uw->branch);
8c18bf38 3995 const I32 lastparen = uwb->lastparen;
9041c2e3 3996
02db2b7b
IZ
3997 REGCP_UNWIND(uwb->lastcp);
3998 for (n = *PL_reglastparen; n > lastparen; n--)
3999 PL_regendp[n] = -1;
4000 *PL_reglastparen = n;
4001 scan = next = uwb->next;
9041c2e3
NIS
4002 if ( !scan ||
4003 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4004 ? BRANCH : BRANCHJ) ) { /* Failure */
4005 unwind = uwb->prev;
4006#ifdef DEBUGGING
4007 PL_regindent--;
4008#endif
4009 goto do_no;
4010 }
4011 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4012 if ((n = (uwb->type == RE_UNWIND_BRANCH
4013 ? NEXT_OFF(next) : ARG(next))))
4014 next += n;
4015 else
4016 next = NULL; /* XXXX Needn't unwinding in this case... */
4017 uwb->next = next;
4018 next = NEXTOPER(scan);
4019 if (uwb->type == RE_UNWIND_BRANCHJ)
4020 next = NEXTOPER(next);
4021 locinput = uwb->locinput;
4022 nextchr = uwb->nextchr;
4023#ifdef DEBUGGING
4024 PL_regindent = uwb->regindent;
4025#endif
4026
4027 goto reenter;
4028 }
4029 /* NOT REACHED */
4030 default:
4031 Perl_croak(aTHX_ "regexp unwind memory corruption");
4032 }
4033 /* NOT REACHED */
4034 }
4633a7c4 4035#ifdef DEBUGGING
3280af22 4036 PL_regindent--;
4633a7c4 4037#endif
a0d0e21e 4038 return 0;
a687059c
LW
4039}
4040
4041/*
4042 - regrepeat - repeatedly match something simple, report how many
4043 */
4044/*
4045 * [This routine now assumes that it will only match on things of length 1.
4046 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4047 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4048 */
76e3520e 4049STATIC I32
8c18bf38 4050S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4051{
a0d0e21e 4052 register char *scan;
a0d0e21e 4053 register I32 c;
3280af22 4054 register char *loceol = PL_regeol;
a0ed51b3 4055 register I32 hardcount = 0;
53c4c00c 4056 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4057
3280af22 4058 scan = PL_reginput;
faf11cac
HS
4059 if (max == REG_INFTY)
4060 max = I32_MAX;
4061 else if (max < loceol - scan)
a0d0e21e 4062 loceol = scan + max;
a0d0e21e 4063 switch (OP(p)) {
22c35a8c 4064 case REG_ANY:
1aa99e6b 4065 if (do_utf8) {
ffc61ed2 4066 loceol = PL_regeol;
1aa99e6b 4067 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4068 scan += UTF8SKIP(scan);
4069 hardcount++;
4070 }
4071 } else {
4072 while (scan < loceol && *scan != '\n')
4073 scan++;
a0ed51b3
LW
4074 }
4075 break;
ffc61ed2 4076 case SANY:
def8e4ea
JH
4077 if (do_utf8) {
4078 loceol = PL_regeol;
a0804c9e 4079 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4080 scan += UTF8SKIP(scan);
4081 hardcount++;
4082 }
4083 }
4084 else
4085 scan = loceol;
a0ed51b3 4086 break;
f33976b4
DB
4087 case CANY:
4088 scan = loceol;
4089 break;
bbce6d69 4090 case EXACT: /* length of string is 1 */
cd439c50 4091 c = (U8)*STRING(p);
bbce6d69 4092 while (scan < loceol && UCHARAT(scan) == c)
4093 scan++;
4094 break;
4095 case EXACTF: /* length of string is 1 */
cd439c50 4096 c = (U8)*STRING(p);
bbce6d69 4097 while (scan < loceol &&
22c35a8c 4098 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4099 scan++;
4100 break;
4101 case EXACTFL: /* length of string is 1 */
3280af22 4102 PL_reg_flags |= RF_tainted;
cd439c50 4103 c = (U8)*STRING(p);
bbce6d69 4104 while (scan < loceol &&
22c35a8c 4105 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4106 scan++;
4107 break;
4108 case ANYOF:
ffc61ed2
JH
4109 if (do_utf8) {
4110 loceol = PL_regeol;
338501c1
JH
4111 while (hardcount < max && scan < loceol &&
4112 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4113 scan += UTF8SKIP(scan);
4114 hardcount++;
4115 }
4116 } else {
7d3e948e 4117 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4118 scan++;
4119 }
a0d0e21e
LW
4120 break;
4121 case ALNUM:
1aa99e6b 4122 if (do_utf8) {
ffc61ed2 4123 loceol = PL_regeol;
fab1f349 4124 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4125 while (hardcount < max && scan < loceol &&
3568d838 4126 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4127 scan += UTF8SKIP(scan);
4128 hardcount++;
4129 }
4130 } else {
4131 while (scan < loceol && isALNUM(*scan))
4132 scan++;
a0ed51b3
LW
4133 }
4134 break;
bbce6d69 4135 case ALNUML:
3280af22 4136 PL_reg_flags |= RF_tainted;
1aa99e6b 4137 if (do_utf8) {
ffc61ed2 4138 loceol = PL_regeol;
1aa99e6b
IH
4139 while (hardcount < max && scan < loceol &&
4140 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4141 scan += UTF8SKIP(scan);
4142 hardcount++;
4143 }
4144 } else {
4145 while (scan < loceol && isALNUM_LC(*scan))
4146 scan++;
a0ed51b3
LW
4147 }
4148 break;
a0d0e21e 4149 case NALNUM:
1aa99e6b 4150 if (do_utf8) {
ffc61ed2 4151 loceol = PL_regeol;
fab1f349 4152 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4153 while (hardcount < max && scan < loceol &&
3568d838 4154 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4155 scan += UTF8SKIP(scan);
4156 hardcount++;
4157 }
4158 } else {
4159 while (scan < loceol && !isALNUM(*scan))
4160 scan++;
a0ed51b3
LW
4161 }
4162 break;
bbce6d69 4163 case NALNUML:
3280af22 4164 PL_reg_flags |= RF_tainted;
1aa99e6b 4165 if (do_utf8) {
ffc61ed2 4166 loceol = PL_regeol;
1aa99e6b
IH
4167 while (hardcount < max && scan < loceol &&
4168 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4169 scan += UTF8SKIP(scan);
4170 hardcount++;
4171 }
4172 } else {
4173 while (scan < loceol && !isALNUM_LC(*scan))
4174 scan++;
a0ed51b3
LW
4175 }
4176 break;
a0d0e21e 4177 case SPACE:
1aa99e6b 4178 if (do_utf8) {
ffc61ed2 4179 loceol = PL_regeol;
fab1f349 4180 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4181 while (hardcount < max && scan < loceol &&
3568d838
JH
4182 (*scan == ' ' ||
4183 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4184 scan += UTF8SKIP(scan);
4185 hardcount++;
4186 }
4187 } else {
4188 while (scan < loceol && isSPACE(*scan))
4189 scan++;
a0ed51b3
LW
4190 }
4191 break;
bbce6d69 4192 case SPACEL:
3280af22 4193 PL_reg_flags |= RF_tainted;
1aa99e6b 4194 if (do_utf8) {
ffc61ed2 4195 loceol = PL_regeol;
1aa99e6b 4196 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4197 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4198 scan += UTF8SKIP(scan);
4199 hardcount++;
4200 }
4201 } else {
4202 while (scan < loceol && isSPACE_LC(*scan))
4203 scan++;
a0ed51b3
LW
4204 }
4205 break;
a0d0e21e 4206 case NSPACE:
1aa99e6b 4207 if (do_utf8) {
ffc61ed2 4208 loceol = PL_regeol;
fab1f349 4209 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4210 while (hardcount < max && scan < loceol &&
3568d838
JH
4211 !(*scan == ' ' ||
4212 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4213 scan += UTF8SKIP(scan);
4214 hardcount++;
4215 }
4216 } else {
4217 while (scan < loceol && !isSPACE(*scan))
4218 scan++;
4219 break;
a0ed51b3 4220 }
bbce6d69 4221 case NSPACEL:
3280af22 4222 PL_reg_flags |= RF_tainted;
1aa99e6b 4223 if (do_utf8) {
ffc61ed2 4224 loceol = PL_regeol;
1aa99e6b 4225 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4226 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4227 scan += UTF8SKIP(scan);
4228 hardcount++;
4229 }
4230 } else {
4231 while (scan < loceol && !isSPACE_LC(*scan))
4232 scan++;
a0ed51b3
LW
4233 }
4234 break;
a0d0e21e 4235 case DIGIT:
1aa99e6b 4236 if (do_utf8) {
ffc61ed2 4237 loceol = PL_regeol;
fab1f349 4238 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4239 while (hardcount < max && scan < loceol &&
3568d838 4240 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4241 scan += UTF8SKIP(scan);
4242 hardcount++;
4243 }
4244 } else {
4245 while (scan < loceol && isDIGIT(*scan))
4246 scan++;
a0ed51b3
LW
4247 }
4248 break;
a0d0e21e 4249 case NDIGIT:
1aa99e6b 4250 if (do_utf8) {
ffc61ed2 4251 loceol = PL_regeol;
fab1f349 4252 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4253 while (hardcount < max && scan < loceol &&
3568d838 4254 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4255 scan += UTF8SKIP(scan);
4256 hardcount++;
4257 }
4258 } else {
4259 while (scan < loceol && !isDIGIT(*scan))
4260 scan++;
a0ed51b3
LW
4261 }
4262 break;
a0d0e21e
LW
4263 default: /* Called on something of 0 width. */
4264 break; /* So match right here or not at all. */
4265 }
a687059c 4266
a0ed51b3
LW
4267 if (hardcount)
4268 c = hardcount;
4269 else
4270 c = scan - PL_reginput;
3280af22 4271 PL_reginput = scan;
a687059c 4272
9041c2e3 4273 DEBUG_r(
c277df42
IZ
4274 {
4275 SV *prop = sv_newmortal();
4276
4277 regprop(prop, p);
9041c2e3
NIS
4278 PerlIO_printf(Perl_debug_log,
4279 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
fdac8c4b 4280 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4281 });
9041c2e3 4282
a0d0e21e 4283 return(c);
a687059c
LW
4284}
4285
4286/*
c277df42 4287 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4288 *
b035a42e 4289 * The repeater is supposed to have constant non-zero length.
c277df42
IZ
4290 */
4291
76e3520e 4292STATIC I32
cea2e8a9 4293S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4294{
b7953727 4295 register char *scan = Nullch;
c277df42 4296 register char *start;
3280af22 4297 register char *loceol = PL_regeol;
a0ed51b3 4298 I32 l = 0;
708e3b05 4299 I32 count = 0, res = 1;
a0ed51b3
LW
4300
4301 if (!max)
4302 return 0;
c277df42 4303
3280af22 4304 start = PL_reginput;
53c4c00c 4305 if (PL_reg_match_utf8) {
708e3b05 4306 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4307 if (!count++) {
4308 l = 0;
4309 while (start < PL_reginput) {
4310 l++;
4311 start += UTF8SKIP(start);
4312 }
4313 *lp = l;
4314 if (l == 0)
4315 return max;
4316 }
4317 if (count == max)
4318 return count;
4319 }
4320 }
4321 else {
708e3b05 4322 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4323 if (!count++) {
4324 *lp = l = PL_reginput - start;
4325 if (max != REG_INFTY && l*max < loceol - scan)
4326 loceol = scan + l*max;
4327 if (l == 0)
4328 return max;
c277df42
IZ
4329 }
4330 }
4331 }
708e3b05 4332 if (!res)
3280af22 4333 PL_reginput = scan;
9041c2e3 4334
a0ed51b3 4335 return count;
c277df42
IZ
4336}
4337
4338/*
ffc61ed2
JH
4339- regclass_swash - prepare the utf8 swash
4340*/
4341
4342SV *
9e55ce06 4343Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4344{
9e55ce06
JH
4345 SV *sw = NULL;
4346 SV *si = NULL;
4347 SV *alt = NULL;
ffc61ed2
JH
4348
4349 if (PL_regdata && PL_regdata->count) {
8c18bf38 4350 const U32 n = ARG(node);
ffc61ed2
JH
4351
4352 if (PL_regdata->what[n] == 's') {
4353 SV *rv = (SV*)PL_regdata->data[n];
4354 AV *av = (AV*)SvRV((SV*)rv);
b11f357e 4355 SV **ary = AvARRAY(av);
9e55ce06 4356 SV **a, **b;
9041c2e3 4357
9e55ce06
JH
4358 /* See the end of regcomp.c:S_reglass() for
4359 * documentation of these array elements. */
4360
b11f357e
JH
4361 si = *ary;
4362 a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0;
4363 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4364
ffc61ed2
JH
4365 if (a)
4366 sw = *a;
4367 else if (si && doinit) {
4368 sw = swash_init("utf8", "", si, 1, 0);
4369 (void)av_store(av, 1, sw);
4370 }
9e55ce06
JH
4371 if (b)
4372 alt = *b;
ffc61ed2
JH
4373 }
4374 }
4375
9e55ce06
JH
4376 if (listsvp)
4377 *listsvp = si;
4378 if (altsvp)
4379 *altsvp = alt;
ffc61ed2
JH
4380
4381 return sw;
4382}
4383
4384/*
ba7b4546 4385 - reginclass - determine if a character falls into a character class
832705d4
JH
4386
4387 The n is the ANYOF regnode, the p is the target string, lenp
4388 is pointer to the maximum length of how far to go in the p
4389 (if the lenp is zero, UTF8SKIP(p) is used),
4390 do_utf8 tells whether the target string is in UTF-8.
4391
bbce6d69 4392 */
4393
76e3520e 4394STATIC bool
8c18bf38 4395S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4396{
8c18bf38 4397 const char flags = ANYOF_FLAGS(n);
bbce6d69 4398 bool match = FALSE;
cc07378b 4399 UV c = *p;
ae9ddab8 4400 STRLEN len = 0;
9e55ce06 4401 STRLEN plen;
1aa99e6b 4402
ae9ddab8 4403 if (do_utf8 && !UTF8_IS_INVARIANT(c))
a2a469f9 4404 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
ae9ddab8 4405 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
bbce6d69 4406
0f0076b4 4407 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4408 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4409 if (lenp)
4410 *lenp = 0;
ffc61ed2 4411 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4412 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4413 match = TRUE;
bbce6d69 4414 }
3568d838 4415 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4416 match = TRUE;
ffc61ed2 4417 if (!match) {
9e55ce06
JH
4418 AV *av;
4419 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4420
4421 if (sw) {
3568d838 4422 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4423 match = TRUE;
4424 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4425 if (!match && lenp && av) {
4426 I32 i;
4427
4428 for (i = 0; i <= av_len(av); i++) {
4429 SV* sv = *av_fetch(av, i, FALSE);
4430 STRLEN len;
71a0dd65 4431 const char *s = SvPV_const(sv, len);
9e55ce06 4432
061b10df 4433 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4434 *lenp = len;
4435 match = TRUE;
4436 break;
4437 }
4438 }
4439 }
4440 if (!match) {
a2a469f9 4441 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4442 STRLEN tmplen;
4443
9e55ce06
JH
4444 to_utf8_fold(p, tmpbuf, &tmplen);
4445 if (swash_fetch(sw, tmpbuf, do_utf8))
4446 match = TRUE;
4447 }
ffc61ed2
JH
4448 }
4449 }
bbce6d69 4450 }
9e55ce06 4451 if (match && lenp && *lenp == 0)
0f0076b4 4452 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4453 }
1aa99e6b 4454 if (!match && c < 256) {
ffc61ed2
JH
4455 if (ANYOF_BITMAP_TEST(n, c))
4456 match = TRUE;
4457 else if (flags & ANYOF_FOLD) {
eb160463 4458 U8 f;
a0ed51b3 4459
ffc61ed2
JH
4460 if (flags & ANYOF_LOCALE) {
4461 PL_reg_flags |= RF_tainted;
4462 f = PL_fold_locale[c];
4463 }
4464 else
4465 f = PL_fold[c];
4466 if (f != c && ANYOF_BITMAP_TEST(n, f))
4467 match = TRUE;
4468 }
4469
4470 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4471 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4472 if (
4473 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4474 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4475 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4476 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4477 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4478 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4479 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4480 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4481 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4482 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4483 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4484 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4485 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4486 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4487 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4488 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4489 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4490 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4491 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4492 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4493 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4494 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4495 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4496 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4497 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4498 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4499 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4500 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4501 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4502 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4503 ) /* How's that for a conditional? */
4504 {
4505 match = TRUE;
4506 }
a0ed51b3 4507 }
a0ed51b3
LW
4508 }
4509
a0ed51b3
LW
4510 return (flags & ANYOF_INVERT) ? !match : match;
4511}
161b471a 4512
dfe13c55 4513STATIC U8 *
cea2e8a9 4514S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4515{
1aa99e6b
IH
4516 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4517}
4518
4519STATIC U8 *
4520S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4521{
a0ed51b3 4522 if (off >= 0) {
1aa99e6b 4523 while (off-- && s < lim) {
ffc61ed2 4524 /* XXX could check well-formedness here */
a0ed51b3 4525 s += UTF8SKIP(s);
ffc61ed2 4526 }
a0ed51b3
LW
4527 }
4528 else {
4529 while (off++) {
1aa99e6b 4530 if (s > lim) {
a0ed51b3 4531 s--;
ffc61ed2 4532 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4533 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4534 s--;
ffc61ed2
JH
4535 }
4536 /* XXX could check well-formedness here */
a0ed51b3
LW
4537 }
4538 }
4539 }
4540 return s;
4541}
161b471a 4542
dfe13c55 4543STATIC U8 *
1aa99e6b 4544S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4545{
1aa99e6b
IH
4546 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4547}
4548
4549STATIC U8 *
4550S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4551{
4552 if (off >= 0) {
1aa99e6b 4553 while (off-- && s < lim) {
ffc61ed2 4554 /* XXX could check well-formedness here */
a0ed51b3 4555 s += UTF8SKIP(s);
ffc61ed2 4556 }
a0ed51b3
LW
4557 if (off >= 0)
4558 return 0;
4559 }
4560 else {
4561 while (off++) {
1aa99e6b 4562 if (s > lim) {
a0ed51b3 4563 s--;
ffc61ed2 4564 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4565 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4566 s--;
ffc61ed2
JH
4567 }
4568 /* XXX could check well-formedness here */
a0ed51b3
LW
4569 }
4570 else
4571 break;
4572 }
4573 if (off <= 0)
4574 return 0;
4575 }
4576 return s;
4577}
51371543 4578
51371543 4579static void
acfe0abc 4580restore_pos(pTHX_ void *arg)
51371543 4581{
a2592645 4582 PERL_UNUSED_ARG(arg);
51371543
GS
4583 if (PL_reg_eval_set) {
4584 if (PL_reg_oldsaved) {
4585 PL_reg_re->subbeg = PL_reg_oldsaved;
4586 PL_reg_re->sublen = PL_reg_oldsavedlen;
4587 RX_MATCH_COPIED_on(PL_reg_re);
4588 }
4589 PL_reg_magic->mg_len = PL_reg_oldpos;
4590 PL_reg_eval_set = 0;
4591 PL_curpm = PL_reg_oldcurpm;
4592 }
4593}
33b8afdf
JH
4594
4595STATIC void
4596S_to_utf8_substr(pTHX_ register regexp *prog)
4597{
33b8afdf 4598 if (prog->float_substr && !prog->float_utf8) {
a2592645 4599 SV* sv;
04851bb3 4600 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
4601 sv_utf8_upgrade(sv);
4602 if (SvTAIL(prog->float_substr))
4603 SvTAIL_on(sv);
4604 if (prog->float_substr == prog->check_substr)
4605 prog->check_utf8 = sv;
4606 }
4607 if (prog->anchored_substr && !prog->anchored_utf8) {
a2592645 4608 SV* sv;
04851bb3 4609 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
4610 sv_utf8_upgrade(sv);
4611 if (SvTAIL(prog->anchored_substr))
4612 SvTAIL_on(sv);
4613 if (prog->anchored_substr == prog->check_substr)
4614 prog->check_utf8 = sv;
4615 }
4616}
4617
4618STATIC void
4619S_to_byte_substr(pTHX_ register regexp *prog)
4620{
33b8afdf 4621 if (prog->float_utf8 && !prog->float_substr) {
a2592645 4622 SV* sv;
04851bb3 4623 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
4624 if (sv_utf8_downgrade(sv, TRUE)) {
4625 if (SvTAIL(prog->float_utf8))
4626 SvTAIL_on(sv);
4627 } else {
4628 SvREFCNT_dec(sv);
4629 prog->float_substr = sv = &PL_sv_undef;
4630 }
4631 if (prog->float_utf8 == prog->check_utf8)
4632 prog->check_substr = sv;
4633 }
4634 if (prog->anchored_utf8 && !prog->anchored_substr) {
a2592645 4635 SV* sv;
04851bb3 4636 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
4637 if (sv_utf8_downgrade(sv, TRUE)) {
4638 if (SvTAIL(prog->anchored_utf8))
4639 SvTAIL_on(sv);
4640 } else {
4641 SvREFCNT_dec(sv);
4642 prog->anchored_substr = sv = &PL_sv_undef;
4643 }
4644 if (prog->anchored_utf8 == prog->check_utf8)
4645 prog->check_substr = sv;
4646 }
4647}
d8294a4d
NC
4648
4649/*
4650 * Local variables:
4651 * c-indentation-style: bsd
4652 * c-basic-offset: 4
4653 * indent-tabs-mode: t
4654 * End:
4655 *
4656 * ex: set ts=8 sts=4 sw=4 noet:
4657 */