This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't bother checking for the Greek special
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded 403 char *s = PL_reg_match_utf8 ?
c728cb41
JH
404 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
405 strpos;
b8d68ded
JH
406 int len = PL_reg_match_utf8 ?
407 strlen(s) : strend - strpos;
2a782b5b
JH
408 if (!PL_colorset)
409 reginitcolors();
b8d68ded
JH
410 if (PL_reg_match_utf8)
411 DEBUG_r(PerlIO_printf(Perl_debug_log,
412 "UTF-8 target...\n"));
2a782b5b
JH
413 PerlIO_printf(Perl_debug_log,
414 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
415 PL_colors[4],PL_colors[5],PL_colors[0],
416 prog->precomp,
417 PL_colors[1],
418 (strlen(prog->precomp) > 60 ? "..." : ""),
419 PL_colors[0],
420 (int)(len > 60 ? 60 : len),
421 s, PL_colors[1],
422 (len > 60 ? "..." : "")
423 );
424 });
cad2e5aa 425
1aa99e6b 426 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
427 DEBUG_r(PerlIO_printf(Perl_debug_log,
428 "String too short... [re_intuit_start]\n"));
cad2e5aa 429 goto fail;
2c2d71f5 430 }
a1933d95 431 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 432 PL_regeol = strend;
653099ff 433 check = prog->check_substr;
2c2d71f5 434 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
435 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
436 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 437 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 438
7e25d62c
JH
439 if (!ml_anch) {
440 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
441 | ROPT_IMPLICIT)) /* not a real BOL */
442 /* SvCUR is not set on references: SvRV and SvPVX overlap */
443 && sv && !SvROK(sv)
444 && (strpos != strbeg)) {
445 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
446 goto fail;
447 }
448 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 449 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 450 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
451 I32 slen;
452
1aa99e6b 453 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
454 if (SvTAIL(check)) {
455 slen = SvCUR(check); /* >= 1 */
cad2e5aa 456
9041c2e3 457 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
458 || (strend - s == slen && strend[-1] != '\n')) {
459 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
460 goto fail_finish;
cad2e5aa
JH
461 }
462 /* Now should match s[0..slen-2] */
463 slen--;
653099ff 464 if (slen && (*SvPVX(check) != *s
cad2e5aa 465 || (slen > 1
653099ff 466 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
467 report_neq:
468 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
469 goto fail_finish;
470 }
cad2e5aa 471 }
653099ff
GS
472 else if (*SvPVX(check) != *s
473 || ((slen = SvCUR(check)) > 1
474 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
475 goto report_neq;
476 goto success_at_start;
7e25d62c 477 }
cad2e5aa 478 }
2c2d71f5 479 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 480 s = strpos;
2c2d71f5 481 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 482 end_shift = prog->minlen - start_shift -
653099ff 483 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 484 if (!ml_anch) {
653099ff
GS
485 I32 end = prog->check_offset_max + CHR_SVLEN(check)
486 - (SvTAIL(check) != 0);
1aa99e6b 487 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
488
489 if (end_shift < eshift)
490 end_shift = eshift;
491 }
cad2e5aa 492 }
2c2d71f5 493 else { /* Can match at random position */
cad2e5aa
JH
494 ml_anch = 0;
495 s = strpos;
2c2d71f5
JH
496 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
497 /* Should be nonnegative! */
498 end_shift = prog->minlen - start_shift -
653099ff 499 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
500 }
501
2c2d71f5 502#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 503 if (end_shift < 0)
6bbae5e6 504 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
505#endif
506
2c2d71f5
JH
507 restart:
508 /* Find a possible match in the region s..strend by looking for
509 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 510 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
511 I32 p = -1; /* Internal iterator of scream. */
512 I32 *pp = data ? data->scream_pos : &p;
513
2c2d71f5
JH
514 if (PL_screamfirst[BmRARE(check)] >= 0
515 || ( BmRARE(check) == '\n'
516 && (BmPREVIOUS(check) == SvCUR(check) - 1)
517 && SvTAIL(check) ))
9041c2e3 518 s = screaminstr(sv, check,
2c2d71f5 519 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 520 else
2c2d71f5 521 goto fail_finish;
cad2e5aa
JH
522 if (data)
523 *data->scream_olds = s;
524 }
f33976b4 525 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
526 s = fbm_instr((U8*)(s + start_shift),
527 (U8*)(strend - end_shift),
528 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 529 else
1aa99e6b
IH
530 s = fbm_instr(HOP3(s, start_shift, strend),
531 HOP3(strend, -end_shift, strbeg),
2c2d71f5 532 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
533
534 /* Update the count-of-usability, remove useless subpatterns,
535 unshift s. */
2c2d71f5
JH
536
537 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
538 (s ? "Found" : "Did not find"),
539 ((check == prog->anchored_substr) ? "anchored" : "floating"),
540 PL_colors[0],
7b0972df
JH
541 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
542 SvPVX(check),
2c2d71f5
JH
543 PL_colors[1], (SvTAIL(check) ? "$" : ""),
544 (s ? " at offset " : "...\n") ) );
545
546 if (!s)
547 goto fail_finish;
548
6eb5f6b9
JH
549 check_at = s;
550
2c2d71f5 551 /* Finish the diagnostic message */
30944b6d 552 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
553
554 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
555 Start with the other substr.
556 XXXX no SCREAM optimization yet - and a very coarse implementation
557 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
558 *always* match. Probably should be marked during compile...
559 Probably it is right to do no SCREAM here...
560 */
561
562 if (prog->float_substr && prog->anchored_substr) {
30944b6d 563 /* Take into account the "other" substring. */
2c2d71f5
JH
564 /* XXXX May be hopelessly wrong for UTF... */
565 if (!other_last)
6eb5f6b9 566 other_last = strpos;
2c2d71f5 567 if (check == prog->float_substr) {
30944b6d
IZ
568 do_other_anchored:
569 {
1aa99e6b 570 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
571 char *s1 = s;
572
2c2d71f5
JH
573 t = s - prog->check_offset_max;
574 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
575 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 576 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 577 && t > strpos)))
30944b6d 578 /* EMPTY */;
2c2d71f5
JH
579 else
580 t = strpos;
1aa99e6b 581 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
582 if (t < other_last) /* These positions already checked */
583 t = other_last;
1aa99e6b 584 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
585 if (last < last1)
586 last1 = last;
587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
588 /* On end-of-str: see comment below. */
589 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
590 HOP3(HOP3(last1, prog->anchored_offset, strend)
591 + SvCUR(prog->anchored_substr),
592 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
593 prog->anchored_substr,
594 PL_multiline ? FBMrf_MULTILINE : 0);
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
597 (s ? "Found" : "Contradicts"),
598 PL_colors[0],
7b0972df
JH
599 (int)(SvCUR(prog->anchored_substr)
600 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
601 SvPVX(prog->anchored_substr),
602 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
603 if (!s) {
604 if (last1 >= last2) {
605 DEBUG_r(PerlIO_printf(Perl_debug_log,
606 ", giving up...\n"));
607 goto fail_finish;
608 }
609 DEBUG_r(PerlIO_printf(Perl_debug_log,
610 ", trying floating at offset %ld...\n",
1aa99e6b
IH
611 (long)(HOP3c(s1, 1, strend) - i_strpos)));
612 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
613 s = HOP3c(last, 1, strend);
2c2d71f5
JH
614 goto restart;
615 }
616 else {
617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 618 (long)(s - i_strpos)));
1aa99e6b
IH
619 t = HOP3c(s, -prog->anchored_offset, strbeg);
620 other_last = HOP3c(s, 1, strend);
30944b6d 621 s = s1;
2c2d71f5
JH
622 if (t == strpos)
623 goto try_at_start;
2c2d71f5
JH
624 goto try_at_offset;
625 }
30944b6d 626 }
2c2d71f5
JH
627 }
628 else { /* Take into account the floating substring. */
629 char *last, *last1;
630 char *s1 = s;
631
1aa99e6b
IH
632 t = HOP3c(s, -start_shift, strbeg);
633 last1 = last =
634 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
635 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
636 last = HOP3c(t, prog->float_max_offset, strend);
637 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
638 if (s < other_last)
639 s = other_last;
2c2d71f5
JH
640 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
641 /* fbm_instr() takes into account exact value of end-of-str
642 if the check is SvTAIL(ed). Since false positives are OK,
643 and end-of-str is not later than strend we are OK. */
644 s = fbm_instr((unsigned char*)s,
645 (unsigned char*)last + SvCUR(prog->float_substr)
646 - (SvTAIL(prog->float_substr)!=0),
647 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
648 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
649 (s ? "Found" : "Contradicts"),
650 PL_colors[0],
7b0972df
JH
651 (int)(SvCUR(prog->float_substr)
652 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
653 SvPVX(prog->float_substr),
654 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
655 if (!s) {
656 if (last1 == last) {
657 DEBUG_r(PerlIO_printf(Perl_debug_log,
658 ", giving up...\n"));
659 goto fail_finish;
660 }
661 DEBUG_r(PerlIO_printf(Perl_debug_log,
662 ", trying anchored starting at offset %ld...\n",
30944b6d 663 (long)(s1 + 1 - i_strpos)));
803ff556 664 other_last = last;
1aa99e6b 665 s = HOP3c(t, 1, strend);
2c2d71f5
JH
666 goto restart;
667 }
668 else {
669 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 670 (long)(s - i_strpos)));
803ff556 671 other_last = s; /* Fix this later. --Hugo */
30944b6d 672 s = s1;
2c2d71f5
JH
673 if (t == strpos)
674 goto try_at_start;
2c2d71f5
JH
675 goto try_at_offset;
676 }
677 }
cad2e5aa 678 }
2c2d71f5
JH
679
680 t = s - prog->check_offset_max;
2c2d71f5
JH
681 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
682 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
683 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
684 && t > strpos))) {
2c2d71f5
JH
685 /* Fixed substring is found far enough so that the match
686 cannot start at strpos. */
687 try_at_offset:
cad2e5aa 688 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
689 /* Eventually fbm_*() should handle this, but often
690 anchored_offset is not 0, so this check will not be wasted. */
691 /* XXXX In the code below we prefer to look for "^" even in
692 presence of anchored substrings. And we search even
693 beyond the found float position. These pessimizations
694 are historical artefacts only. */
695 find_anchor:
2c2d71f5 696 while (t < strend - prog->minlen) {
cad2e5aa 697 if (*t == '\n') {
4ee3650e 698 if (t < check_at - prog->check_offset_min) {
30944b6d 699 if (prog->anchored_substr) {
4ee3650e
GS
700 /* Since we moved from the found position,
701 we definitely contradict the found anchored
30944b6d
IZ
702 substr. Due to the above check we do not
703 contradict "check" substr.
704 Thus we can arrive here only if check substr
705 is float. Redo checking for "other"=="fixed".
706 */
9041c2e3 707 strpos = t + 1;
30944b6d
IZ
708 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
709 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
710 goto do_other_anchored;
711 }
4ee3650e
GS
712 /* We don't contradict the found floating substring. */
713 /* XXXX Why not check for STCLASS? */
cad2e5aa 714 s = t + 1;
2c2d71f5 715 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 716 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
717 goto set_useful;
718 }
4ee3650e
GS
719 /* Position contradicts check-string */
720 /* XXXX probably better to look for check-string
721 than for "\n", so one should lower the limit for t? */
722 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 723 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 724 other_last = strpos = s = t + 1;
cad2e5aa
JH
725 goto restart;
726 }
727 t++;
728 }
2c2d71f5
JH
729 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
730 PL_colors[0],PL_colors[1]));
731 goto fail_finish;
cad2e5aa 732 }
f5952150
GS
733 else {
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 }
cad2e5aa
JH
737 s = t;
738 set_useful:
2c2d71f5 739 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
740 }
741 else {
f5952150 742 /* The found string does not prohibit matching at strpos,
2c2d71f5 743 - no optimization of calling REx engine can be performed,
f5952150
GS
744 unless it was an MBOL and we are not after MBOL,
745 or a future STCLASS check will fail this. */
2c2d71f5
JH
746 try_at_start:
747 /* Even in this situation we may use MBOL flag if strpos is offset
748 wrt the start of the string. */
05b4157f 749 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 750 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
751 /* May be due to an implicit anchor of m{.*foo} */
752 && !(prog->reganch & ROPT_IMPLICIT))
753 {
cad2e5aa
JH
754 t = strpos;
755 goto find_anchor;
756 }
30944b6d 757 DEBUG_r( if (ml_anch)
f5952150
GS
758 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
759 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 760 );
2c2d71f5 761 success_at_start:
30944b6d 762 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 763 && prog->check_substr /* Could be deleted already */
cad2e5aa 764 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
765 && prog->check_substr == prog->float_substr)
766 {
cad2e5aa 767 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 768 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
769 SvREFCNT_dec(prog->check_substr);
770 prog->check_substr = Nullsv; /* disable */
771 prog->float_substr = Nullsv; /* clear */
5e39e1e5 772 check = Nullsv; /* abort */
cad2e5aa 773 s = strpos;
3cf5c195
IZ
774 /* XXXX This is a remnant of the old implementation. It
775 looks wasteful, since now INTUIT can use many
6eb5f6b9 776 other heuristics. */
cad2e5aa
JH
777 prog->reganch &= ~RE_USE_INTUIT;
778 }
779 else
780 s = strpos;
781 }
782
6eb5f6b9
JH
783 /* Last resort... */
784 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
785 if (prog->regstclass) {
786 /* minlen == 0 is possible if regstclass is \b or \B,
787 and the fixed substr is ''$.
788 Since minlen is already taken into account, s+1 is before strend;
789 accidentally, minlen >= 1 guaranties no false positives at s + 1
790 even for \b or \B. But (minlen? 1 : 0) below assumes that
791 regstclass does not come from lookahead... */
792 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
793 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 794 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 795 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 796 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 797 : 1);
6eb5f6b9 798 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
799 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
800 : (prog->float_substr
801 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
802 cl_l, strend)
803 : strend);
a1933d95 804 char *startpos = strbeg;
6eb5f6b9
JH
805
806 t = s;
76384e4a 807 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 808 PL_regdata = prog->data;
76384e4a
GS
809 PL_bostr = startpos;
810 }
f33976b4 811 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
812 if (!s) {
813#ifdef DEBUGGING
b7953727 814 char *what = 0;
6eb5f6b9
JH
815#endif
816 if (endpos == strend) {
817 DEBUG_r( PerlIO_printf(Perl_debug_log,
818 "Could not match STCLASS...\n") );
819 goto fail;
820 }
66e933ab
GS
821 DEBUG_r( PerlIO_printf(Perl_debug_log,
822 "This position contradicts STCLASS...\n") );
653099ff
GS
823 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
824 goto fail;
6eb5f6b9
JH
825 /* Contradict one of substrings */
826 if (prog->anchored_substr) {
6eb5f6b9
JH
827 if (prog->anchored_substr == check) {
828 DEBUG_r( what = "anchored" );
829 hop_and_restart:
1aa99e6b 830 s = HOP3c(t, 1, strend);
66e933ab
GS
831 if (s + start_shift + end_shift > strend) {
832 /* XXXX Should be taken into account earlier? */
833 DEBUG_r( PerlIO_printf(Perl_debug_log,
834 "Could not match STCLASS...\n") );
835 goto fail;
836 }
5e39e1e5
HS
837 if (!check)
838 goto giveup;
6eb5f6b9 839 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 840 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
841 what, (long)(s + start_shift - i_strpos)) );
842 goto restart;
843 }
66e933ab 844 /* Have both, check_string is floating */
6eb5f6b9
JH
845 if (t + start_shift >= check_at) /* Contradicts floating=check */
846 goto retry_floating_check;
847 /* Recheck anchored substring, but not floating... */
9041c2e3 848 s = check_at;
5e39e1e5
HS
849 if (!check)
850 goto giveup;
6eb5f6b9 851 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 852 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
853 (long)(other_last - i_strpos)) );
854 goto do_other_anchored;
855 }
60e71179
GS
856 /* Another way we could have checked stclass at the
857 current position only: */
858 if (ml_anch) {
859 s = t = t + 1;
5e39e1e5
HS
860 if (!check)
861 goto giveup;
60e71179 862 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
863 "Looking for /%s^%s/m starting at offset %ld...\n",
864 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 865 goto try_at_offset;
66e933ab 866 }
60e71179
GS
867 if (!prog->float_substr) /* Could have been deleted */
868 goto fail;
6eb5f6b9
JH
869 /* Check is floating subtring. */
870 retry_floating_check:
871 t = check_at - start_shift;
872 DEBUG_r( what = "floating" );
873 goto hop_and_restart;
874 }
b7953727
JH
875 if (t != s) {
876 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 877 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
878 (long)(t - i_strpos), (long)(s - i_strpos))
879 );
880 }
881 else {
882 DEBUG_r(PerlIO_printf(Perl_debug_log,
883 "Does not contradict STCLASS...\n");
884 );
885 }
6eb5f6b9 886 }
5e39e1e5
HS
887 giveup:
888 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
889 PL_colors[4], (check ? "Guessed" : "Giving up"),
890 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 891 return s;
2c2d71f5
JH
892
893 fail_finish: /* Substring not found */
66e933ab
GS
894 if (prog->check_substr) /* could be removed already */
895 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 896 fail:
2c2d71f5 897 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
898 PL_colors[4],PL_colors[5]));
899 return Nullch;
900}
9661b544 901
6eb5f6b9 902/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
903STATIC char *
904S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 905{
6eb5f6b9
JH
906 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
907 char *m;
d8093b23
G
908 STRLEN ln;
909 unsigned int c1;
910 unsigned int c2;
6eb5f6b9
JH
911 char *e;
912 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 913 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 914
6eb5f6b9
JH
915 /* We know what class it must start with. */
916 switch (OP(c)) {
6eb5f6b9 917 case ANYOF:
a0ed51b3 918 while (s < strend) {
e0f9d4a8
JH
919 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
920
921 if (reginclass(c, (U8*)s, do_utf8) ||
922 (ANYOF_UNICODE_FOLD_SHARP_S(c, s, strend) &&
923 (skip = 2))) {
6eb5f6b9
JH
924 if (tmp && (norun || regtry(prog, s)))
925 goto got_it;
926 else
927 tmp = doevery;
a0ed51b3 928 }
e0f9d4a8
JH
929 else
930 tmp = 1;
931 s += skip;
a0d0e21e 932 }
6eb5f6b9 933 break;
f33976b4
DB
934 case CANY:
935 while (s < strend) {
936 if (tmp && (norun || regtry(prog, s)))
937 goto got_it;
938 else
939 tmp = doevery;
940 s++;
941 }
942 break;
6eb5f6b9
JH
943 case EXACTF:
944 m = STRING(c);
945 ln = STR_LEN(c);
1aa99e6b 946 if (UTF) {
a2a2844f 947 STRLEN ulen1, ulen2;
e7ae6809
JH
948 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
949 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
950
951 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
952 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
953
954 c1 = utf8_to_uvuni(tmpbuf1, 0);
955 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
956 }
957 else {
958 c1 = *(U8*)m;
959 c2 = PL_fold[c1];
960 }
6eb5f6b9
JH
961 goto do_exactf;
962 case EXACTFL:
963 m = STRING(c);
964 ln = STR_LEN(c);
d8093b23 965 c1 = *(U8*)m;
6eb5f6b9
JH
966 c2 = PL_fold_locale[c1];
967 do_exactf:
93c50829 968 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 969
6eb5f6b9
JH
970 if (norun && e < s)
971 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 972
60a8b682
JH
973 /* The idea in the EXACTF* cases is to first find the
974 * first character of the EXACTF* node and then, if
975 * necessary, case-insensitively compare the full
976 * text of the node. The c1 and c2 are the first
977 * characters (though in Unicode it gets a bit
978 * more complicated because there are more cases
979 * than just upper and lower: one is really supposed
980 * to use the so-called folding case for case-insensitive
981 * matching (called "loose matching" in Unicode). */
982
1aa99e6b 983 if (do_utf8) {
575cac57
JH
984 UV c, f;
985 U8 tmpbuf [UTF8_MAXLEN+1];
986 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
987 STRLEN len, foldlen;
d7f013c8 988
09091399 989 if (c1 == c2) {
1aa99e6b 990 while (s <= e) {
575cac57 991 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
992 if ( c == c1
993 && (ln == len ||
66423254
JH
994 ibcmp_utf8(s, (char **)0, 0, do_utf8,
995 m, (char **)0, ln, UTF))
55da9344 996 && (norun || regtry(prog, s)) )
1aa99e6b 997 goto got_it;
80aecb99
JH
998 else {
999 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1000 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1001 if ( f != c
1002 && (f == c1 || f == c2)
1003 && (ln == foldlen ||
66423254
JH
1004 !ibcmp_utf8((char *) foldbuf,
1005 (char **)0, foldlen, do_utf8,
d07ddd77 1006 m,
66423254 1007 (char **)0, ln, UTF))
80aecb99
JH
1008 && (norun || regtry(prog, s)) )
1009 goto got_it;
1010 }
1aa99e6b
IH
1011 s += len;
1012 }
09091399
JH
1013 }
1014 else {
1aa99e6b 1015 while (s <= e) {
575cac57 1016 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1017
60a8b682
JH
1018 /* Handle some of the three Greek sigmas cases.
1019 * Note that not all the possible combinations
1020 * are handled here: some of them are handled
1021 * handled by the standard folding rules, and
1022 * some of them (the character class or ANYOF
1023 * cases) are handled during compiletime in
1024 * regexec.c:S_regclass(). */
880bd946
JH
1025 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1026 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1027 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1028
1029 if ( (c == c1 || c == c2)
1030 && (ln == len ||
66423254
JH
1031 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1032 m, (char **)0, ln, UTF))
55da9344 1033 && (norun || regtry(prog, s)) )
1aa99e6b 1034 goto got_it;
80aecb99
JH
1035 else {
1036 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1037 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1038 if ( f != c
1039 && (f == c1 || f == c2)
1040 && (ln == foldlen ||
1041 !ibcmp_utf8((char *)foldbuf,
66423254 1042 (char **)0, foldlen, do_utf8,
d07ddd77 1043 m,
66423254 1044 (char **)0, ln, UTF))
80aecb99
JH
1045 && (norun || regtry(prog, s)) )
1046 goto got_it;
1047 }
1aa99e6b
IH
1048 s += len;
1049 }
09091399 1050 }
1aa99e6b
IH
1051 }
1052 else {
1053 if (c1 == c2)
1054 while (s <= e) {
1055 if ( *(U8*)s == c1
1056 && (ln == 1 || !(OP(c) == EXACTF
1057 ? ibcmp(s, m, ln)
1058 : ibcmp_locale(s, m, ln)))
1059 && (norun || regtry(prog, s)) )
1060 goto got_it;
1061 s++;
1062 }
1063 else
1064 while (s <= e) {
1065 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1066 && (ln == 1 || !(OP(c) == EXACTF
1067 ? ibcmp(s, m, ln)
1068 : ibcmp_locale(s, m, ln)))
1069 && (norun || regtry(prog, s)) )
1070 goto got_it;
1071 s++;
1072 }
b3c9acc1
IZ
1073 }
1074 break;
bbce6d69 1075 case BOUNDL:
3280af22 1076 PL_reg_flags |= RF_tainted;
bbce6d69 1077 /* FALL THROUGH */
a0d0e21e 1078 case BOUND:
ffc61ed2 1079 if (do_utf8) {
12d33761 1080 if (s == PL_bostr)
ffc61ed2
JH
1081 tmp = '\n';
1082 else {
1aa99e6b 1083 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1084
0064a8a9
JH
1085 if (s > (char*)r)
1086 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1087 }
1088 tmp = ((OP(c) == BOUND ?
9041c2e3 1089 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1090 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1091 while (s < strend) {
1092 if (tmp == !(OP(c) == BOUND ?
3568d838 1093 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1094 isALNUM_LC_utf8((U8*)s)))
1095 {
1096 tmp = !tmp;
1097 if ((norun || regtry(prog, s)))
1098 goto got_it;
1099 }
1100 s += UTF8SKIP(s);
a687059c 1101 }
a0d0e21e 1102 }
667bb95a 1103 else {
12d33761 1104 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1105 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1106 while (s < strend) {
1107 if (tmp ==
1108 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1109 tmp = !tmp;
1110 if ((norun || regtry(prog, s)))
1111 goto got_it;
1112 }
1113 s++;
a0ed51b3 1114 }
a0ed51b3 1115 }
6eb5f6b9 1116 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1117 goto got_it;
1118 break;
bbce6d69 1119 case NBOUNDL:
3280af22 1120 PL_reg_flags |= RF_tainted;
bbce6d69 1121 /* FALL THROUGH */
a0d0e21e 1122 case NBOUND:
ffc61ed2 1123 if (do_utf8) {
12d33761 1124 if (s == PL_bostr)
ffc61ed2
JH
1125 tmp = '\n';
1126 else {
1aa99e6b 1127 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1128
0064a8a9
JH
1129 if (s > (char*)r)
1130 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1131 }
1132 tmp = ((OP(c) == NBOUND ?
9041c2e3 1133 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1134 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1135 while (s < strend) {
1136 if (tmp == !(OP(c) == NBOUND ?
3568d838 1137 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1138 isALNUM_LC_utf8((U8*)s)))
1139 tmp = !tmp;
1140 else if ((norun || regtry(prog, s)))
1141 goto got_it;
1142 s += UTF8SKIP(s);
1143 }
a0d0e21e 1144 }
667bb95a 1145 else {
12d33761 1146 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1147 tmp = ((OP(c) == NBOUND ?
1148 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1149 while (s < strend) {
1150 if (tmp ==
1151 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1152 tmp = !tmp;
1153 else if ((norun || regtry(prog, s)))
1154 goto got_it;
1155 s++;
1156 }
a0ed51b3 1157 }
6eb5f6b9 1158 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1159 goto got_it;
1160 break;
a0d0e21e 1161 case ALNUM:
ffc61ed2 1162 if (do_utf8) {
8269fa76 1163 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1164 while (s < strend) {
3568d838 1165 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1166 if (tmp && (norun || regtry(prog, s)))
1167 goto got_it;
1168 else
1169 tmp = doevery;
1170 }
bbce6d69 1171 else
ffc61ed2
JH
1172 tmp = 1;
1173 s += UTF8SKIP(s);
bbce6d69 1174 }
bbce6d69 1175 }
ffc61ed2
JH
1176 else {
1177 while (s < strend) {
1178 if (isALNUM(*s)) {
1179 if (tmp && (norun || regtry(prog, s)))
1180 goto got_it;
1181 else
1182 tmp = doevery;
1183 }
a0ed51b3 1184 else
ffc61ed2
JH
1185 tmp = 1;
1186 s++;
a0ed51b3 1187 }
a0ed51b3
LW
1188 }
1189 break;
bbce6d69 1190 case ALNUML:
3280af22 1191 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1192 if (do_utf8) {
1193 while (s < strend) {
1194 if (isALNUM_LC_utf8((U8*)s)) {
1195 if (tmp && (norun || regtry(prog, s)))
1196 goto got_it;
1197 else
1198 tmp = doevery;
1199 }
a687059c 1200 else
ffc61ed2
JH
1201 tmp = 1;
1202 s += UTF8SKIP(s);
a0d0e21e 1203 }
a0d0e21e 1204 }
ffc61ed2
JH
1205 else {
1206 while (s < strend) {
1207 if (isALNUM_LC(*s)) {
1208 if (tmp && (norun || regtry(prog, s)))
1209 goto got_it;
1210 else
1211 tmp = doevery;
1212 }
a0ed51b3 1213 else
ffc61ed2
JH
1214 tmp = 1;
1215 s++;
a0ed51b3 1216 }
a0ed51b3
LW
1217 }
1218 break;
a0d0e21e 1219 case NALNUM:
ffc61ed2 1220 if (do_utf8) {
8269fa76 1221 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1222 while (s < strend) {
3568d838 1223 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1224 if (tmp && (norun || regtry(prog, s)))
1225 goto got_it;
1226 else
1227 tmp = doevery;
1228 }
bbce6d69 1229 else
ffc61ed2
JH
1230 tmp = 1;
1231 s += UTF8SKIP(s);
bbce6d69 1232 }
bbce6d69 1233 }
ffc61ed2
JH
1234 else {
1235 while (s < strend) {
1236 if (!isALNUM(*s)) {
1237 if (tmp && (norun || regtry(prog, s)))
1238 goto got_it;
1239 else
1240 tmp = doevery;
1241 }
a0ed51b3 1242 else
ffc61ed2
JH
1243 tmp = 1;
1244 s++;
a0ed51b3 1245 }
a0ed51b3
LW
1246 }
1247 break;
bbce6d69 1248 case NALNUML:
3280af22 1249 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1250 if (do_utf8) {
1251 while (s < strend) {
1252 if (!isALNUM_LC_utf8((U8*)s)) {
1253 if (tmp && (norun || regtry(prog, s)))
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
a687059c 1258 else
ffc61ed2
JH
1259 tmp = 1;
1260 s += UTF8SKIP(s);
a687059c 1261 }
a0d0e21e 1262 }
ffc61ed2
JH
1263 else {
1264 while (s < strend) {
1265 if (!isALNUM_LC(*s)) {
1266 if (tmp && (norun || regtry(prog, s)))
1267 goto got_it;
1268 else
1269 tmp = doevery;
1270 }
a0ed51b3 1271 else
ffc61ed2
JH
1272 tmp = 1;
1273 s++;
a0ed51b3 1274 }
a0ed51b3
LW
1275 }
1276 break;
a0d0e21e 1277 case SPACE:
ffc61ed2 1278 if (do_utf8) {
8269fa76 1279 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1280 while (s < strend) {
3568d838 1281 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1282 if (tmp && (norun || regtry(prog, s)))
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
a0d0e21e 1287 else
ffc61ed2
JH
1288 tmp = 1;
1289 s += UTF8SKIP(s);
2304df62 1290 }
a0d0e21e 1291 }
ffc61ed2
JH
1292 else {
1293 while (s < strend) {
1294 if (isSPACE(*s)) {
1295 if (tmp && (norun || regtry(prog, s)))
1296 goto got_it;
1297 else
1298 tmp = doevery;
1299 }
a0ed51b3 1300 else
ffc61ed2
JH
1301 tmp = 1;
1302 s++;
a0ed51b3 1303 }
a0ed51b3
LW
1304 }
1305 break;
bbce6d69 1306 case SPACEL:
3280af22 1307 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1308 if (do_utf8) {
1309 while (s < strend) {
1310 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1311 if (tmp && (norun || regtry(prog, s)))
1312 goto got_it;
1313 else
1314 tmp = doevery;
1315 }
bbce6d69 1316 else
ffc61ed2
JH
1317 tmp = 1;
1318 s += UTF8SKIP(s);
bbce6d69 1319 }
bbce6d69 1320 }
ffc61ed2
JH
1321 else {
1322 while (s < strend) {
1323 if (isSPACE_LC(*s)) {
1324 if (tmp && (norun || regtry(prog, s)))
1325 goto got_it;
1326 else
1327 tmp = doevery;
1328 }
a0ed51b3 1329 else
ffc61ed2
JH
1330 tmp = 1;
1331 s++;
a0ed51b3 1332 }
a0ed51b3
LW
1333 }
1334 break;
a0d0e21e 1335 case NSPACE:
ffc61ed2 1336 if (do_utf8) {
8269fa76 1337 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1338 while (s < strend) {
3568d838 1339 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1340 if (tmp && (norun || regtry(prog, s)))
1341 goto got_it;
1342 else
1343 tmp = doevery;
1344 }
a0d0e21e 1345 else
ffc61ed2
JH
1346 tmp = 1;
1347 s += UTF8SKIP(s);
a687059c 1348 }
a0d0e21e 1349 }
ffc61ed2
JH
1350 else {
1351 while (s < strend) {
1352 if (!isSPACE(*s)) {
1353 if (tmp && (norun || regtry(prog, s)))
1354 goto got_it;
1355 else
1356 tmp = doevery;
1357 }
a0ed51b3 1358 else
ffc61ed2
JH
1359 tmp = 1;
1360 s++;
a0ed51b3 1361 }
a0ed51b3
LW
1362 }
1363 break;
bbce6d69 1364 case NSPACEL:
3280af22 1365 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1366 if (do_utf8) {
1367 while (s < strend) {
1368 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1369 if (tmp && (norun || regtry(prog, s)))
1370 goto got_it;
1371 else
1372 tmp = doevery;
1373 }
bbce6d69 1374 else
ffc61ed2
JH
1375 tmp = 1;
1376 s += UTF8SKIP(s);
bbce6d69 1377 }
bbce6d69 1378 }
ffc61ed2
JH
1379 else {
1380 while (s < strend) {
1381 if (!isSPACE_LC(*s)) {
1382 if (tmp && (norun || regtry(prog, s)))
1383 goto got_it;
1384 else
1385 tmp = doevery;
1386 }
a0ed51b3 1387 else
ffc61ed2
JH
1388 tmp = 1;
1389 s++;
a0ed51b3 1390 }
a0ed51b3
LW
1391 }
1392 break;
a0d0e21e 1393 case DIGIT:
ffc61ed2 1394 if (do_utf8) {
8269fa76 1395 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1396 while (s < strend) {
3568d838 1397 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1398 if (tmp && (norun || regtry(prog, s)))
1399 goto got_it;
1400 else
1401 tmp = doevery;
1402 }
a0d0e21e 1403 else
ffc61ed2
JH
1404 tmp = 1;
1405 s += UTF8SKIP(s);
2b69d0c2 1406 }
a0d0e21e 1407 }
ffc61ed2
JH
1408 else {
1409 while (s < strend) {
1410 if (isDIGIT(*s)) {
1411 if (tmp && (norun || regtry(prog, s)))
1412 goto got_it;
1413 else
1414 tmp = doevery;
1415 }
a0ed51b3 1416 else
ffc61ed2
JH
1417 tmp = 1;
1418 s++;
a0ed51b3 1419 }
a0ed51b3
LW
1420 }
1421 break;
b8c5462f
JH
1422 case DIGITL:
1423 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1424 if (do_utf8) {
1425 while (s < strend) {
1426 if (isDIGIT_LC_utf8((U8*)s)) {
1427 if (tmp && (norun || regtry(prog, s)))
1428 goto got_it;
1429 else
1430 tmp = doevery;
1431 }
b8c5462f 1432 else
ffc61ed2
JH
1433 tmp = 1;
1434 s += UTF8SKIP(s);
b8c5462f 1435 }
b8c5462f 1436 }
ffc61ed2
JH
1437 else {
1438 while (s < strend) {
1439 if (isDIGIT_LC(*s)) {
1440 if (tmp && (norun || regtry(prog, s)))
1441 goto got_it;
1442 else
1443 tmp = doevery;
1444 }
b8c5462f 1445 else
ffc61ed2
JH
1446 tmp = 1;
1447 s++;
b8c5462f 1448 }
b8c5462f
JH
1449 }
1450 break;
a0d0e21e 1451 case NDIGIT:
ffc61ed2 1452 if (do_utf8) {
8269fa76 1453 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1454 while (s < strend) {
3568d838 1455 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1456 if (tmp && (norun || regtry(prog, s)))
1457 goto got_it;
1458 else
1459 tmp = doevery;
1460 }
a0d0e21e 1461 else
ffc61ed2
JH
1462 tmp = 1;
1463 s += UTF8SKIP(s);
a687059c 1464 }
a0d0e21e 1465 }
ffc61ed2
JH
1466 else {
1467 while (s < strend) {
1468 if (!isDIGIT(*s)) {
1469 if (tmp && (norun || regtry(prog, s)))
1470 goto got_it;
1471 else
1472 tmp = doevery;
1473 }
a0ed51b3 1474 else
ffc61ed2
JH
1475 tmp = 1;
1476 s++;
a0ed51b3 1477 }
a0ed51b3
LW
1478 }
1479 break;
b8c5462f
JH
1480 case NDIGITL:
1481 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1482 if (do_utf8) {
1483 while (s < strend) {
1484 if (!isDIGIT_LC_utf8((U8*)s)) {
1485 if (tmp && (norun || regtry(prog, s)))
1486 goto got_it;
1487 else
1488 tmp = doevery;
1489 }
b8c5462f 1490 else
ffc61ed2
JH
1491 tmp = 1;
1492 s += UTF8SKIP(s);
b8c5462f 1493 }
a0ed51b3 1494 }
ffc61ed2
JH
1495 else {
1496 while (s < strend) {
1497 if (!isDIGIT_LC(*s)) {
1498 if (tmp && (norun || regtry(prog, s)))
1499 goto got_it;
1500 else
1501 tmp = doevery;
1502 }
cf93c79d 1503 else
ffc61ed2
JH
1504 tmp = 1;
1505 s++;
b8c5462f 1506 }
b8c5462f
JH
1507 }
1508 break;
b3c9acc1 1509 default:
3c3eec57
GS
1510 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1511 break;
d6a28714 1512 }
6eb5f6b9
JH
1513 return 0;
1514 got_it:
1515 return s;
1516}
1517
1518/*
1519 - regexec_flags - match a regexp against a string
1520 */
1521I32
1522Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1523 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1524/* strend: pointer to null at end of string */
1525/* strbeg: real beginning of string */
1526/* minend: end of match must be >=minend after stringarg. */
1527/* data: May be used for some additional optimizations. */
1528/* nosave: For optimizations. */
1529{
6eb5f6b9
JH
1530 register char *s;
1531 register regnode *c;
1532 register char *startpos = stringarg;
6eb5f6b9
JH
1533 I32 minlen; /* must match at least this many chars */
1534 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1535 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1536 constant substr. */ /* CC */
1537 I32 end_shift = 0; /* Same for the end. */ /* CC */
1538 I32 scream_pos = -1; /* Internal iterator of scream. */
1539 char *scream_olds;
1540 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1541 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1542#ifdef DEBUGGING
9e55ce06
JH
1543 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1544 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1545#endif
6eb5f6b9
JH
1546
1547 PL_regcc = 0;
1548
1549 cache_re(prog);
1550#ifdef DEBUGGING
aea4f609 1551 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1552#endif
1553
1554 /* Be paranoid... */
1555 if (prog == NULL || startpos == NULL) {
1556 Perl_croak(aTHX_ "NULL regexp parameter");
1557 return 0;
1558 }
1559
1560 minlen = prog->minlen;
61a36c01 1561 if (strend - startpos < minlen) {
a72c7584
JH
1562 DEBUG_r(PerlIO_printf(Perl_debug_log,
1563 "String too short [regexec_flags]...\n"));
1564 goto phooey;
1aa99e6b 1565 }
6eb5f6b9 1566
6eb5f6b9
JH
1567 /* Check validity of program. */
1568 if (UCHARAT(prog->program) != REG_MAGIC) {
1569 Perl_croak(aTHX_ "corrupted regexp program");
1570 }
1571
1572 PL_reg_flags = 0;
1573 PL_reg_eval_set = 0;
1574 PL_reg_maxiter = 0;
1575
1576 if (prog->reganch & ROPT_UTF8)
1577 PL_reg_flags |= RF_utf8;
1578
1579 /* Mark beginning of line for ^ and lookbehind. */
1580 PL_regbol = startpos;
1581 PL_bostr = strbeg;
1582 PL_reg_sv = sv;
1583
1584 /* Mark end of line for $ (and such) */
1585 PL_regeol = strend;
1586
1587 /* see how far we have to get to not match where we matched before */
1588 PL_regtill = startpos+minend;
1589
1590 /* We start without call_cc context. */
1591 PL_reg_call_cc = 0;
1592
1593 /* If there is a "must appear" string, look for it. */
1594 s = startpos;
1595
1596 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1597 MAGIC *mg;
1598
1599 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1600 PL_reg_ganch = startpos;
1601 else if (sv && SvTYPE(sv) >= SVt_PVMG
1602 && SvMAGIC(sv)
14befaf4
DM
1603 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1604 && mg->mg_len >= 0) {
6eb5f6b9
JH
1605 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1606 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1607 if (s > PL_reg_ganch)
6eb5f6b9
JH
1608 goto phooey;
1609 s = PL_reg_ganch;
1610 }
1611 }
1612 else /* pos() not defined */
1613 PL_reg_ganch = strbeg;
1614 }
1615
699c3c34
JH
1616 if (do_utf8 == (UTF!=0) &&
1617 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1618 re_scream_pos_data d;
1619
1620 d.scream_olds = &scream_olds;
1621 d.scream_pos = &scream_pos;
1622 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1623 if (!s) {
1624 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1625 goto phooey; /* not present */
3fa9c3d7 1626 }
6eb5f6b9
JH
1627 }
1628
2a782b5b 1629 DEBUG_r({
9e55ce06
JH
1630 char *s0 = UTF ?
1631 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1632 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1633 prog->precomp;
1634 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1635 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1636 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1637 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1638 if (!PL_colorset)
1639 reginitcolors();
1640 PerlIO_printf(Perl_debug_log,
9e55ce06 1641 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1642 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1643 len0, len0, s0,
2a782b5b 1644 PL_colors[1],
9e55ce06 1645 len0 > 60 ? "..." : "",
2a782b5b 1646 PL_colors[0],
9e55ce06
JH
1647 (int)(len1 > 60 ? 60 : len1),
1648 s1, PL_colors[1],
1649 (len1 > 60 ? "..." : "")
2a782b5b
JH
1650 );
1651 });
6eb5f6b9
JH
1652
1653 /* Simplest case: anchored match need be tried only once. */
1654 /* [unless only anchor is BOL and multiline is set] */
1655 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1656 if (s == startpos && regtry(prog, startpos))
1657 goto got_it;
1658 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1659 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1660 {
1661 char *end;
1662
1663 if (minlen)
1664 dontbother = minlen - 1;
1aa99e6b 1665 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1666 /* for multiline we only have to try after newlines */
1667 if (prog->check_substr) {
1668 if (s == startpos)
1669 goto after_try;
1670 while (1) {
1671 if (regtry(prog, s))
1672 goto got_it;
1673 after_try:
1674 if (s >= end)
1675 goto phooey;
1676 if (prog->reganch & RE_USE_INTUIT) {
1677 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1678 if (!s)
1679 goto phooey;
1680 }
1681 else
1682 s++;
1683 }
1684 } else {
1685 if (s > startpos)
1686 s--;
1687 while (s < end) {
1688 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1689 if (regtry(prog, s))
1690 goto got_it;
1691 }
1692 }
1693 }
1694 }
1695 goto phooey;
1696 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1697 if (regtry(prog, PL_reg_ganch))
1698 goto got_it;
1699 goto phooey;
1700 }
1701
1702 /* Messy cases: unanchored match. */
9041c2e3 1703 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1704 /* we have /x+whatever/ */
1705 /* it must be a one character string (XXXX Except UTF?) */
1706 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1707#ifdef DEBUGGING
1708 int did_match = 0;
1709#endif
1710
1aa99e6b 1711 if (do_utf8) {
6eb5f6b9
JH
1712 while (s < strend) {
1713 if (*s == ch) {
bf93d4cc 1714 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1715 if (regtry(prog, s)) goto got_it;
1716 s += UTF8SKIP(s);
1717 while (s < strend && *s == ch)
1718 s += UTF8SKIP(s);
1719 }
1720 s += UTF8SKIP(s);
1721 }
1722 }
1723 else {
1724 while (s < strend) {
1725 if (*s == ch) {
bf93d4cc 1726 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1727 if (regtry(prog, s)) goto got_it;
1728 s++;
1729 while (s < strend && *s == ch)
1730 s++;
1731 }
1732 s++;
1733 }
1734 }
b7953727 1735 DEBUG_r(if (!did_match)
bf93d4cc 1736 PerlIO_printf(Perl_debug_log,
b7953727
JH
1737 "Did not find anchored character...\n")
1738 );
6eb5f6b9
JH
1739 }
1740 /*SUPPRESS 560*/
1aa99e6b
IH
1741 else if (do_utf8 == (UTF!=0) &&
1742 (prog->anchored_substr != Nullsv
9041c2e3 1743 || (prog->float_substr != Nullsv
1aa99e6b 1744 && prog->float_max_offset < strend - s))) {
9041c2e3 1745 SV *must = prog->anchored_substr
6eb5f6b9 1746 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1747 I32 back_max =
6eb5f6b9 1748 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1749 I32 back_min =
6eb5f6b9 1750 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1751 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1752 -(I32)(CHR_SVLEN(must)
1aa99e6b 1753 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1754 char *last1; /* Last position checked before */
bf93d4cc
GS
1755#ifdef DEBUGGING
1756 int did_match = 0;
1757#endif
6eb5f6b9
JH
1758
1759 if (s > PL_bostr)
1760 last1 = HOPc(s, -1);
1761 else
1762 last1 = s - 1; /* bogus */
1763
1764 /* XXXX check_substr already used to find `s', can optimize if
1765 check_substr==must. */
1766 scream_pos = -1;
1767 dontbother = end_shift;
1768 strend = HOPc(strend, -dontbother);
1769 while ( (s <= last) &&
9041c2e3 1770 ((flags & REXEC_SCREAM)
1aa99e6b 1771 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1772 end_shift, &scream_pos, 0))
1aa99e6b 1773 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1774 (unsigned char*)strend, must,
6eb5f6b9 1775 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1776 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1777 if (HOPc(s, -back_max) > last1) {
1778 last1 = HOPc(s, -back_min);
1779 s = HOPc(s, -back_max);
1780 }
1781 else {
1782 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1783
1784 last1 = HOPc(s, -back_min);
1785 s = t;
1786 }
1aa99e6b 1787 if (do_utf8) {
6eb5f6b9
JH
1788 while (s <= last1) {
1789 if (regtry(prog, s))
1790 goto got_it;
1791 s += UTF8SKIP(s);
1792 }
1793 }
1794 else {
1795 while (s <= last1) {
1796 if (regtry(prog, s))
1797 goto got_it;
1798 s++;
1799 }
1800 }
1801 }
b7953727
JH
1802 DEBUG_r(if (!did_match)
1803 PerlIO_printf(Perl_debug_log,
1804 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1805 ((must == prog->anchored_substr)
1806 ? "anchored" : "floating"),
1807 PL_colors[0],
1808 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1809 SvPVX(must),
b7953727
JH
1810 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1811 );
6eb5f6b9
JH
1812 goto phooey;
1813 }
155aba94 1814 else if ((c = prog->regstclass)) {
66e933ab
GS
1815 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1816 /* don't bother with what can't match */
6eb5f6b9 1817 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1818 DEBUG_r({
1819 SV *prop = sv_newmortal();
9e55ce06
JH
1820 char *s0;
1821 char *s1;
1822 int len0;
1823 int len1;
1824
ffc61ed2 1825 regprop(prop, c);
9e55ce06
JH
1826 s0 = UTF ?
1827 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1828 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1829 SvPVX(prop);
1830 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1831 s1 = UTF ?
c728cb41 1832 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1833 len1 = UTF ? SvCUR(dsv1) : strend - s;
1834 PerlIO_printf(Perl_debug_log,
1835 "Matching stclass `%*.*s' against `%*.*s'\n",
1836 len0, len0, s0,
1837 len1, len1, s1);
ffc61ed2 1838 });
6eb5f6b9
JH
1839 if (find_byclass(prog, c, s, strend, startpos, 0))
1840 goto got_it;
bf93d4cc 1841 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1842 }
1843 else {
1844 dontbother = 0;
1845 if (prog->float_substr != Nullsv) { /* Trim the end. */
1846 char *last;
d6a28714
JH
1847
1848 if (flags & REXEC_SCREAM) {
1849 last = screaminstr(sv, prog->float_substr, s - strbeg,
1850 end_shift, &scream_pos, 1); /* last one */
1851 if (!last)
ffc61ed2 1852 last = scream_olds; /* Only one occurrence. */
b8c5462f 1853 }
d6a28714
JH
1854 else {
1855 STRLEN len;
1856 char *little = SvPV(prog->float_substr, len);
1857
1858 if (SvTAIL(prog->float_substr)) {
1859 if (memEQ(strend - len + 1, little, len - 1))
1860 last = strend - len + 1;
1861 else if (!PL_multiline)
9041c2e3 1862 last = memEQ(strend - len, little, len)
d6a28714 1863 ? strend - len : Nullch;
b8c5462f 1864 else
d6a28714
JH
1865 goto find_last;
1866 } else {
1867 find_last:
9041c2e3 1868 if (len)
d6a28714 1869 last = rninstr(s, strend, little, little + len);
b8c5462f 1870 else
d6a28714 1871 last = strend; /* matching `$' */
b8c5462f 1872 }
b8c5462f 1873 }
bf93d4cc
GS
1874 if (last == NULL) {
1875 DEBUG_r(PerlIO_printf(Perl_debug_log,
1876 "%sCan't trim the tail, match fails (should not happen)%s\n",
1877 PL_colors[4],PL_colors[5]));
1878 goto phooey; /* Should not happen! */
1879 }
d6a28714
JH
1880 dontbother = strend - last + prog->float_min_offset;
1881 }
1882 if (minlen && (dontbother < minlen))
1883 dontbother = minlen - 1;
1884 strend -= dontbother; /* this one's always in bytes! */
1885 /* We don't know much -- general case. */
1aa99e6b 1886 if (do_utf8) {
d6a28714
JH
1887 for (;;) {
1888 if (regtry(prog, s))
1889 goto got_it;
1890 if (s >= strend)
1891 break;
b8c5462f 1892 s += UTF8SKIP(s);
d6a28714
JH
1893 };
1894 }
1895 else {
1896 do {
1897 if (regtry(prog, s))
1898 goto got_it;
1899 } while (s++ < strend);
1900 }
1901 }
1902
1903 /* Failure. */
1904 goto phooey;
1905
1906got_it:
1907 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1908
1909 if (PL_reg_eval_set) {
1910 /* Preserve the current value of $^R */
1911 if (oreplsv != GvSV(PL_replgv))
1912 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1913 restored, the value remains
1914 the same. */
acfe0abc 1915 restore_pos(aTHX_ 0);
d6a28714
JH
1916 }
1917
1918 /* make sure $`, $&, $', and $digit will work later */
1919 if ( !(flags & REXEC_NOT_FIRST) ) {
1920 if (RX_MATCH_COPIED(prog)) {
1921 Safefree(prog->subbeg);
1922 RX_MATCH_COPIED_off(prog);
1923 }
1924 if (flags & REXEC_COPY_STR) {
1925 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1926
1927 s = savepvn(strbeg, i);
1928 prog->subbeg = s;
1929 prog->sublen = i;
1930 RX_MATCH_COPIED_on(prog);
1931 }
1932 else {
1933 prog->subbeg = strbeg;
1934 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1935 }
1936 }
9041c2e3 1937
d6a28714
JH
1938 return 1;
1939
1940phooey:
bf93d4cc
GS
1941 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1942 PL_colors[4],PL_colors[5]));
d6a28714 1943 if (PL_reg_eval_set)
acfe0abc 1944 restore_pos(aTHX_ 0);
d6a28714
JH
1945 return 0;
1946}
1947
1948/*
1949 - regtry - try match at specific point
1950 */
1951STATIC I32 /* 0 failure, 1 success */
1952S_regtry(pTHX_ regexp *prog, char *startpos)
1953{
d6a28714
JH
1954 register I32 i;
1955 register I32 *sp;
1956 register I32 *ep;
1957 CHECKPOINT lastcp;
1958
02db2b7b
IZ
1959#ifdef DEBUGGING
1960 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1961#endif
d6a28714
JH
1962 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1963 MAGIC *mg;
1964
1965 PL_reg_eval_set = RS_init;
1966 DEBUG_r(DEBUG_s(
b900a521
JH
1967 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1968 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1969 ));
e8347627 1970 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1971 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1972 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1973 SAVETMPS;
1974 /* Apparently this is not needed, judging by wantarray. */
e8347627 1975 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1976 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1977
1978 if (PL_reg_sv) {
1979 /* Make $_ available to executed code. */
1980 if (PL_reg_sv != DEFSV) {
4d1ff10f 1981 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1982 SAVESPTR(DEFSV);
1983 DEFSV = PL_reg_sv;
b8c5462f 1984 }
d6a28714 1985
9041c2e3 1986 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1987 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1988 /* prepare for quick setting of pos */
14befaf4
DM
1989 sv_magic(PL_reg_sv, (SV*)0,
1990 PERL_MAGIC_regex_global, Nullch, 0);
1991 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1992 mg->mg_len = -1;
b8c5462f 1993 }
d6a28714
JH
1994 PL_reg_magic = mg;
1995 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1996 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1997 }
09687e5a 1998 if (!PL_reg_curpm) {
0f79a09d 1999 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2000#ifdef USE_ITHREADS
2001 {
2002 SV* repointer = newSViv(0);
577e12cc 2003 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2004 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2005 av_push(PL_regex_padav,repointer);
2006 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2007 PL_regex_pad = AvARRAY(PL_regex_padav);
2008 }
2009#endif
2010 }
aaa362c4 2011 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2012 PL_reg_oldcurpm = PL_curpm;
2013 PL_curpm = PL_reg_curpm;
2014 if (RX_MATCH_COPIED(prog)) {
2015 /* Here is a serious problem: we cannot rewrite subbeg,
2016 since it may be needed if this match fails. Thus
2017 $` inside (?{}) could fail... */
2018 PL_reg_oldsaved = prog->subbeg;
2019 PL_reg_oldsavedlen = prog->sublen;
2020 RX_MATCH_COPIED_off(prog);
2021 }
2022 else
2023 PL_reg_oldsaved = Nullch;
2024 prog->subbeg = PL_bostr;
2025 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2026 }
2027 prog->startp[0] = startpos - PL_bostr;
2028 PL_reginput = startpos;
2029 PL_regstartp = prog->startp;
2030 PL_regendp = prog->endp;
2031 PL_reglastparen = &prog->lastparen;
a01268b5 2032 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2033 prog->lastparen = 0;
2034 PL_regsize = 0;
2035 DEBUG_r(PL_reg_starttry = startpos);
2036 if (PL_reg_start_tmpl <= prog->nparens) {
2037 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2038 if(PL_reg_start_tmp)
2039 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2040 else
2041 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2042 }
2043
128e8167
JH
2044#ifdef DEBUGGING
2045 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2046 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2047 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2048#endif
2049
d6a28714
JH
2050 /* XXXX What this code is doing here?!!! There should be no need
2051 to do this again and again, PL_reglastparen should take care of
3dd2943c 2052 this! --ilya*/
dafc8851
JH
2053
2054 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2055 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2056 * PL_reglastparen), is not needed at all by the test suite
2057 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2058 * enough, for building DynaLoader, or otherwise this
2059 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2060 * will happen. Meanwhile, this code *is* needed for the
2061 * above-mentioned test suite tests to succeed. The common theme
2062 * on those tests seems to be returning null fields from matches.
2063 * --jhi */
dafc8851 2064#if 1
d6a28714
JH
2065 sp = prog->startp;
2066 ep = prog->endp;
2067 if (prog->nparens) {
09e8ae3b 2068 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2069 *++sp = -1;
2070 *++ep = -1;
2071 }
2072 }
dafc8851 2073#endif
02db2b7b 2074 REGCP_SET(lastcp);
d6a28714
JH
2075 if (regmatch(prog->program + 1)) {
2076 prog->endp[0] = PL_reginput - PL_bostr;
2077 return 1;
2078 }
02db2b7b 2079 REGCP_UNWIND(lastcp);
d6a28714
JH
2080 return 0;
2081}
2082
02db2b7b
IZ
2083#define RE_UNWIND_BRANCH 1
2084#define RE_UNWIND_BRANCHJ 2
2085
2086union re_unwind_t;
2087
2088typedef struct { /* XX: makes sense to enlarge it... */
2089 I32 type;
2090 I32 prev;
2091 CHECKPOINT lastcp;
2092} re_unwind_generic_t;
2093
2094typedef struct {
2095 I32 type;
2096 I32 prev;
2097 CHECKPOINT lastcp;
2098 I32 lastparen;
2099 regnode *next;
2100 char *locinput;
2101 I32 nextchr;
2102#ifdef DEBUGGING
2103 int regindent;
2104#endif
2105} re_unwind_branch_t;
2106
2107typedef union re_unwind_t {
2108 I32 type;
2109 re_unwind_generic_t generic;
2110 re_unwind_branch_t branch;
2111} re_unwind_t;
2112
8ba1375e
MJD
2113#define sayYES goto yes
2114#define sayNO goto no
e0f9d4a8 2115#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2116#define sayYES_FINAL goto yes_final
2117#define sayYES_LOUD goto yes_loud
2118#define sayNO_FINAL goto no_final
2119#define sayNO_SILENT goto do_no
2120#define saySAME(x) if (x) goto yes; else goto no
2121
2122#define REPORT_CODE_OFF 24
2123
d6a28714
JH
2124/*
2125 - regmatch - main matching routine
2126 *
2127 * Conceptually the strategy is simple: check to see whether the current
2128 * node matches, call self recursively to see whether the rest matches,
2129 * and then act accordingly. In practice we make some effort to avoid
2130 * recursion, in particular by going through "ordinary" nodes (that don't
2131 * need to know whether the rest of the match failed) by a loop instead of
2132 * by recursion.
2133 */
2134/* [lwall] I've hoisted the register declarations to the outer block in order to
2135 * maybe save a little bit of pushing and popping on the stack. It also takes
2136 * advantage of machines that use a register save mask on subroutine entry.
2137 */
2138STATIC I32 /* 0 failure, 1 success */
2139S_regmatch(pTHX_ regnode *prog)
2140{
d6a28714
JH
2141 register regnode *scan; /* Current node. */
2142 regnode *next; /* Next node. */
2143 regnode *inner; /* Next node in internal branch. */
2144 register I32 nextchr; /* renamed nextchr - nextchar colides with
2145 function of same name */
2146 register I32 n; /* no or next */
b7953727
JH
2147 register I32 ln = 0; /* len or last */
2148 register char *s = Nullch; /* operand or save */
d6a28714 2149 register char *locinput = PL_reginput;
b7953727 2150 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2151 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2152 I32 unwind = 0;
b7953727 2153#if 0
02db2b7b 2154 I32 firstcp = PL_savestack_ix;
b7953727 2155#endif
53c4c00c 2156 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2157#ifdef DEBUGGING
ce333219
JH
2158 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2159 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2160 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2161#endif
02db2b7b 2162
d6a28714
JH
2163#ifdef DEBUGGING
2164 PL_regindent++;
2165#endif
2166
2167 /* Note that nextchr is a byte even in UTF */
2168 nextchr = UCHARAT(locinput);
2169 scan = prog;
2170 while (scan != NULL) {
8ba1375e 2171
2a782b5b 2172 DEBUG_r( {
d6a28714
JH
2173 SV *prop = sv_newmortal();
2174 int docolor = *PL_colors[0];
2175 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2176 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2177 /* The part of the string before starttry has one color
2178 (pref0_len chars), between starttry and current
2179 position another one (pref_len - pref0_len chars),
2180 after the current position the third one.
2181 We assume that pref0_len <= pref_len, otherwise we
2182 decrease pref0_len. */
9041c2e3 2183 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2184 ? (5 + taill) - l : locinput - PL_bostr;
2185 int pref0_len;
d6a28714 2186
df1ffd02 2187 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2188 pref_len++;
2189 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2190 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2191 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2192 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2193 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2194 l--;
d6a28714
JH
2195 if (pref0_len < 0)
2196 pref0_len = 0;
2197 if (pref0_len > pref_len)
2198 pref0_len = pref_len;
2199 regprop(prop, scan);
2a782b5b
JH
2200 {
2201 char *s0 =
df1ffd02 2202 do_utf8 ?
2a782b5b 2203 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2204 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2205 locinput - pref_len;
df1ffd02
JH
2206 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2207 char *s1 = do_utf8 ?
2a782b5b 2208 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2209 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2210 locinput - pref_len + pref0_len;
df1ffd02
JH
2211 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2212 char *s2 = do_utf8 ?
2a782b5b 2213 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2214 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2215 locinput;
df1ffd02 2216 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2217 PerlIO_printf(Perl_debug_log,
2218 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2219 (IV)(locinput - PL_bostr),
2220 PL_colors[4],
2221 len0, s0,
2222 PL_colors[5],
2223 PL_colors[2],
2224 len1, s1,
2225 PL_colors[3],
2226 (docolor ? "" : "> <"),
2227 PL_colors[0],
2228 len2, s2,
2229 PL_colors[1],
2230 15 - l - pref_len + 1,
2231 "",
2232 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2233 SvPVX(prop));
2234 }
2235 });
d6a28714
JH
2236
2237 next = scan + NEXT_OFF(scan);
2238 if (next == scan)
2239 next = NULL;
2240
2241 switch (OP(scan)) {
2242 case BOL:
12d33761
HS
2243 if (locinput == PL_bostr || (PL_multiline &&
2244 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2245 {
2246 /* regtill = regbol; */
b8c5462f
JH
2247 break;
2248 }
d6a28714
JH
2249 sayNO;
2250 case MBOL:
12d33761
HS
2251 if (locinput == PL_bostr ||
2252 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2253 {
b8c5462f
JH
2254 break;
2255 }
d6a28714
JH
2256 sayNO;
2257 case SBOL:
c2a73568 2258 if (locinput == PL_bostr)
b8c5462f 2259 break;
d6a28714
JH
2260 sayNO;
2261 case GPOS:
2262 if (locinput == PL_reg_ganch)
2263 break;
2264 sayNO;
2265 case EOL:
2266 if (PL_multiline)
2267 goto meol;
2268 else
2269 goto seol;
2270 case MEOL:
2271 meol:
2272 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2273 sayNO;
b8c5462f 2274 break;
d6a28714
JH
2275 case SEOL:
2276 seol:
2277 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2278 sayNO;
d6a28714 2279 if (PL_regeol - locinput > 1)
b8c5462f 2280 sayNO;
b8c5462f 2281 break;
d6a28714
JH
2282 case EOS:
2283 if (PL_regeol != locinput)
b8c5462f 2284 sayNO;
d6a28714 2285 break;
ffc61ed2 2286 case SANY:
d6a28714 2287 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2288 sayNO;
f33976b4
DB
2289 if (do_utf8) {
2290 locinput += PL_utf8skip[nextchr];
2291 if (locinput > PL_regeol)
2292 sayNO;
2293 nextchr = UCHARAT(locinput);
2294 }
2295 else
2296 nextchr = UCHARAT(++locinput);
2297 break;
2298 case CANY:
2299 if (!nextchr && locinput >= PL_regeol)
2300 sayNO;
b8c5462f 2301 nextchr = UCHARAT(++locinput);
a0d0e21e 2302 break;
ffc61ed2 2303 case REG_ANY:
1aa99e6b
IH
2304 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2305 sayNO;
2306 if (do_utf8) {
b8c5462f 2307 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2308 if (locinput > PL_regeol)
2309 sayNO;
a0ed51b3 2310 nextchr = UCHARAT(locinput);
a0ed51b3 2311 }
1aa99e6b
IH
2312 else
2313 nextchr = UCHARAT(++locinput);
a0ed51b3 2314 break;
d6a28714 2315 case EXACT:
cd439c50
IZ
2316 s = STRING(scan);
2317 ln = STR_LEN(scan);
1aa99e6b 2318 if (do_utf8 != (UTF!=0)) {
bc517b45 2319 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2320 char *l = locinput;
2321 char *e = s + ln;
bc517b45 2322 STRLEN ulen;
a72c7584 2323
5ff6fc6d
JH
2324 if (do_utf8) {
2325 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2326 while (s < e) {
2327 if (l >= PL_regeol)
5ff6fc6d
JH
2328 sayNO;
2329 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2330 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2331 sayNO;
bc517b45 2332 l += ulen;
5ff6fc6d 2333 s ++;
1aa99e6b 2334 }
5ff6fc6d
JH
2335 }
2336 else {
2337 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2338 while (s < e) {
2339 if (l >= PL_regeol)
2340 sayNO;
5ff6fc6d 2341 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2342 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2343 sayNO;
bc517b45 2344 s += ulen;
a72c7584 2345 l ++;
1aa99e6b 2346 }
5ff6fc6d 2347 }
1aa99e6b
IH
2348 locinput = l;
2349 nextchr = UCHARAT(locinput);
2350 break;
2351 }
bc517b45 2352 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2353 /* Inline the first character, for speed. */
2354 if (UCHARAT(s) != nextchr)
2355 sayNO;
2356 if (PL_regeol - locinput < ln)
2357 sayNO;
2358 if (ln > 1 && memNE(s, locinput, ln))
2359 sayNO;
2360 locinput += ln;
2361 nextchr = UCHARAT(locinput);
2362 break;
2363 case EXACTFL:
b8c5462f
JH
2364 PL_reg_flags |= RF_tainted;
2365 /* FALL THROUGH */
d6a28714 2366 case EXACTF:
cd439c50
IZ
2367 s = STRING(scan);
2368 ln = STR_LEN(scan);
d6a28714 2369
d07ddd77
JH
2370 if (do_utf8 || UTF) {
2371 /* Either target or the pattern are utf8. */
d6a28714 2372 char *l = locinput;
d07ddd77 2373 char *e = PL_regeol;
bc517b45 2374
d07ddd77
JH
2375 if (ibcmp_utf8(s, 0, ln, do_utf8,
2376 l, &e, 0, UTF))
2377 sayNO;
2378 locinput = e;
2379 nextchr = UCHARAT(locinput);
2380 break;
a0ed51b3 2381 }
d6a28714 2382
bc517b45
JH
2383 /* Neither the target and the pattern are utf8. */
2384
d6a28714
JH
2385 /* Inline the first character, for speed. */
2386 if (UCHARAT(s) != nextchr &&
2387 UCHARAT(s) != ((OP(scan) == EXACTF)
2388 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2389 sayNO;
d6a28714 2390 if (PL_regeol - locinput < ln)
b8c5462f 2391 sayNO;
d6a28714
JH
2392 if (ln > 1 && (OP(scan) == EXACTF
2393 ? ibcmp(s, locinput, ln)
2394 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2395 sayNO;
d6a28714
JH
2396 locinput += ln;
2397 nextchr = UCHARAT(locinput);
a0d0e21e 2398 break;
d6a28714 2399 case ANYOF:
ffc61ed2 2400 if (do_utf8) {
9e55ce06
JH
2401 STRLEN inclasslen = PL_regeol - locinput;
2402
2403 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2404 sayNO_ANYOF;
ffc61ed2
JH
2405 if (locinput >= PL_regeol)
2406 sayNO;
9e55ce06 2407 locinput += inclasslen;
b8c5462f 2408 nextchr = UCHARAT(locinput);
e0f9d4a8 2409 break;
ffc61ed2
JH
2410 }
2411 else {
2412 if (nextchr < 0)
2413 nextchr = UCHARAT(locinput);
2414 if (!reginclass(scan, (U8*)locinput, do_utf8))
e0f9d4a8 2415 sayNO_ANYOF;
ffc61ed2
JH
2416 if (!nextchr && locinput >= PL_regeol)
2417 sayNO;
2418 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2419 break;
2420 }
2421 no_anyof:
2422 /* If we might have the case of the German sharp s
2423 * in a casefolding Unicode character class. */
2424
2425 if (ANYOF_UNICODE_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2426 locinput += 2;
2427 nextchr = UCHARAT(locinput);
ffc61ed2 2428 }
e0f9d4a8
JH
2429 else
2430 sayNO;
b8c5462f 2431 break;
d6a28714 2432 case ALNUML:
b8c5462f
JH
2433 PL_reg_flags |= RF_tainted;
2434 /* FALL THROUGH */
d6a28714 2435 case ALNUM:
b8c5462f 2436 if (!nextchr)
4633a7c4 2437 sayNO;
ffc61ed2 2438 if (do_utf8) {
ad24be35 2439 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2440 if (!(OP(scan) == ALNUM
3568d838 2441 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2442 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2443 {
2444 sayNO;
a0ed51b3 2445 }
b8c5462f 2446 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2447 nextchr = UCHARAT(locinput);
2448 break;
2449 }
ffc61ed2 2450 if (!(OP(scan) == ALNUM
d6a28714 2451 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2452 sayNO;
b8c5462f 2453 nextchr = UCHARAT(++locinput);
a0d0e21e 2454 break;
d6a28714 2455 case NALNUML:
b8c5462f
JH
2456 PL_reg_flags |= RF_tainted;
2457 /* FALL THROUGH */
d6a28714
JH
2458 case NALNUM:
2459 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2460 sayNO;
ffc61ed2 2461 if (do_utf8) {
8269fa76 2462 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2463 if (OP(scan) == NALNUM
3568d838 2464 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2465 : isALNUM_LC_utf8((U8*)locinput))
2466 {
b8c5462f 2467 sayNO;
d6a28714 2468 }
b8c5462f
JH
2469 locinput += PL_utf8skip[nextchr];
2470 nextchr = UCHARAT(locinput);
2471 break;
2472 }
ffc61ed2 2473 if (OP(scan) == NALNUM
d6a28714 2474 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2475 sayNO;
76e3520e 2476 nextchr = UCHARAT(++locinput);
a0d0e21e 2477 break;
d6a28714
JH
2478 case BOUNDL:
2479 case NBOUNDL:
3280af22 2480 PL_reg_flags |= RF_tainted;
bbce6d69 2481 /* FALL THROUGH */
d6a28714
JH
2482 case BOUND:
2483 case NBOUND:
2484 /* was last char in word? */
ffc61ed2 2485 if (do_utf8) {
12d33761
HS
2486 if (locinput == PL_bostr)
2487 ln = '\n';
ffc61ed2
JH
2488 else {
2489 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2490
2b9d42f0 2491 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2492 }
2493 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2494 ln = isALNUM_uni(ln);
8269fa76 2495 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2496 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2497 }
2498 else {
9041c2e3 2499 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2500 n = isALNUM_LC_utf8((U8*)locinput);
2501 }
a0ed51b3 2502 }
d6a28714 2503 else {
12d33761
HS
2504 ln = (locinput != PL_bostr) ?
2505 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2506 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2507 ln = isALNUM(ln);
2508 n = isALNUM(nextchr);
2509 }
2510 else {
2511 ln = isALNUM_LC(ln);
2512 n = isALNUM_LC(nextchr);
2513 }
d6a28714 2514 }
ffc61ed2
JH
2515 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2516 OP(scan) == BOUNDL))
2517 sayNO;
a0ed51b3 2518 break;
d6a28714 2519 case SPACEL:
3280af22 2520 PL_reg_flags |= RF_tainted;
bbce6d69 2521 /* FALL THROUGH */
d6a28714 2522 case SPACE:
9442cb0e 2523 if (!nextchr)
4633a7c4 2524 sayNO;
1aa99e6b 2525 if (do_utf8) {
fd400ab9 2526 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2527 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2528 if (!(OP(scan) == SPACE
3568d838 2529 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2530 : isSPACE_LC_utf8((U8*)locinput)))
2531 {
2532 sayNO;
2533 }
2534 locinput += PL_utf8skip[nextchr];
2535 nextchr = UCHARAT(locinput);
2536 break;
d6a28714 2537 }
ffc61ed2
JH
2538 if (!(OP(scan) == SPACE
2539 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2540 sayNO;
2541 nextchr = UCHARAT(++locinput);
2542 }
2543 else {
2544 if (!(OP(scan) == SPACE
2545 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2546 sayNO;
2547 nextchr = UCHARAT(++locinput);
a0ed51b3 2548 }
a0ed51b3 2549 break;
d6a28714 2550 case NSPACEL:
3280af22 2551 PL_reg_flags |= RF_tainted;
bbce6d69 2552 /* FALL THROUGH */
d6a28714 2553 case NSPACE:
9442cb0e 2554 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2555 sayNO;
1aa99e6b 2556 if (do_utf8) {
8269fa76 2557 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2558 if (OP(scan) == NSPACE
3568d838 2559 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2560 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2561 {
2562 sayNO;
2563 }
2564 locinput += PL_utf8skip[nextchr];
2565 nextchr = UCHARAT(locinput);
2566 break;
a0ed51b3 2567 }
ffc61ed2 2568 if (OP(scan) == NSPACE
d6a28714 2569 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2570 sayNO;
76e3520e 2571 nextchr = UCHARAT(++locinput);
a0d0e21e 2572 break;
d6a28714 2573 case DIGITL:
a0ed51b3
LW
2574 PL_reg_flags |= RF_tainted;
2575 /* FALL THROUGH */
d6a28714 2576 case DIGIT:
9442cb0e 2577 if (!nextchr)
a0ed51b3 2578 sayNO;
1aa99e6b 2579 if (do_utf8) {
8269fa76 2580 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2581 if (!(OP(scan) == DIGIT
3568d838 2582 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2583 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2584 {
a0ed51b3 2585 sayNO;
dfe13c55 2586 }
6f06b55f 2587 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2588 nextchr = UCHARAT(locinput);
2589 break;
2590 }
ffc61ed2 2591 if (!(OP(scan) == DIGIT
9442cb0e 2592 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2593 sayNO;
2594 nextchr = UCHARAT(++locinput);
2595 break;
d6a28714 2596 case NDIGITL:
b8c5462f
JH
2597 PL_reg_flags |= RF_tainted;
2598 /* FALL THROUGH */
d6a28714 2599 case NDIGIT:
9442cb0e 2600 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2601 sayNO;
1aa99e6b 2602 if (do_utf8) {
8269fa76 2603 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2604 if (OP(scan) == NDIGIT
3568d838 2605 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2606 : isDIGIT_LC_utf8((U8*)locinput))
2607 {
a0ed51b3 2608 sayNO;
9442cb0e 2609 }
6f06b55f 2610 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2611 nextchr = UCHARAT(locinput);
2612 break;
2613 }
ffc61ed2 2614 if (OP(scan) == NDIGIT
9442cb0e 2615 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2616 sayNO;
2617 nextchr = UCHARAT(++locinput);
2618 break;
2619 case CLUMP:
b7c83a7e 2620 if (locinput >= PL_regeol)
a0ed51b3 2621 sayNO;
b7c83a7e
JH
2622 if (do_utf8) {
2623 LOAD_UTF8_CHARCLASS(mark,"~");
2624 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2625 sayNO;
2626 locinput += PL_utf8skip[nextchr];
2627 while (locinput < PL_regeol &&
2628 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2629 locinput += UTF8SKIP(locinput);
2630 if (locinput > PL_regeol)
2631 sayNO;
eb08e2da
JH
2632 }
2633 else
2634 locinput++;
a0ed51b3
LW
2635 nextchr = UCHARAT(locinput);
2636 break;
c8756f30 2637 case REFFL:
3280af22 2638 PL_reg_flags |= RF_tainted;
c8756f30 2639 /* FALL THROUGH */
c277df42 2640 case REF:
c8756f30 2641 case REFF:
c277df42 2642 n = ARG(scan); /* which paren pair */
cf93c79d 2643 ln = PL_regstartp[n];
2c2d71f5 2644 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2645 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2646 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2647 if (ln == PL_regendp[n])
a0d0e21e 2648 break;
a0ed51b3 2649
cf93c79d 2650 s = PL_bostr + ln;
1aa99e6b 2651 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2652 char *l = locinput;
cf93c79d 2653 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2654 /*
2655 * Note that we can't do the "other character" lookup trick as
2656 * in the 8-bit case (no pun intended) because in Unicode we
2657 * have to map both upper and title case to lower case.
2658 */
2659 if (OP(scan) == REFF) {
a2a2844f 2660 STRLEN ulen1, ulen2;
e7ae6809
JH
2661 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2662 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2663 while (s < e) {
2664 if (l >= PL_regeol)
2665 sayNO;
a2a2844f
JH
2666 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2667 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2668 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2669 sayNO;
a2a2844f
JH
2670 s += ulen1;
2671 l += ulen2;
a0ed51b3
LW
2672 }
2673 }
2674 locinput = l;
2675 nextchr = UCHARAT(locinput);
2676 break;
2677 }
2678
a0d0e21e 2679 /* Inline the first character, for speed. */
76e3520e 2680 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2681 (OP(scan) == REF ||
2682 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2683 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2684 sayNO;
cf93c79d 2685 ln = PL_regendp[n] - ln;
3280af22 2686 if (locinput + ln > PL_regeol)
4633a7c4 2687 sayNO;
c8756f30
AK
2688 if (ln > 1 && (OP(scan) == REF
2689 ? memNE(s, locinput, ln)
2690 : (OP(scan) == REFF
2691 ? ibcmp(s, locinput, ln)
2692 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2693 sayNO;
a0d0e21e 2694 locinput += ln;
76e3520e 2695 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2696 break;
2697
2698 case NOTHING:
c277df42 2699 case TAIL:
a0d0e21e
LW
2700 break;
2701 case BACK:
2702 break;
c277df42
IZ
2703 case EVAL:
2704 {
2705 dSP;
533c011a 2706 OP_4tree *oop = PL_op;
3280af22
NIS
2707 COP *ocurcop = PL_curcop;
2708 SV **ocurpad = PL_curpad;
c277df42 2709 SV *ret;
9041c2e3 2710
c277df42 2711 n = ARG(scan);
533c011a 2712 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2713 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2714 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2715 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2716
8e5e9ebe
RGS
2717 {
2718 SV **before = SP;
2719 CALLRUNOPS(aTHX); /* Scalar context. */
2720 SPAGAIN;
2721 if (SP == before)
2722 ret = Nullsv; /* protect against empty (?{}) blocks. */
2723 else {
2724 ret = POPs;
2725 PUTBACK;
2726 }
2727 }
2728
0f5d15d6
IZ
2729 PL_op = oop;
2730 PL_curpad = ocurpad;
2731 PL_curcop = ocurcop;
c277df42 2732 if (logical) {
0f5d15d6
IZ
2733 if (logical == 2) { /* Postponed subexpression. */
2734 regexp *re;
22c35a8c 2735 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2736 re_cc_state state;
0f5d15d6
IZ
2737 CHECKPOINT cp, lastcp;
2738
2739 if(SvROK(ret) || SvRMAGICAL(ret)) {
2740 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2741
2742 if(SvMAGICAL(sv))
14befaf4 2743 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2744 }
2745 if (mg) {
2746 re = (regexp *)mg->mg_obj;
df0003d4 2747 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2748 }
2749 else {
2750 STRLEN len;
2751 char *t = SvPV(ret, len);
2752 PMOP pm;
2753 char *oprecomp = PL_regprecomp;
2754 I32 osize = PL_regsize;
2755 I32 onpar = PL_regnpar;
2756
5fcd1c1b 2757 Zero(&pm, 1, PMOP);
cea2e8a9 2758 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2759 if (!(SvFLAGS(ret)
0f5d15d6 2760 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2761 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2762 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2763 PL_regprecomp = oprecomp;
2764 PL_regsize = osize;
2765 PL_regnpar = onpar;
2766 }
2767 DEBUG_r(
9041c2e3 2768 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2769 "Entering embedded `%s%.60s%s%s'\n",
2770 PL_colors[0],
2771 re->precomp,
2772 PL_colors[1],
2773 (strlen(re->precomp) > 60 ? "..." : ""))
2774 );
2775 state.node = next;
2776 state.prev = PL_reg_call_cc;
2777 state.cc = PL_regcc;
2778 state.re = PL_reg_re;
2779
2ab05381 2780 PL_regcc = 0;
9041c2e3 2781
0f5d15d6 2782 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2783 REGCP_SET(lastcp);
0f5d15d6
IZ
2784 cache_re(re);
2785 state.ss = PL_savestack_ix;
2786 *PL_reglastparen = 0;
a01268b5 2787 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2788 PL_reg_call_cc = &state;
2789 PL_reginput = locinput;
2c2d71f5
JH
2790
2791 /* XXXX This is too dramatic a measure... */
2792 PL_reg_maxiter = 0;
2793
0f5d15d6 2794 if (regmatch(re->program + 1)) {
2c914db6
IZ
2795 /* Even though we succeeded, we need to restore
2796 global variables, since we may be wrapped inside
2797 SUSPEND, thus the match may be not finished yet. */
2798
2799 /* XXXX Do this only if SUSPENDed? */
2800 PL_reg_call_cc = state.prev;
2801 PL_regcc = state.cc;
2802 PL_reg_re = state.re;
2803 cache_re(PL_reg_re);
2804
2805 /* XXXX This is too dramatic a measure... */
2806 PL_reg_maxiter = 0;
2807
2808 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2809 ReREFCNT_dec(re);
2810 regcpblow(cp);
2811 sayYES;
2812 }
0f5d15d6 2813 ReREFCNT_dec(re);
02db2b7b 2814 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2815 regcppop();
2816 PL_reg_call_cc = state.prev;
2817 PL_regcc = state.cc;
2818 PL_reg_re = state.re;
d3790889 2819 cache_re(PL_reg_re);
2c2d71f5
JH
2820
2821 /* XXXX This is too dramatic a measure... */
2822 PL_reg_maxiter = 0;
2823
8e514ae6 2824 logical = 0;
0f5d15d6
IZ
2825 sayNO;
2826 }
c277df42 2827 sw = SvTRUE(ret);
0f5d15d6 2828 logical = 0;
a0ed51b3
LW
2829 }
2830 else
3280af22 2831 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2832 break;
2833 }
a0d0e21e 2834 case OPEN:
c277df42 2835 n = ARG(scan); /* which paren pair */
3280af22
NIS
2836 PL_reg_start_tmp[n] = locinput;
2837 if (n > PL_regsize)
2838 PL_regsize = n;
a0d0e21e
LW
2839 break;
2840 case CLOSE:
c277df42 2841 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2842 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2843 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2844 if (n > *PL_reglastparen)
2845 *PL_reglastparen = n;
a01268b5 2846 *PL_reglastcloseparen = n;
a0d0e21e 2847 break;
c277df42
IZ
2848 case GROUPP:
2849 n = ARG(scan); /* which paren pair */
cf93c79d 2850 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2851 break;
2852 case IFTHEN:
2c2d71f5 2853 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2854 if (sw)
2855 next = NEXTOPER(NEXTOPER(scan));
2856 else {
2857 next = scan + ARG(scan);
2858 if (OP(next) == IFTHEN) /* Fake one. */
2859 next = NEXTOPER(NEXTOPER(next));
2860 }
2861 break;
2862 case LOGICAL:
0f5d15d6 2863 logical = scan->flags;
c277df42 2864 break;
2ab05381
IZ
2865/*******************************************************************
2866 PL_regcc contains infoblock about the innermost (...)* loop, and
2867 a pointer to the next outer infoblock.
2868
2869 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2870
2871 1) After matching X, regnode for CURLYX is processed;
2872
9041c2e3 2873 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2874 regmatch() recursively with the starting point at WHILEM node;
2875
2876 3) Each hit of WHILEM node tries to match A and Z (in the order
2877 depending on the current iteration, min/max of {min,max} and
2878 greediness). The information about where are nodes for "A"
2879 and "Z" is read from the infoblock, as is info on how many times "A"
2880 was already matched, and greediness.
2881
2882 4) After A matches, the same WHILEM node is hit again.
2883
2884 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2885 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2886 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2887 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2888 of the external loop.
2889
2890 Currently present infoblocks form a tree with a stem formed by PL_curcc
2891 and whatever it mentions via ->next, and additional attached trees
2892 corresponding to temporarily unset infoblocks as in "5" above.
2893
9041c2e3 2894 In the following picture infoblocks for outer loop of
2ab05381
IZ
2895 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2896 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2897 infoblocks are drawn below the "reset" infoblock.
2898
2899 In fact in the picture below we do not show failed matches for Z and T
2900 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2901 more obvious *why* one needs to *temporary* unset infoblocks.]
2902
2903 Matched REx position InfoBlocks Comment
2904 (Y(A)*?Z)*?T x
2905 Y(A)*?Z)*?T x <- O
2906 Y (A)*?Z)*?T x <- O
2907 Y A)*?Z)*?T x <- O <- I
2908 YA )*?Z)*?T x <- O <- I
2909 YA A)*?Z)*?T x <- O <- I
2910 YAA )*?Z)*?T x <- O <- I
2911 YAA Z)*?T x <- O # Temporary unset I
2912 I
2913
2914 YAAZ Y(A)*?Z)*?T x <- O
2915 I
2916
2917 YAAZY (A)*?Z)*?T x <- O
2918 I
2919
2920 YAAZY A)*?Z)*?T x <- O <- I
2921 I
2922
2923 YAAZYA )*?Z)*?T x <- O <- I
2924 I
2925
2926 YAAZYA Z)*?T x <- O # Temporary unset I
2927 I,I
2928
2929 YAAZYAZ )*?T x <- O
2930 I,I
2931
2932 YAAZYAZ T x # Temporary unset O
2933 O
2934 I,I
2935
2936 YAAZYAZT x
2937 O
2938 I,I
2939 *******************************************************************/
a0d0e21e
LW
2940 case CURLYX: {
2941 CURCUR cc;
3280af22 2942 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2943 /* No need to save/restore up to this paren */
2944 I32 parenfloor = scan->flags;
c277df42
IZ
2945
2946 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2947 next += ARG(next);
3280af22
NIS
2948 cc.oldcc = PL_regcc;
2949 PL_regcc = &cc;
cb434fcc
IZ
2950 /* XXXX Probably it is better to teach regpush to support
2951 parenfloor > PL_regsize... */
2952 if (parenfloor > *PL_reglastparen)
2953 parenfloor = *PL_reglastparen; /* Pessimization... */
2954 cc.parenfloor = parenfloor;
a0d0e21e
LW
2955 cc.cur = -1;
2956 cc.min = ARG1(scan);
2957 cc.max = ARG2(scan);
c277df42 2958 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2959 cc.next = next;
2960 cc.minmod = minmod;
2961 cc.lastloc = 0;
3280af22 2962 PL_reginput = locinput;
a0d0e21e
LW
2963 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2964 regcpblow(cp);
3280af22 2965 PL_regcc = cc.oldcc;
4633a7c4 2966 saySAME(n);
a0d0e21e
LW
2967 }
2968 /* NOT REACHED */
2969 case WHILEM: {
2970 /*
2971 * This is really hard to understand, because after we match
2972 * what we're trying to match, we must make sure the rest of
2c2d71f5 2973 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2974 * to go back UP the parse tree by recursing ever deeper. And
2975 * if it fails, we have to reset our parent's current state
2976 * that we can try again after backing off.
2977 */
2978
c277df42 2979 CHECKPOINT cp, lastcp;
3280af22 2980 CURCUR* cc = PL_regcc;
c277df42
IZ
2981 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2982
4633a7c4 2983 n = cc->cur + 1; /* how many we know we matched */
3280af22 2984 PL_reginput = locinput;
a0d0e21e 2985
c277df42 2986 DEBUG_r(
9041c2e3
NIS
2987 PerlIO_printf(Perl_debug_log,
2988 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2989 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2990 (long)n, (long)cc->min,
c277df42
IZ
2991 (long)cc->max, (long)cc)
2992 );
4633a7c4 2993
a0d0e21e
LW
2994 /* If degenerate scan matches "", assume scan done. */
2995
579cf2c3 2996 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2997 PL_regcc = cc->oldcc;
2ab05381
IZ
2998 if (PL_regcc)
2999 ln = PL_regcc->cur;
c277df42 3000 DEBUG_r(
c3464db5
DD
3001 PerlIO_printf(Perl_debug_log,
3002 "%*s empty match detected, try continuation...\n",
3280af22 3003 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3004 );
a0d0e21e 3005 if (regmatch(cc->next))
4633a7c4 3006 sayYES;
2ab05381
IZ
3007 if (PL_regcc)
3008 PL_regcc->cur = ln;
3280af22 3009 PL_regcc = cc;
4633a7c4 3010 sayNO;
a0d0e21e
LW
3011 }
3012
3013 /* First just match a string of min scans. */
3014
3015 if (n < cc->min) {
3016 cc->cur = n;
3017 cc->lastloc = locinput;
4633a7c4
LW
3018 if (regmatch(cc->scan))
3019 sayYES;
3020 cc->cur = n - 1;
c277df42 3021 cc->lastloc = lastloc;
4633a7c4 3022 sayNO;
a0d0e21e
LW
3023 }
3024
2c2d71f5
JH
3025 if (scan->flags) {
3026 /* Check whether we already were at this position.
3027 Postpone detection until we know the match is not
3028 *that* much linear. */
3029 if (!PL_reg_maxiter) {
3030 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3031 PL_reg_leftiter = PL_reg_maxiter;
3032 }
3033 if (PL_reg_leftiter-- == 0) {
3034 I32 size = (PL_reg_maxiter + 7)/8;
3035 if (PL_reg_poscache) {
3036 if (PL_reg_poscache_size < size) {
3037 Renew(PL_reg_poscache, size, char);
3038 PL_reg_poscache_size = size;
3039 }
3040 Zero(PL_reg_poscache, size, char);
3041 }
3042 else {
3043 PL_reg_poscache_size = size;
3044 Newz(29, PL_reg_poscache, size, char);
3045 }
3046 DEBUG_r(
3047 PerlIO_printf(Perl_debug_log,
3048 "%sDetected a super-linear match, switching on caching%s...\n",
3049 PL_colors[4], PL_colors[5])
3050 );
3051 }
3052 if (PL_reg_leftiter < 0) {
3053 I32 o = locinput - PL_bostr, b;
3054
3055 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3056 b = o % 8;
3057 o /= 8;
3058 if (PL_reg_poscache[o] & (1<<b)) {
3059 DEBUG_r(
3060 PerlIO_printf(Perl_debug_log,
3061 "%*s already tried at this position...\n",
3062 REPORT_CODE_OFF+PL_regindent*2, "")
3063 );
7821416a 3064 sayNO_SILENT;
2c2d71f5
JH
3065 }
3066 PL_reg_poscache[o] |= (1<<b);
3067 }
3068 }
3069
a0d0e21e
LW
3070 /* Prefer next over scan for minimal matching. */
3071
3072 if (cc->minmod) {
3280af22 3073 PL_regcc = cc->oldcc;
2ab05381
IZ
3074 if (PL_regcc)
3075 ln = PL_regcc->cur;
5f05dabc 3076 cp = regcppush(cc->parenfloor);
02db2b7b 3077 REGCP_SET(lastcp);
5f05dabc 3078 if (regmatch(cc->next)) {
c277df42 3079 regcpblow(cp);
4633a7c4 3080 sayYES; /* All done. */
5f05dabc 3081 }
02db2b7b 3082 REGCP_UNWIND(lastcp);
5f05dabc 3083 regcppop();
2ab05381
IZ
3084 if (PL_regcc)
3085 PL_regcc->cur = ln;
3280af22 3086 PL_regcc = cc;
a0d0e21e 3087
c277df42 3088 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3089 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3090 && !(PL_reg_flags & RF_warned)) {
3091 PL_reg_flags |= RF_warned;
e476b1b5 3092 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3093 "Complex regular subexpression recursion",
3094 REG_INFTY - 1);
c277df42 3095 }
4633a7c4 3096 sayNO;
c277df42 3097 }
a687059c 3098
c277df42 3099 DEBUG_r(
c3464db5
DD
3100 PerlIO_printf(Perl_debug_log,
3101 "%*s trying longer...\n",
3280af22 3102 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3103 );
a0d0e21e 3104 /* Try scanning more and see if it helps. */
3280af22 3105 PL_reginput = locinput;
a0d0e21e
LW
3106 cc->cur = n;
3107 cc->lastloc = locinput;
5f05dabc 3108 cp = regcppush(cc->parenfloor);
02db2b7b 3109 REGCP_SET(lastcp);
5f05dabc 3110 if (regmatch(cc->scan)) {
c277df42 3111 regcpblow(cp);
4633a7c4 3112 sayYES;
5f05dabc 3113 }
02db2b7b 3114 REGCP_UNWIND(lastcp);
5f05dabc 3115 regcppop();
4633a7c4 3116 cc->cur = n - 1;
c277df42 3117 cc->lastloc = lastloc;
4633a7c4 3118 sayNO;
a0d0e21e
LW
3119 }
3120
3121 /* Prefer scan over next for maximal matching. */
3122
3123 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3124 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3125 cc->cur = n;
3126 cc->lastloc = locinput;
02db2b7b 3127 REGCP_SET(lastcp);
5f05dabc 3128 if (regmatch(cc->scan)) {
c277df42 3129 regcpblow(cp);
4633a7c4 3130 sayYES;
5f05dabc 3131 }
02db2b7b 3132 REGCP_UNWIND(lastcp);
a0d0e21e 3133 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3134 PL_reginput = locinput;
c277df42 3135 DEBUG_r(
c3464db5
DD
3136 PerlIO_printf(Perl_debug_log,
3137 "%*s failed, try continuation...\n",
3280af22 3138 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3139 );
3140 }
9041c2e3 3141 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3142 && !(PL_reg_flags & RF_warned)) {
3280af22 3143 PL_reg_flags |= RF_warned;
e476b1b5 3144 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3145 "Complex regular subexpression recursion",
3146 REG_INFTY - 1);
a0d0e21e
LW
3147 }
3148
3149 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3150 PL_regcc = cc->oldcc;
2ab05381
IZ
3151 if (PL_regcc)
3152 ln = PL_regcc->cur;
a0d0e21e 3153 if (regmatch(cc->next))
4633a7c4 3154 sayYES;
2ab05381
IZ
3155 if (PL_regcc)
3156 PL_regcc->cur = ln;
3280af22 3157 PL_regcc = cc;
4633a7c4 3158 cc->cur = n - 1;
c277df42 3159 cc->lastloc = lastloc;
4633a7c4 3160 sayNO;
a0d0e21e
LW
3161 }
3162 /* NOT REACHED */
9041c2e3 3163 case BRANCHJ:
c277df42
IZ
3164 next = scan + ARG(scan);
3165 if (next == scan)
3166 next = NULL;
3167 inner = NEXTOPER(NEXTOPER(scan));
3168 goto do_branch;
9041c2e3 3169 case BRANCH:
c277df42
IZ
3170 inner = NEXTOPER(scan);
3171 do_branch:
3172 {
c277df42
IZ
3173 c1 = OP(scan);
3174 if (OP(next) != c1) /* No choice. */
3175 next = inner; /* Avoid recursion. */
a0d0e21e 3176 else {
02db2b7b
IZ
3177 I32 lastparen = *PL_reglastparen;
3178 I32 unwind1;
3179 re_unwind_branch_t *uw;
3180
3181 /* Put unwinding data on stack */
3182 unwind1 = SSNEWt(1,re_unwind_branch_t);
3183 uw = SSPTRt(unwind1,re_unwind_branch_t);
3184 uw->prev = unwind;
3185 unwind = unwind1;
3186 uw->type = ((c1 == BRANCH)
3187 ? RE_UNWIND_BRANCH
3188 : RE_UNWIND_BRANCHJ);
3189 uw->lastparen = lastparen;
3190 uw->next = next;
3191 uw->locinput = locinput;
3192 uw->nextchr = nextchr;
3193#ifdef DEBUGGING
3194 uw->regindent = ++PL_regindent;
3195#endif
c277df42 3196
02db2b7b
IZ
3197 REGCP_SET(uw->lastcp);
3198
3199 /* Now go into the first branch */
3200 next = inner;
a687059c 3201 }
a0d0e21e
LW
3202 }
3203 break;
3204 case MINMOD:
3205 minmod = 1;
3206 break;
c277df42
IZ
3207 case CURLYM:
3208 {
00db4c45 3209 I32 l = 0;
c277df42 3210 CHECKPOINT lastcp;
9041c2e3 3211
c277df42
IZ
3212 /* We suppose that the next guy does not need
3213 backtracking: in particular, it is of constant length,
3214 and has no parenths to influence future backrefs. */
3215 ln = ARG1(scan); /* min to match */
3216 n = ARG2(scan); /* max to match */
c277df42
IZ
3217 paren = scan->flags;
3218 if (paren) {
3280af22
NIS
3219 if (paren > PL_regsize)
3220 PL_regsize = paren;
3221 if (paren > *PL_reglastparen)
3222 *PL_reglastparen = paren;
c277df42 3223 }
dc45a647 3224 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3225 if (paren)
3226 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3227 PL_reginput = locinput;
c277df42
IZ
3228 if (minmod) {
3229 minmod = 0;
3230 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3231 sayNO;
f31a99c8
HS
3232 /* if we matched something zero-length we don't need to
3233 backtrack - capturing parens are already defined, so
3234 the caveat in the maximal case doesn't apply
3235
3236 XXXX if ln == 0, we can redo this check first time
3237 through the following loop
3238 */
3239 if (ln && l == 0)
3240 n = ln; /* don't backtrack */
3280af22 3241 locinput = PL_reginput;
cca55fe3 3242 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3243 regnode *text_node = next;
3244
cca55fe3 3245 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3246
cca55fe3 3247 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3248 else {
cca55fe3
JP
3249 if (PL_regkind[(U8)OP(text_node)] == REF) {
3250 I32 n, ln;
3251 n = ARG(text_node); /* which paren pair */
3252 ln = PL_regstartp[n];
3253 /* assume yes if we haven't seen CLOSEn */
3254 if (
3255 *PL_reglastparen < n ||
3256 ln == -1 ||
3257 ln == PL_regendp[n]
3258 ) {
3259 c1 = c2 = -1000;
3260 goto assume_ok_MM;
3261 }
3262 c1 = *(PL_bostr + ln);
3263 }
3264 else { c1 = (U8)*STRING(text_node); }
af5decee 3265 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3266 c2 = PL_fold[c1];
af5decee 3267 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3268 c2 = PL_fold_locale[c1];
3269 else
3270 c2 = c1;
3271 }
a0ed51b3
LW
3272 }
3273 else
c277df42 3274 c1 = c2 = -1000;
cca55fe3 3275 assume_ok_MM:
02db2b7b 3276 REGCP_SET(lastcp);
5f4b28b2 3277 /* This may be improved if l == 0. */
c277df42
IZ
3278 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3279 /* If it could work, try it. */
3280 if (c1 == -1000 ||
3280af22
NIS
3281 UCHARAT(PL_reginput) == c1 ||
3282 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3283 {
3284 if (paren) {
f31a99c8 3285 if (ln) {
cf93c79d
IZ
3286 PL_regstartp[paren] =
3287 HOPc(PL_reginput, -l) - PL_bostr;
3288 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3289 }
3290 else
cf93c79d 3291 PL_regendp[paren] = -1;
c277df42
IZ
3292 }
3293 if (regmatch(next))
3294 sayYES;
02db2b7b 3295 REGCP_UNWIND(lastcp);
c277df42
IZ
3296 }
3297 /* Couldn't or didn't -- move forward. */
3280af22 3298 PL_reginput = locinput;
c277df42
IZ
3299 if (regrepeat_hard(scan, 1, &l)) {
3300 ln++;
3280af22 3301 locinput = PL_reginput;
c277df42
IZ
3302 }
3303 else
3304 sayNO;
3305 }
a0ed51b3
LW
3306 }
3307 else {
c277df42 3308 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3309 /* if we matched something zero-length we don't need to
3310 backtrack, unless the minimum count is zero and we
3311 are capturing the result - in that case the capture
3312 being defined or not may affect later execution
3313 */
3314 if (n != 0 && l == 0 && !(paren && ln == 0))
3315 ln = n; /* don't backtrack */
3280af22 3316 locinput = PL_reginput;
c277df42 3317 DEBUG_r(
5c0ca799 3318 PerlIO_printf(Perl_debug_log,
faccc32b 3319 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3320 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3321 (IV) n, (IV)l)
c277df42
IZ
3322 );
3323 if (n >= ln) {
cca55fe3 3324 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3325 regnode *text_node = next;
3326
cca55fe3 3327 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3328
cca55fe3 3329 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3330 else {
cca55fe3
JP
3331 if (PL_regkind[(U8)OP(text_node)] == REF) {
3332 I32 n, ln;
3333 n = ARG(text_node); /* which paren pair */
3334 ln = PL_regstartp[n];
3335 /* assume yes if we haven't seen CLOSEn */
3336 if (
3337 *PL_reglastparen < n ||
3338 ln == -1 ||
3339 ln == PL_regendp[n]
3340 ) {
3341 c1 = c2 = -1000;
3342 goto assume_ok_REG;
3343 }
3344 c1 = *(PL_bostr + ln);
3345 }
3346 else { c1 = (U8)*STRING(text_node); }
3347
af5decee 3348 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3349 c2 = PL_fold[c1];
af5decee 3350 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3351 c2 = PL_fold_locale[c1];
3352 else
3353 c2 = c1;
3354 }
a0ed51b3
LW
3355 }
3356 else
c277df42
IZ
3357 c1 = c2 = -1000;
3358 }
cca55fe3 3359 assume_ok_REG:
02db2b7b 3360 REGCP_SET(lastcp);
c277df42
IZ
3361 while (n >= ln) {
3362 /* If it could work, try it. */
3363 if (c1 == -1000 ||
3280af22
NIS
3364 UCHARAT(PL_reginput) == c1 ||
3365 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3366 {
3367 DEBUG_r(
c3464db5 3368 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3369 "%*s trying tail with n=%"IVdf"...\n",
3370 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3371 );
3372 if (paren) {
3373 if (n) {
cf93c79d
IZ
3374 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3375 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3376 }
a0ed51b3 3377 else
cf93c79d 3378 PL_regendp[paren] = -1;
c277df42 3379 }
a0ed51b3
LW
3380 if (regmatch(next))
3381 sayYES;
02db2b7b 3382 REGCP_UNWIND(lastcp);
a0ed51b3 3383 }
c277df42
IZ
3384 /* Couldn't or didn't -- back up. */
3385 n--;
dfe13c55 3386 locinput = HOPc(locinput, -l);
3280af22 3387 PL_reginput = locinput;
c277df42
IZ
3388 }
3389 }
3390 sayNO;
3391 break;
3392 }
3393 case CURLYN:
3394 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3395 if (paren > PL_regsize)
3396 PL_regsize = paren;
3397 if (paren > *PL_reglastparen)
3398 *PL_reglastparen = paren;
c277df42
IZ
3399 ln = ARG1(scan); /* min to match */
3400 n = ARG2(scan); /* max to match */
dc45a647 3401 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3402 goto repeat;
a0d0e21e 3403 case CURLY:
c277df42 3404 paren = 0;
a0d0e21e
LW
3405 ln = ARG1(scan); /* min to match */
3406 n = ARG2(scan); /* max to match */
dc45a647 3407 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3408 goto repeat;
3409 case STAR:
3410 ln = 0;
c277df42 3411 n = REG_INFTY;
a0d0e21e 3412 scan = NEXTOPER(scan);
c277df42 3413 paren = 0;
a0d0e21e
LW
3414 goto repeat;
3415 case PLUS:
c277df42
IZ
3416 ln = 1;
3417 n = REG_INFTY;
3418 scan = NEXTOPER(scan);
3419 paren = 0;
3420 repeat:
a0d0e21e
LW
3421 /*
3422 * Lookahead to avoid useless match attempts
3423 * when we know what character comes next.
3424 */
5f80c4cf
JP
3425
3426 /*
3427 * Used to only do .*x and .*?x, but now it allows
3428 * for )'s, ('s and (?{ ... })'s to be in the way
3429 * of the quantifier and the EXACT-like node. -- japhy
3430 */
3431
cca55fe3 3432 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3433 U8 *s;
3434 regnode *text_node = next;
3435
cca55fe3 3436 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3437
cca55fe3 3438 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3439 else {
cca55fe3
JP
3440 if (PL_regkind[(U8)OP(text_node)] == REF) {
3441 I32 n, ln;
3442 n = ARG(text_node); /* which paren pair */
3443 ln = PL_regstartp[n];
3444 /* assume yes if we haven't seen CLOSEn */
3445 if (
3446 *PL_reglastparen < n ||
3447 ln == -1 ||
3448 ln == PL_regendp[n]
3449 ) {
3450 c1 = c2 = -1000;
3451 goto assume_ok_easy;
3452 }
9246c65e 3453 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3454 }
3455 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3456
3457 if (!UTF) {
3458 c2 = c1 = *s;
f65d3ee7 3459 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3460 c2 = PL_fold[c1];
f65d3ee7 3461 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3462 c2 = PL_fold_locale[c1];
1aa99e6b 3463 }
5f80c4cf 3464 else { /* UTF */
f65d3ee7 3465 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3466 STRLEN ulen1, ulen2;
e7ae6809
JH
3467 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3468 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3469
3470 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3471 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3472
3473 c1 = utf8_to_uvuni(tmpbuf1, 0);
3474 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3475 }
3476 else {
3477 c2 = c1 = utf8_to_uvchr(s, NULL);
3478 }
1aa99e6b
IH
3479 }
3480 }
bbce6d69 3481 }
a0d0e21e 3482 else
bbce6d69 3483 c1 = c2 = -1000;
cca55fe3 3484 assume_ok_easy:
3280af22 3485 PL_reginput = locinput;
a0d0e21e 3486 if (minmod) {
c277df42 3487 CHECKPOINT lastcp;
a0d0e21e
LW
3488 minmod = 0;
3489 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3490 sayNO;
a0ed51b3 3491 locinput = PL_reginput;
02db2b7b 3492 REGCP_SET(lastcp);
0fe9bf95 3493 if (c1 != -1000) {
1aa99e6b 3494 char *e; /* Should not check after this */
0fe9bf95
IZ
3495 char *old = locinput;
3496
1aa99e6b 3497 if (n == REG_INFTY) {
0fe9bf95 3498 e = PL_regeol - 1;
1aa99e6b
IH
3499 if (do_utf8)
3500 while (UTF8_IS_CONTINUATION(*(U8*)e))
3501 e--;
3502 }
3503 else if (do_utf8) {
3504 int m = n - ln;
3505 for (e = locinput;
3506 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3507 e += UTF8SKIP(e);
3508 }
3509 else {
3510 e = locinput + n - ln;
3511 if (e >= PL_regeol)
3512 e = PL_regeol - 1;
3513 }
0fe9bf95 3514 while (1) {
1aa99e6b 3515 int count;
0fe9bf95 3516 /* Find place 'next' could work */
1aa99e6b
IH
3517 if (!do_utf8) {
3518 if (c1 == c2) {
a8e8ab15
JH
3519 while (locinput <= e &&
3520 UCHARAT(locinput) != c1)
1aa99e6b
IH
3521 locinput++;
3522 } else {
9041c2e3 3523 while (locinput <= e
a8e8ab15
JH
3524 && UCHARAT(locinput) != c1
3525 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3526 locinput++;
3527 }
3528 count = locinput - old;
3529 }
3530 else {
3531 STRLEN len;
3532 if (c1 == c2) {
3533 for (count = 0;
3534 locinput <= e &&
9041c2e3 3535 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3536 count++)
3537 locinput += len;
3538
3539 } else {
3540 for (count = 0; locinput <= e; count++) {
9041c2e3 3541 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3542 if (c == c1 || c == c2)
3543 break;
9041c2e3 3544 locinput += len;
1aa99e6b
IH
3545 }
3546 }
0fe9bf95 3547 }
9041c2e3 3548 if (locinput > e)
0fe9bf95
IZ
3549 sayNO;
3550 /* PL_reginput == old now */
3551 if (locinput != old) {
3552 ln = 1; /* Did some */
1aa99e6b 3553 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3554 sayNO;
3555 }
3556 /* PL_reginput == locinput now */
29d1e993 3557 TRYPAREN(paren, ln, locinput);
0fe9bf95 3558 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3559 REGCP_UNWIND(lastcp);
0fe9bf95 3560 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3561 old = locinput;
3562 if (do_utf8)
3563 locinput += UTF8SKIP(locinput);
3564 else
3565 locinput++;
0fe9bf95
IZ
3566 }
3567 }
3568 else
c277df42 3569 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3570 UV c;
3571 if (c1 != -1000) {
3572 if (do_utf8)
9041c2e3 3573 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3574 else
9041c2e3 3575 c = UCHARAT(PL_reginput);
2390ecbc
PP
3576 /* If it could work, try it. */
3577 if (c == c1 || c == c2)
3578 {
3579 TRYPAREN(paren, n, PL_reginput);
3580 REGCP_UNWIND(lastcp);
3581 }
1aa99e6b 3582 }
a0d0e21e 3583 /* If it could work, try it. */
2390ecbc 3584 else if (c1 == -1000)
bbce6d69 3585 {
29d1e993 3586 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3587 REGCP_UNWIND(lastcp);
bbce6d69 3588 }
c277df42 3589 /* Couldn't or didn't -- move forward. */
a0ed51b3 3590 PL_reginput = locinput;
a0d0e21e
LW
3591 if (regrepeat(scan, 1)) {
3592 ln++;
a0ed51b3
LW
3593 locinput = PL_reginput;
3594 }
3595 else
4633a7c4 3596 sayNO;
a0d0e21e
LW
3597 }
3598 }
3599 else {
c277df42 3600 CHECKPOINT lastcp;
a0d0e21e 3601 n = regrepeat(scan, n);
a0ed51b3 3602 locinput = PL_reginput;
22c35a8c 3603 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3604 ((!PL_multiline && OP(next) != MEOL) ||
3605 OP(next) == SEOL || OP(next) == EOS))
3606 {
a0d0e21e 3607 ln = n; /* why back off? */
1aeab75a
GS
3608 /* ...because $ and \Z can match before *and* after
3609 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3610 We should back off by one in this case. */
3611 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3612 ln--;
3613 }
02db2b7b 3614 REGCP_SET(lastcp);
c277df42 3615 if (paren) {
8fa7f367 3616 UV c = 0;
c277df42 3617 while (n >= ln) {
1aa99e6b
IH
3618 if (c1 != -1000) {
3619 if (do_utf8)
9041c2e3 3620 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3621 else
9041c2e3 3622 c = UCHARAT(PL_reginput);
1aa99e6b 3623 }
c277df42 3624 /* If it could work, try it. */
1aa99e6b 3625 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3626 {
29d1e993 3627 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3628 REGCP_UNWIND(lastcp);
c277df42
IZ
3629 }
3630 /* Couldn't or didn't -- back up. */
3631 n--;
dfe13c55 3632 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3633 }
a0ed51b3
LW
3634 }
3635 else {
8fa7f367 3636 UV c = 0;
c277df42 3637 while (n >= ln) {
1aa99e6b
IH
3638 if (c1 != -1000) {
3639 if (do_utf8)
9041c2e3 3640 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3641 else
9041c2e3 3642 c = UCHARAT(PL_reginput);
1aa99e6b 3643 }
c277df42 3644 /* If it could work, try it. */
1aa99e6b 3645 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3646 {
29d1e993 3647 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3648 REGCP_UNWIND(lastcp);
c277df42
IZ
3649 }
3650 /* Couldn't or didn't -- back up. */
3651 n--;
dfe13c55 3652 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3653 }
a0d0e21e
LW
3654 }
3655 }
4633a7c4 3656 sayNO;
c277df42 3657 break;
a0d0e21e 3658 case END:
0f5d15d6
IZ
3659 if (PL_reg_call_cc) {
3660 re_cc_state *cur_call_cc = PL_reg_call_cc;
3661 CURCUR *cctmp = PL_regcc;
3662 regexp *re = PL_reg_re;
3663 CHECKPOINT cp, lastcp;
3664
3665 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3666 REGCP_SET(lastcp);
0f5d15d6
IZ
3667 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3668 the caller. */
3669 PL_reginput = locinput; /* Make position available to
3670 the callcc. */
3671 cache_re(PL_reg_call_cc->re);
3672 PL_regcc = PL_reg_call_cc->cc;
3673 PL_reg_call_cc = PL_reg_call_cc->prev;
3674 if (regmatch(cur_call_cc->node)) {
3675 PL_reg_call_cc = cur_call_cc;
3676 regcpblow(cp);
3677 sayYES;
3678 }
02db2b7b 3679 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3680 regcppop();
3681 PL_reg_call_cc = cur_call_cc;
3682 PL_regcc = cctmp;
3683 PL_reg_re = re;
3684 cache_re(re);
3685
3686 DEBUG_r(
3687 PerlIO_printf(Perl_debug_log,
3688 "%*s continuation failed...\n",
3689 REPORT_CODE_OFF+PL_regindent*2, "")
3690 );
7821416a 3691 sayNO_SILENT;
0f5d15d6 3692 }
7821416a
IZ
3693 if (locinput < PL_regtill) {
3694 DEBUG_r(PerlIO_printf(Perl_debug_log,
3695 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3696 PL_colors[4],
3697 (long)(locinput - PL_reg_starttry),
3698 (long)(PL_regtill - PL_reg_starttry),
3699 PL_colors[5]));
3700 sayNO_FINAL; /* Cannot match: too short. */
3701 }
3702 PL_reginput = locinput; /* put where regtry can find it */
3703 sayYES_FINAL; /* Success! */
7e5428c5 3704 case SUCCEED:
3280af22 3705 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3706 sayYES_LOUD; /* Success! */
c277df42
IZ
3707 case SUSPEND:
3708 n = 1;
9fe1d20c 3709 PL_reginput = locinput;
9041c2e3 3710 goto do_ifmatch;
a0d0e21e 3711 case UNLESSM:
c277df42 3712 n = 0;
a0ed51b3 3713 if (scan->flags) {
efb30f32
HS
3714 s = HOPBACKc(locinput, scan->flags);
3715 if (!s)
3716 goto say_yes;
3717 PL_reginput = s;
a0ed51b3
LW
3718 }
3719 else
3720 PL_reginput = locinput;
c277df42
IZ
3721 goto do_ifmatch;
3722 case IFMATCH:
3723 n = 1;
a0ed51b3 3724 if (scan->flags) {
efb30f32
HS
3725 s = HOPBACKc(locinput, scan->flags);
3726 if (!s)
3727 goto say_no;
3728 PL_reginput = s;
a0ed51b3
LW
3729 }
3730 else
3731 PL_reginput = locinput;
3732
c277df42 3733 do_ifmatch:
c277df42
IZ
3734 inner = NEXTOPER(NEXTOPER(scan));
3735 if (regmatch(inner) != n) {
3736 say_no:
3737 if (logical) {
3738 logical = 0;
3739 sw = 0;
3740 goto do_longjump;
a0ed51b3
LW
3741 }
3742 else
c277df42
IZ
3743 sayNO;
3744 }
3745 say_yes:
3746 if (logical) {
3747 logical = 0;
3748 sw = 1;
3749 }
fe44a5e8 3750 if (OP(scan) == SUSPEND) {
3280af22 3751 locinput = PL_reginput;
565764a8 3752 nextchr = UCHARAT(locinput);
fe44a5e8 3753 }
c277df42
IZ
3754 /* FALL THROUGH. */
3755 case LONGJMP:
3756 do_longjump:
3757 next = scan + ARG(scan);
3758 if (next == scan)
3759 next = NULL;
a0d0e21e
LW
3760 break;
3761 default:
b900a521 3762 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3763 PTR2UV(scan), OP(scan));
cea2e8a9 3764 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3765 }
02db2b7b 3766 reenter:
a0d0e21e
LW
3767 scan = next;
3768 }
a687059c 3769
a0d0e21e
LW
3770 /*
3771 * We get here only if there's trouble -- normally "case END" is
3772 * the terminating point.
3773 */
cea2e8a9 3774 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3775 /*NOTREACHED*/
4633a7c4
LW
3776 sayNO;
3777
7821416a
IZ
3778yes_loud:
3779 DEBUG_r(
3780 PerlIO_printf(Perl_debug_log,
3781 "%*s %scould match...%s\n",
3782 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3783 );
3784 goto yes;
3785yes_final:
3786 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3787 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3788yes:
3789#ifdef DEBUGGING
3280af22 3790 PL_regindent--;
4633a7c4 3791#endif
02db2b7b
IZ
3792
3793#if 0 /* Breaks $^R */
3794 if (unwind)
3795 regcpblow(firstcp);
3796#endif
4633a7c4
LW
3797 return 1;
3798
3799no:
7821416a
IZ
3800 DEBUG_r(
3801 PerlIO_printf(Perl_debug_log,
3802 "%*s %sfailed...%s\n",
3803 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3804 );
3805 goto do_no;
3806no_final:
3807do_no:
02db2b7b
IZ
3808 if (unwind) {
3809 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3810
3811 switch (uw->type) {
3812 case RE_UNWIND_BRANCH:
3813 case RE_UNWIND_BRANCHJ:
3814 {
3815 re_unwind_branch_t *uwb = &(uw->branch);
3816 I32 lastparen = uwb->lastparen;
9041c2e3 3817
02db2b7b
IZ
3818 REGCP_UNWIND(uwb->lastcp);
3819 for (n = *PL_reglastparen; n > lastparen; n--)
3820 PL_regendp[n] = -1;
3821 *PL_reglastparen = n;
3822 scan = next = uwb->next;
9041c2e3
NIS
3823 if ( !scan ||
3824 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3825 ? BRANCH : BRANCHJ) ) { /* Failure */
3826 unwind = uwb->prev;
3827#ifdef DEBUGGING
3828 PL_regindent--;
3829#endif
3830 goto do_no;
3831 }
3832 /* Have more choice yet. Reuse the same uwb. */
3833 /*SUPPRESS 560*/
3834 if ((n = (uwb->type == RE_UNWIND_BRANCH
3835 ? NEXT_OFF(next) : ARG(next))))
3836 next += n;
3837 else
3838 next = NULL; /* XXXX Needn't unwinding in this case... */
3839 uwb->next = next;
3840 next = NEXTOPER(scan);
3841 if (uwb->type == RE_UNWIND_BRANCHJ)
3842 next = NEXTOPER(next);
3843 locinput = uwb->locinput;
3844 nextchr = uwb->nextchr;
3845#ifdef DEBUGGING
3846 PL_regindent = uwb->regindent;
3847#endif
3848
3849 goto reenter;
3850 }
3851 /* NOT REACHED */
3852 default:
3853 Perl_croak(aTHX_ "regexp unwind memory corruption");
3854 }
3855 /* NOT REACHED */
3856 }
4633a7c4 3857#ifdef DEBUGGING
3280af22 3858 PL_regindent--;
4633a7c4 3859#endif
a0d0e21e 3860 return 0;
a687059c
LW
3861}
3862
3863/*
3864 - regrepeat - repeatedly match something simple, report how many
3865 */
3866/*
3867 * [This routine now assumes that it will only match on things of length 1.
3868 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3869 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3870 */
76e3520e 3871STATIC I32
cea2e8a9 3872S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3873{
a0d0e21e 3874 register char *scan;
a0d0e21e 3875 register I32 c;
3280af22 3876 register char *loceol = PL_regeol;
a0ed51b3 3877 register I32 hardcount = 0;
53c4c00c 3878 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3879
3280af22 3880 scan = PL_reginput;
c277df42 3881 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3882 loceol = scan + max;
a0d0e21e 3883 switch (OP(p)) {
22c35a8c 3884 case REG_ANY:
1aa99e6b 3885 if (do_utf8) {
ffc61ed2 3886 loceol = PL_regeol;
1aa99e6b 3887 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3888 scan += UTF8SKIP(scan);
3889 hardcount++;
3890 }
3891 } else {
3892 while (scan < loceol && *scan != '\n')
3893 scan++;
a0ed51b3
LW
3894 }
3895 break;
ffc61ed2 3896 case SANY:
def8e4ea
JH
3897 if (do_utf8) {
3898 loceol = PL_regeol;
a0804c9e 3899 while (scan < loceol && hardcount < max) {
def8e4ea
JH
3900 scan += UTF8SKIP(scan);
3901 hardcount++;
3902 }
3903 }
3904 else
3905 scan = loceol;
a0ed51b3 3906 break;
f33976b4
DB
3907 case CANY:
3908 scan = loceol;
3909 break;
bbce6d69 3910 case EXACT: /* length of string is 1 */
cd439c50 3911 c = (U8)*STRING(p);
bbce6d69 3912 while (scan < loceol && UCHARAT(scan) == c)
3913 scan++;
3914 break;
3915 case EXACTF: /* length of string is 1 */
cd439c50 3916 c = (U8)*STRING(p);
bbce6d69 3917 while (scan < loceol &&
22c35a8c 3918 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3919 scan++;
3920 break;
3921 case EXACTFL: /* length of string is 1 */
3280af22 3922 PL_reg_flags |= RF_tainted;
cd439c50 3923 c = (U8)*STRING(p);
bbce6d69 3924 while (scan < loceol &&
22c35a8c 3925 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3926 scan++;
3927 break;
3928 case ANYOF:
ffc61ed2
JH
3929 if (do_utf8) {
3930 loceol = PL_regeol;
1aa99e6b
IH
3931 while (hardcount < max && scan < loceol &&
3932 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3933 scan += UTF8SKIP(scan);
3934 hardcount++;
3935 }
3936 } else {
3937 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3938 scan++;
3939 }
a0d0e21e
LW
3940 break;
3941 case ALNUM:
1aa99e6b 3942 if (do_utf8) {
ffc61ed2 3943 loceol = PL_regeol;
8269fa76 3944 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3945 while (hardcount < max && scan < loceol &&
3568d838 3946 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3947 scan += UTF8SKIP(scan);
3948 hardcount++;
3949 }
3950 } else {
3951 while (scan < loceol && isALNUM(*scan))
3952 scan++;
a0ed51b3
LW
3953 }
3954 break;
bbce6d69 3955 case ALNUML:
3280af22 3956 PL_reg_flags |= RF_tainted;
1aa99e6b 3957 if (do_utf8) {
ffc61ed2 3958 loceol = PL_regeol;
1aa99e6b
IH
3959 while (hardcount < max && scan < loceol &&
3960 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3961 scan += UTF8SKIP(scan);
3962 hardcount++;
3963 }
3964 } else {
3965 while (scan < loceol && isALNUM_LC(*scan))
3966 scan++;
a0ed51b3
LW
3967 }
3968 break;
a0d0e21e 3969 case NALNUM:
1aa99e6b 3970 if (do_utf8) {
ffc61ed2 3971 loceol = PL_regeol;
8269fa76 3972 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3973 while (hardcount < max && scan < loceol &&
3568d838 3974 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3975 scan += UTF8SKIP(scan);
3976 hardcount++;
3977 }
3978 } else {
3979 while (scan < loceol && !isALNUM(*scan))
3980 scan++;
a0ed51b3
LW
3981 }
3982 break;
bbce6d69 3983 case NALNUML:
3280af22 3984 PL_reg_flags |= RF_tainted;
1aa99e6b 3985 if (do_utf8) {
ffc61ed2 3986 loceol = PL_regeol;
1aa99e6b
IH
3987 while (hardcount < max && scan < loceol &&
3988 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3989 scan += UTF8SKIP(scan);
3990 hardcount++;
3991 }
3992 } else {
3993 while (scan < loceol && !isALNUM_LC(*scan))
3994 scan++;
a0ed51b3
LW
3995 }
3996 break;
a0d0e21e 3997 case SPACE:
1aa99e6b 3998 if (do_utf8) {
ffc61ed2 3999 loceol = PL_regeol;
8269fa76 4000 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4001 while (hardcount < max && scan < loceol &&
3568d838
JH
4002 (*scan == ' ' ||
4003 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4004 scan += UTF8SKIP(scan);
4005 hardcount++;
4006 }
4007 } else {
4008 while (scan < loceol && isSPACE(*scan))
4009 scan++;
a0ed51b3
LW
4010 }
4011 break;
bbce6d69 4012 case SPACEL:
3280af22 4013 PL_reg_flags |= RF_tainted;
1aa99e6b 4014 if (do_utf8) {
ffc61ed2 4015 loceol = PL_regeol;
1aa99e6b 4016 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4017 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4018 scan += UTF8SKIP(scan);
4019 hardcount++;
4020 }
4021 } else {
4022 while (scan < loceol && isSPACE_LC(*scan))
4023 scan++;
a0ed51b3
LW
4024 }
4025 break;
a0d0e21e 4026 case NSPACE:
1aa99e6b 4027 if (do_utf8) {
ffc61ed2 4028 loceol = PL_regeol;
8269fa76 4029 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4030 while (hardcount < max && scan < loceol &&
3568d838
JH
4031 !(*scan == ' ' ||
4032 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4033 scan += UTF8SKIP(scan);
4034 hardcount++;
4035 }
4036 } else {
4037 while (scan < loceol && !isSPACE(*scan))
4038 scan++;
4039 break;
a0ed51b3 4040 }
bbce6d69 4041 case NSPACEL:
3280af22 4042 PL_reg_flags |= RF_tainted;
1aa99e6b 4043 if (do_utf8) {
ffc61ed2 4044 loceol = PL_regeol;
1aa99e6b 4045 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4046 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4047 scan += UTF8SKIP(scan);
4048 hardcount++;
4049 }
4050 } else {
4051 while (scan < loceol && !isSPACE_LC(*scan))
4052 scan++;
a0ed51b3
LW
4053 }
4054 break;
a0d0e21e 4055 case DIGIT:
1aa99e6b 4056 if (do_utf8) {
ffc61ed2 4057 loceol = PL_regeol;
8269fa76 4058 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4059 while (hardcount < max && scan < loceol &&
3568d838 4060 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4061 scan += UTF8SKIP(scan);
4062 hardcount++;
4063 }
4064 } else {
4065 while (scan < loceol && isDIGIT(*scan))
4066 scan++;
a0ed51b3
LW
4067 }
4068 break;
a0d0e21e 4069 case NDIGIT:
1aa99e6b 4070 if (do_utf8) {
ffc61ed2 4071 loceol = PL_regeol;
8269fa76 4072 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4073 while (hardcount < max && scan < loceol &&
3568d838 4074 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4075 scan += UTF8SKIP(scan);
4076 hardcount++;
4077 }
4078 } else {
4079 while (scan < loceol && !isDIGIT(*scan))
4080 scan++;
a0ed51b3
LW
4081 }
4082 break;
a0d0e21e
LW
4083 default: /* Called on something of 0 width. */
4084 break; /* So match right here or not at all. */
4085 }
a687059c 4086
a0ed51b3
LW
4087 if (hardcount)
4088 c = hardcount;
4089 else
4090 c = scan - PL_reginput;
3280af22 4091 PL_reginput = scan;
a687059c 4092
9041c2e3 4093 DEBUG_r(
c277df42
IZ
4094 {
4095 SV *prop = sv_newmortal();
4096
4097 regprop(prop, p);
9041c2e3
NIS
4098 PerlIO_printf(Perl_debug_log,
4099 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4100 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4101 });
9041c2e3 4102
a0d0e21e 4103 return(c);
a687059c
LW
4104}
4105
4106/*
c277df42 4107 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4108 *
c277df42
IZ
4109 * The repeater is supposed to have constant length.
4110 */
4111
76e3520e 4112STATIC I32
cea2e8a9 4113S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4114{
b7953727 4115 register char *scan = Nullch;
c277df42 4116 register char *start;
3280af22 4117 register char *loceol = PL_regeol;
a0ed51b3 4118 I32 l = 0;
708e3b05 4119 I32 count = 0, res = 1;
a0ed51b3
LW
4120
4121 if (!max)
4122 return 0;
c277df42 4123
3280af22 4124 start = PL_reginput;
53c4c00c 4125 if (PL_reg_match_utf8) {
708e3b05 4126 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4127 if (!count++) {
4128 l = 0;
4129 while (start < PL_reginput) {
4130 l++;
4131 start += UTF8SKIP(start);
4132 }
4133 *lp = l;
4134 if (l == 0)
4135 return max;
4136 }
4137 if (count == max)
4138 return count;
4139 }
4140 }
4141 else {
708e3b05 4142 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4143 if (!count++) {
4144 *lp = l = PL_reginput - start;
4145 if (max != REG_INFTY && l*max < loceol - scan)
4146 loceol = scan + l*max;
4147 if (l == 0)
4148 return max;
c277df42
IZ
4149 }
4150 }
4151 }
708e3b05 4152 if (!res)
3280af22 4153 PL_reginput = scan;
9041c2e3 4154
a0ed51b3 4155 return count;
c277df42
IZ
4156}
4157
4158/*
ffc61ed2
JH
4159- regclass_swash - prepare the utf8 swash
4160*/
4161
4162SV *
9e55ce06 4163Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4164{
9e55ce06
JH
4165 SV *sw = NULL;
4166 SV *si = NULL;
4167 SV *alt = NULL;
ffc61ed2
JH
4168
4169 if (PL_regdata && PL_regdata->count) {
4170 U32 n = ARG(node);
4171
4172 if (PL_regdata->what[n] == 's') {
4173 SV *rv = (SV*)PL_regdata->data[n];
4174 AV *av = (AV*)SvRV((SV*)rv);
9e55ce06 4175 SV **a, **b;
9041c2e3 4176
9e55ce06
JH
4177 /* See the end of regcomp.c:S_reglass() for
4178 * documentation of these array elements. */
4179
4180 si = *av_fetch(av, 0, FALSE);
4181 a = av_fetch(av, 1, FALSE);
4182 b = av_fetch(av, 2, FALSE);
9041c2e3 4183
ffc61ed2
JH
4184 if (a)
4185 sw = *a;
4186 else if (si && doinit) {
4187 sw = swash_init("utf8", "", si, 1, 0);
4188 (void)av_store(av, 1, sw);
4189 }
9e55ce06
JH
4190 if (b)
4191 alt = *b;
ffc61ed2
JH
4192 }
4193 }
4194
9e55ce06
JH
4195 if (listsvp)
4196 *listsvp = si;
4197 if (altsvp)
4198 *altsvp = alt;
ffc61ed2
JH
4199
4200 return sw;
4201}
4202
4203/*
832705d4
JH
4204 - reginclasslen - determine if a character falls into a character class
4205
4206 The n is the ANYOF regnode, the p is the target string, lenp
4207 is pointer to the maximum length of how far to go in the p
4208 (if the lenp is zero, UTF8SKIP(p) is used),
4209 do_utf8 tells whether the target string is in UTF-8.
4210
bbce6d69 4211 */
4212
76e3520e 4213STATIC bool
9e55ce06 4214S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4215{
ffc61ed2 4216 char flags = ANYOF_FLAGS(n);
bbce6d69 4217 bool match = FALSE;
1aa99e6b 4218 UV c;
3568d838 4219 STRLEN len = 0;
9e55ce06 4220 STRLEN plen;
1aa99e6b 4221
3568d838 4222 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4223
9e55ce06 4224 plen = lenp ? *lenp : UNISKIP(c);
ffc61ed2 4225 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4226 if (lenp)
4227 *lenp = 0;
ffc61ed2 4228 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4229 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4230 match = TRUE;
bbce6d69 4231 }
3568d838 4232 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4233 match = TRUE;
ffc61ed2 4234 if (!match) {
9e55ce06
JH
4235 AV *av;
4236 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4237
4238 if (sw) {
3568d838 4239 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4240 match = TRUE;
4241 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4242 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4243 STRLEN tmplen;
4244
4245 if (!match && lenp && av) {
4246 I32 i;
4247
4248 for (i = 0; i <= av_len(av); i++) {
4249 SV* sv = *av_fetch(av, i, FALSE);
4250 STRLEN len;
4251 char *s = SvPV(sv, len);
4252
4253 if (len <= plen && memEQ(s, p, len)) {
4254 *lenp = len;
4255 match = TRUE;
4256 break;
4257 }
4258 }
4259 }
4260 if (!match) {
4261 to_utf8_fold(p, tmpbuf, &tmplen);
4262 if (swash_fetch(sw, tmpbuf, do_utf8))
4263 match = TRUE;
4264 }
4265 if (!match) {
4266 to_utf8_upper(p, tmpbuf, &tmplen);
4267 if (swash_fetch(sw, tmpbuf, do_utf8))
4268 match = TRUE;
4269 }
ffc61ed2
JH
4270 }
4271 }
bbce6d69 4272 }
9e55ce06
JH
4273 if (match && lenp && *lenp == 0)
4274 *lenp = UNISKIP(c);
bbce6d69 4275 }
1aa99e6b 4276 if (!match && c < 256) {
ffc61ed2
JH
4277 if (ANYOF_BITMAP_TEST(n, c))
4278 match = TRUE;
4279 else if (flags & ANYOF_FOLD) {
3568d838 4280 I32 f;
a0ed51b3 4281
ffc61ed2
JH
4282 if (flags & ANYOF_LOCALE) {
4283 PL_reg_flags |= RF_tainted;
4284 f = PL_fold_locale[c];
4285 }
4286 else
4287 f = PL_fold[c];
4288 if (f != c && ANYOF_BITMAP_TEST(n, f))
4289 match = TRUE;
4290 }
4291
4292 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4293 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4294 if (
4295 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4296 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4297 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4298 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4299 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4300 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4301 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4302 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4303 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4304 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4305 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4306 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4307 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4308 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4309 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4310 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4311 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4312 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4313 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4314 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4315 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4316 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4317 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4318 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4319 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4320 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4321 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4322 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4323 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4324 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4325 ) /* How's that for a conditional? */
4326 {
4327 match = TRUE;
4328 }
a0ed51b3 4329 }
a0ed51b3
LW
4330 }
4331
a0ed51b3
LW
4332 return (flags & ANYOF_INVERT) ? !match : match;
4333}
161b471a 4334
832705d4
JH
4335/*
4336 - reginclass - determine if a character falls into a character class
4337
4338 The n is the ANYOF regnode, the p is the target string, do_utf8 tells
4339 whether the target string is in UTF-8.
4340
4341 */
4342
9e55ce06
JH
4343STATIC bool
4344S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
4345{
4346 return S_reginclasslen(aTHX_ n, p, 0, do_utf8);
4347}
4348
dfe13c55 4349STATIC U8 *
cea2e8a9 4350S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4351{
1aa99e6b
IH
4352 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4353}
4354
4355STATIC U8 *
4356S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4357{
a0ed51b3 4358 if (off >= 0) {
1aa99e6b 4359 while (off-- && s < lim) {
ffc61ed2 4360 /* XXX could check well-formedness here */
a0ed51b3 4361 s += UTF8SKIP(s);
ffc61ed2 4362 }
a0ed51b3
LW
4363 }
4364 else {
4365 while (off++) {
1aa99e6b 4366 if (s > lim) {
a0ed51b3 4367 s--;
ffc61ed2 4368 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4369 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4370 s--;
ffc61ed2
JH
4371 }
4372 /* XXX could check well-formedness here */
a0ed51b3
LW
4373 }
4374 }
4375 }
4376 return s;
4377}
161b471a 4378
dfe13c55 4379STATIC U8 *
1aa99e6b 4380S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4381{
1aa99e6b
IH
4382 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4383}
4384
4385STATIC U8 *
4386S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4387{
4388 if (off >= 0) {
1aa99e6b 4389 while (off-- && s < lim) {
ffc61ed2 4390 /* XXX could check well-formedness here */
a0ed51b3 4391 s += UTF8SKIP(s);
ffc61ed2 4392 }
a0ed51b3
LW
4393 if (off >= 0)
4394 return 0;
4395 }
4396 else {
4397 while (off++) {
1aa99e6b 4398 if (s > lim) {
a0ed51b3 4399 s--;
ffc61ed2 4400 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4401 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4402 s--;
ffc61ed2
JH
4403 }
4404 /* XXX could check well-formedness here */
a0ed51b3
LW
4405 }
4406 else
4407 break;
4408 }
4409 if (off <= 0)
4410 return 0;
4411 }
4412 return s;
4413}
51371543 4414
51371543 4415static void
acfe0abc 4416restore_pos(pTHX_ void *arg)
51371543 4417{
51371543
GS
4418 if (PL_reg_eval_set) {
4419 if (PL_reg_oldsaved) {
4420 PL_reg_re->subbeg = PL_reg_oldsaved;
4421 PL_reg_re->sublen = PL_reg_oldsavedlen;
4422 RX_MATCH_COPIED_on(PL_reg_re);
4423 }
4424 PL_reg_magic->mg_len = PL_reg_oldpos;
4425 PL_reg_eval_set = 0;
4426 PL_curpm = PL_reg_oldcurpm;
4427 }
4428}