This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Slight comment tweakage.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23
G
907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
963 e = strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
967
968 if (do_utf8) {
575cac57
JH
969 UV c, f;
970 U8 tmpbuf [UTF8_MAXLEN+1];
971 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
972 STRLEN len, foldlen;
973
cadb39a9
JH
974 /* The ibcmp_utf8() uses to_uni_fold() which is more
975 * correct folding for Unicode than using lowercase.
976 * However, it doesn't work quite fully since the folding
977 * is a one-to-many mapping and the regex optimizer is
978 * unaware of this, so it may throw out good matches.
979 * Fortunately, not getting this right is allowed
980 * for Unicode Regular Expression Support level 1,
981 * only one-to-one matching is required. --jhi */
80aecb99 982
09091399 983 if (c1 == c2) {
1aa99e6b 984 while (s <= e) {
575cac57 985 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
986 if ( c == c1
987 && (ln == len ||
988 !ibcmp_utf8(s, do_utf8, strend - s,
989 m, UTF, ln))
55da9344 990 && (norun || regtry(prog, s)) )
1aa99e6b 991 goto got_it;
80aecb99
JH
992 else {
993 uvchr_to_utf8(tmpbuf, c);
994 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
995 f = utf8_to_uvchr(foldbuf, 0);
996 if ( f != c
997 && (f == c1 || f == c2)
998 && (ln == foldlen ||
999 !ibcmp_utf8((char *)foldbuf,
1000 do_utf8, foldlen,
1001 m, UTF, ln))
1002 && (norun || regtry(prog, s)) )
1003 goto got_it;
1004 }
1aa99e6b
IH
1005 s += len;
1006 }
09091399
JH
1007 }
1008 else {
1aa99e6b 1009 while (s <= e) {
575cac57 1010 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1011
880bd946
JH
1012 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1013 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1014 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1015
1016 if ( (c == c1 || c == c2)
1017 && (ln == len ||
1018 !ibcmp_utf8(s, do_utf8, strend - s,
1019 m, UTF, ln))
55da9344 1020 && (norun || regtry(prog, s)) )
1aa99e6b 1021 goto got_it;
80aecb99
JH
1022 else {
1023 uvchr_to_utf8(tmpbuf, c);
1024 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1025 f = utf8_to_uvchr(foldbuf, 0);
1026 if ( f != c
1027 && (f == c1 || f == c2)
1028 && (ln == foldlen ||
1029 !ibcmp_utf8((char *)foldbuf,
1030 do_utf8, foldlen,
1031 m, UTF, ln))
1032 && (norun || regtry(prog, s)) )
1033 goto got_it;
1034 }
1aa99e6b
IH
1035 s += len;
1036 }
09091399 1037 }
1aa99e6b
IH
1038 }
1039 else {
1040 if (c1 == c2)
1041 while (s <= e) {
1042 if ( *(U8*)s == c1
1043 && (ln == 1 || !(OP(c) == EXACTF
1044 ? ibcmp(s, m, ln)
1045 : ibcmp_locale(s, m, ln)))
1046 && (norun || regtry(prog, s)) )
1047 goto got_it;
1048 s++;
1049 }
1050 else
1051 while (s <= e) {
1052 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1053 && (ln == 1 || !(OP(c) == EXACTF
1054 ? ibcmp(s, m, ln)
1055 : ibcmp_locale(s, m, ln)))
1056 && (norun || regtry(prog, s)) )
1057 goto got_it;
1058 s++;
1059 }
b3c9acc1
IZ
1060 }
1061 break;
bbce6d69 1062 case BOUNDL:
3280af22 1063 PL_reg_flags |= RF_tainted;
bbce6d69 1064 /* FALL THROUGH */
a0d0e21e 1065 case BOUND:
ffc61ed2 1066 if (do_utf8) {
12d33761 1067 if (s == PL_bostr)
ffc61ed2
JH
1068 tmp = '\n';
1069 else {
1aa99e6b 1070 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1071
0064a8a9
JH
1072 if (s > (char*)r)
1073 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1074 }
1075 tmp = ((OP(c) == BOUND ?
9041c2e3 1076 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1077 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1078 while (s < strend) {
1079 if (tmp == !(OP(c) == BOUND ?
3568d838 1080 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1081 isALNUM_LC_utf8((U8*)s)))
1082 {
1083 tmp = !tmp;
1084 if ((norun || regtry(prog, s)))
1085 goto got_it;
1086 }
1087 s += UTF8SKIP(s);
a687059c 1088 }
a0d0e21e 1089 }
667bb95a 1090 else {
12d33761 1091 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1092 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1093 while (s < strend) {
1094 if (tmp ==
1095 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1096 tmp = !tmp;
1097 if ((norun || regtry(prog, s)))
1098 goto got_it;
1099 }
1100 s++;
a0ed51b3 1101 }
a0ed51b3 1102 }
6eb5f6b9 1103 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1104 goto got_it;
1105 break;
bbce6d69 1106 case NBOUNDL:
3280af22 1107 PL_reg_flags |= RF_tainted;
bbce6d69 1108 /* FALL THROUGH */
a0d0e21e 1109 case NBOUND:
ffc61ed2 1110 if (do_utf8) {
12d33761 1111 if (s == PL_bostr)
ffc61ed2
JH
1112 tmp = '\n';
1113 else {
1aa99e6b 1114 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1115
0064a8a9
JH
1116 if (s > (char*)r)
1117 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1118 }
1119 tmp = ((OP(c) == NBOUND ?
9041c2e3 1120 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1121 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1122 while (s < strend) {
1123 if (tmp == !(OP(c) == NBOUND ?
3568d838 1124 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1125 isALNUM_LC_utf8((U8*)s)))
1126 tmp = !tmp;
1127 else if ((norun || regtry(prog, s)))
1128 goto got_it;
1129 s += UTF8SKIP(s);
1130 }
a0d0e21e 1131 }
667bb95a 1132 else {
12d33761 1133 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1134 tmp = ((OP(c) == NBOUND ?
1135 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1136 while (s < strend) {
1137 if (tmp ==
1138 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1139 tmp = !tmp;
1140 else if ((norun || regtry(prog, s)))
1141 goto got_it;
1142 s++;
1143 }
a0ed51b3 1144 }
6eb5f6b9 1145 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1146 goto got_it;
1147 break;
a0d0e21e 1148 case ALNUM:
ffc61ed2 1149 if (do_utf8) {
8269fa76 1150 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1151 while (s < strend) {
3568d838 1152 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1153 if (tmp && (norun || regtry(prog, s)))
1154 goto got_it;
1155 else
1156 tmp = doevery;
1157 }
bbce6d69 1158 else
ffc61ed2
JH
1159 tmp = 1;
1160 s += UTF8SKIP(s);
bbce6d69 1161 }
bbce6d69 1162 }
ffc61ed2
JH
1163 else {
1164 while (s < strend) {
1165 if (isALNUM(*s)) {
1166 if (tmp && (norun || regtry(prog, s)))
1167 goto got_it;
1168 else
1169 tmp = doevery;
1170 }
a0ed51b3 1171 else
ffc61ed2
JH
1172 tmp = 1;
1173 s++;
a0ed51b3 1174 }
a0ed51b3
LW
1175 }
1176 break;
bbce6d69 1177 case ALNUML:
3280af22 1178 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1179 if (do_utf8) {
1180 while (s < strend) {
1181 if (isALNUM_LC_utf8((U8*)s)) {
1182 if (tmp && (norun || regtry(prog, s)))
1183 goto got_it;
1184 else
1185 tmp = doevery;
1186 }
a687059c 1187 else
ffc61ed2
JH
1188 tmp = 1;
1189 s += UTF8SKIP(s);
a0d0e21e 1190 }
a0d0e21e 1191 }
ffc61ed2
JH
1192 else {
1193 while (s < strend) {
1194 if (isALNUM_LC(*s)) {
1195 if (tmp && (norun || regtry(prog, s)))
1196 goto got_it;
1197 else
1198 tmp = doevery;
1199 }
a0ed51b3 1200 else
ffc61ed2
JH
1201 tmp = 1;
1202 s++;
a0ed51b3 1203 }
a0ed51b3
LW
1204 }
1205 break;
a0d0e21e 1206 case NALNUM:
ffc61ed2 1207 if (do_utf8) {
8269fa76 1208 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1209 while (s < strend) {
3568d838 1210 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1211 if (tmp && (norun || regtry(prog, s)))
1212 goto got_it;
1213 else
1214 tmp = doevery;
1215 }
bbce6d69 1216 else
ffc61ed2
JH
1217 tmp = 1;
1218 s += UTF8SKIP(s);
bbce6d69 1219 }
bbce6d69 1220 }
ffc61ed2
JH
1221 else {
1222 while (s < strend) {
1223 if (!isALNUM(*s)) {
1224 if (tmp && (norun || regtry(prog, s)))
1225 goto got_it;
1226 else
1227 tmp = doevery;
1228 }
a0ed51b3 1229 else
ffc61ed2
JH
1230 tmp = 1;
1231 s++;
a0ed51b3 1232 }
a0ed51b3
LW
1233 }
1234 break;
bbce6d69 1235 case NALNUML:
3280af22 1236 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1237 if (do_utf8) {
1238 while (s < strend) {
1239 if (!isALNUM_LC_utf8((U8*)s)) {
1240 if (tmp && (norun || regtry(prog, s)))
1241 goto got_it;
1242 else
1243 tmp = doevery;
1244 }
a687059c 1245 else
ffc61ed2
JH
1246 tmp = 1;
1247 s += UTF8SKIP(s);
a687059c 1248 }
a0d0e21e 1249 }
ffc61ed2
JH
1250 else {
1251 while (s < strend) {
1252 if (!isALNUM_LC(*s)) {
1253 if (tmp && (norun || regtry(prog, s)))
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
a0ed51b3 1258 else
ffc61ed2
JH
1259 tmp = 1;
1260 s++;
a0ed51b3 1261 }
a0ed51b3
LW
1262 }
1263 break;
a0d0e21e 1264 case SPACE:
ffc61ed2 1265 if (do_utf8) {
8269fa76 1266 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1267 while (s < strend) {
3568d838 1268 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1269 if (tmp && (norun || regtry(prog, s)))
1270 goto got_it;
1271 else
1272 tmp = doevery;
1273 }
a0d0e21e 1274 else
ffc61ed2
JH
1275 tmp = 1;
1276 s += UTF8SKIP(s);
2304df62 1277 }
a0d0e21e 1278 }
ffc61ed2
JH
1279 else {
1280 while (s < strend) {
1281 if (isSPACE(*s)) {
1282 if (tmp && (norun || regtry(prog, s)))
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
a0ed51b3 1287 else
ffc61ed2
JH
1288 tmp = 1;
1289 s++;
a0ed51b3 1290 }
a0ed51b3
LW
1291 }
1292 break;
bbce6d69 1293 case SPACEL:
3280af22 1294 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1295 if (do_utf8) {
1296 while (s < strend) {
1297 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1298 if (tmp && (norun || regtry(prog, s)))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
bbce6d69 1303 else
ffc61ed2
JH
1304 tmp = 1;
1305 s += UTF8SKIP(s);
bbce6d69 1306 }
bbce6d69 1307 }
ffc61ed2
JH
1308 else {
1309 while (s < strend) {
1310 if (isSPACE_LC(*s)) {
1311 if (tmp && (norun || regtry(prog, s)))
1312 goto got_it;
1313 else
1314 tmp = doevery;
1315 }
a0ed51b3 1316 else
ffc61ed2
JH
1317 tmp = 1;
1318 s++;
a0ed51b3 1319 }
a0ed51b3
LW
1320 }
1321 break;
a0d0e21e 1322 case NSPACE:
ffc61ed2 1323 if (do_utf8) {
8269fa76 1324 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1325 while (s < strend) {
3568d838 1326 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1327 if (tmp && (norun || regtry(prog, s)))
1328 goto got_it;
1329 else
1330 tmp = doevery;
1331 }
a0d0e21e 1332 else
ffc61ed2
JH
1333 tmp = 1;
1334 s += UTF8SKIP(s);
a687059c 1335 }
a0d0e21e 1336 }
ffc61ed2
JH
1337 else {
1338 while (s < strend) {
1339 if (!isSPACE(*s)) {
1340 if (tmp && (norun || regtry(prog, s)))
1341 goto got_it;
1342 else
1343 tmp = doevery;
1344 }
a0ed51b3 1345 else
ffc61ed2
JH
1346 tmp = 1;
1347 s++;
a0ed51b3 1348 }
a0ed51b3
LW
1349 }
1350 break;
bbce6d69 1351 case NSPACEL:
3280af22 1352 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1353 if (do_utf8) {
1354 while (s < strend) {
1355 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1356 if (tmp && (norun || regtry(prog, s)))
1357 goto got_it;
1358 else
1359 tmp = doevery;
1360 }
bbce6d69 1361 else
ffc61ed2
JH
1362 tmp = 1;
1363 s += UTF8SKIP(s);
bbce6d69 1364 }
bbce6d69 1365 }
ffc61ed2
JH
1366 else {
1367 while (s < strend) {
1368 if (!isSPACE_LC(*s)) {
1369 if (tmp && (norun || regtry(prog, s)))
1370 goto got_it;
1371 else
1372 tmp = doevery;
1373 }
a0ed51b3 1374 else
ffc61ed2
JH
1375 tmp = 1;
1376 s++;
a0ed51b3 1377 }
a0ed51b3
LW
1378 }
1379 break;
a0d0e21e 1380 case DIGIT:
ffc61ed2 1381 if (do_utf8) {
8269fa76 1382 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1383 while (s < strend) {
3568d838 1384 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1385 if (tmp && (norun || regtry(prog, s)))
1386 goto got_it;
1387 else
1388 tmp = doevery;
1389 }
a0d0e21e 1390 else
ffc61ed2
JH
1391 tmp = 1;
1392 s += UTF8SKIP(s);
2b69d0c2 1393 }
a0d0e21e 1394 }
ffc61ed2
JH
1395 else {
1396 while (s < strend) {
1397 if (isDIGIT(*s)) {
1398 if (tmp && (norun || regtry(prog, s)))
1399 goto got_it;
1400 else
1401 tmp = doevery;
1402 }
a0ed51b3 1403 else
ffc61ed2
JH
1404 tmp = 1;
1405 s++;
a0ed51b3 1406 }
a0ed51b3
LW
1407 }
1408 break;
b8c5462f
JH
1409 case DIGITL:
1410 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1411 if (do_utf8) {
1412 while (s < strend) {
1413 if (isDIGIT_LC_utf8((U8*)s)) {
1414 if (tmp && (norun || regtry(prog, s)))
1415 goto got_it;
1416 else
1417 tmp = doevery;
1418 }
b8c5462f 1419 else
ffc61ed2
JH
1420 tmp = 1;
1421 s += UTF8SKIP(s);
b8c5462f 1422 }
b8c5462f 1423 }
ffc61ed2
JH
1424 else {
1425 while (s < strend) {
1426 if (isDIGIT_LC(*s)) {
1427 if (tmp && (norun || regtry(prog, s)))
1428 goto got_it;
1429 else
1430 tmp = doevery;
1431 }
b8c5462f 1432 else
ffc61ed2
JH
1433 tmp = 1;
1434 s++;
b8c5462f 1435 }
b8c5462f
JH
1436 }
1437 break;
a0d0e21e 1438 case NDIGIT:
ffc61ed2 1439 if (do_utf8) {
8269fa76 1440 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1441 while (s < strend) {
3568d838 1442 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1443 if (tmp && (norun || regtry(prog, s)))
1444 goto got_it;
1445 else
1446 tmp = doevery;
1447 }
a0d0e21e 1448 else
ffc61ed2
JH
1449 tmp = 1;
1450 s += UTF8SKIP(s);
a687059c 1451 }
a0d0e21e 1452 }
ffc61ed2
JH
1453 else {
1454 while (s < strend) {
1455 if (!isDIGIT(*s)) {
1456 if (tmp && (norun || regtry(prog, s)))
1457 goto got_it;
1458 else
1459 tmp = doevery;
1460 }
a0ed51b3 1461 else
ffc61ed2
JH
1462 tmp = 1;
1463 s++;
a0ed51b3 1464 }
a0ed51b3
LW
1465 }
1466 break;
b8c5462f
JH
1467 case NDIGITL:
1468 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1469 if (do_utf8) {
1470 while (s < strend) {
1471 if (!isDIGIT_LC_utf8((U8*)s)) {
1472 if (tmp && (norun || regtry(prog, s)))
1473 goto got_it;
1474 else
1475 tmp = doevery;
1476 }
b8c5462f 1477 else
ffc61ed2
JH
1478 tmp = 1;
1479 s += UTF8SKIP(s);
b8c5462f 1480 }
a0ed51b3 1481 }
ffc61ed2
JH
1482 else {
1483 while (s < strend) {
1484 if (!isDIGIT_LC(*s)) {
1485 if (tmp && (norun || regtry(prog, s)))
1486 goto got_it;
1487 else
1488 tmp = doevery;
1489 }
cf93c79d 1490 else
ffc61ed2
JH
1491 tmp = 1;
1492 s++;
b8c5462f 1493 }
b8c5462f
JH
1494 }
1495 break;
b3c9acc1 1496 default:
3c3eec57
GS
1497 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1498 break;
d6a28714 1499 }
6eb5f6b9
JH
1500 return 0;
1501 got_it:
1502 return s;
1503}
1504
1505/*
1506 - regexec_flags - match a regexp against a string
1507 */
1508I32
1509Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1510 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1511/* strend: pointer to null at end of string */
1512/* strbeg: real beginning of string */
1513/* minend: end of match must be >=minend after stringarg. */
1514/* data: May be used for some additional optimizations. */
1515/* nosave: For optimizations. */
1516{
6eb5f6b9
JH
1517 register char *s;
1518 register regnode *c;
1519 register char *startpos = stringarg;
6eb5f6b9
JH
1520 I32 minlen; /* must match at least this many chars */
1521 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1522 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1523 constant substr. */ /* CC */
1524 I32 end_shift = 0; /* Same for the end. */ /* CC */
1525 I32 scream_pos = -1; /* Internal iterator of scream. */
1526 char *scream_olds;
1527 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1528 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1529#ifdef DEBUGGING
ce333219 1530 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1531#endif
6eb5f6b9
JH
1532
1533 PL_regcc = 0;
1534
1535 cache_re(prog);
1536#ifdef DEBUGGING
aea4f609 1537 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1538#endif
1539
1540 /* Be paranoid... */
1541 if (prog == NULL || startpos == NULL) {
1542 Perl_croak(aTHX_ "NULL regexp parameter");
1543 return 0;
1544 }
1545
1546 minlen = prog->minlen;
a72c7584
JH
1547 if (strend - startpos < minlen) {
1548 DEBUG_r(PerlIO_printf(Perl_debug_log,
1549 "String too short [regexec_flags]...\n"));
1550 goto phooey;
1aa99e6b 1551 }
6eb5f6b9 1552
6eb5f6b9
JH
1553 /* Check validity of program. */
1554 if (UCHARAT(prog->program) != REG_MAGIC) {
1555 Perl_croak(aTHX_ "corrupted regexp program");
1556 }
1557
1558 PL_reg_flags = 0;
1559 PL_reg_eval_set = 0;
1560 PL_reg_maxiter = 0;
1561
1562 if (prog->reganch & ROPT_UTF8)
1563 PL_reg_flags |= RF_utf8;
1564
1565 /* Mark beginning of line for ^ and lookbehind. */
1566 PL_regbol = startpos;
1567 PL_bostr = strbeg;
1568 PL_reg_sv = sv;
1569
1570 /* Mark end of line for $ (and such) */
1571 PL_regeol = strend;
1572
1573 /* see how far we have to get to not match where we matched before */
1574 PL_regtill = startpos+minend;
1575
1576 /* We start without call_cc context. */
1577 PL_reg_call_cc = 0;
1578
1579 /* If there is a "must appear" string, look for it. */
1580 s = startpos;
1581
1582 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1583 MAGIC *mg;
1584
1585 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1586 PL_reg_ganch = startpos;
1587 else if (sv && SvTYPE(sv) >= SVt_PVMG
1588 && SvMAGIC(sv)
14befaf4
DM
1589 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1590 && mg->mg_len >= 0) {
6eb5f6b9
JH
1591 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1592 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1593 if (s > PL_reg_ganch)
6eb5f6b9
JH
1594 goto phooey;
1595 s = PL_reg_ganch;
1596 }
1597 }
1598 else /* pos() not defined */
1599 PL_reg_ganch = strbeg;
1600 }
1601
699c3c34
JH
1602 if (do_utf8 == (UTF!=0) &&
1603 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1604 re_scream_pos_data d;
1605
1606 d.scream_olds = &scream_olds;
1607 d.scream_pos = &scream_pos;
1608 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1609 if (!s) {
1610 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1611 goto phooey; /* not present */
3fa9c3d7 1612 }
6eb5f6b9
JH
1613 }
1614
2a782b5b 1615 DEBUG_r({
df1ffd02
JH
1616 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1617 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1618 if (!PL_colorset)
1619 reginitcolors();
1620 PerlIO_printf(Perl_debug_log,
1621 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1622 PL_colors[4],PL_colors[5],PL_colors[0],
1623 prog->precomp,
1624 PL_colors[1],
1625 (strlen(prog->precomp) > 60 ? "..." : ""),
1626 PL_colors[0],
1627 (int)(len > 60 ? 60 : len),
1628 s, PL_colors[1],
1629 (len > 60 ? "..." : "")
1630 );
1631 });
6eb5f6b9
JH
1632
1633 /* Simplest case: anchored match need be tried only once. */
1634 /* [unless only anchor is BOL and multiline is set] */
1635 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1636 if (s == startpos && regtry(prog, startpos))
1637 goto got_it;
1638 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1639 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1640 {
1641 char *end;
1642
1643 if (minlen)
1644 dontbother = minlen - 1;
1aa99e6b 1645 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1646 /* for multiline we only have to try after newlines */
1647 if (prog->check_substr) {
1648 if (s == startpos)
1649 goto after_try;
1650 while (1) {
1651 if (regtry(prog, s))
1652 goto got_it;
1653 after_try:
1654 if (s >= end)
1655 goto phooey;
1656 if (prog->reganch & RE_USE_INTUIT) {
1657 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1658 if (!s)
1659 goto phooey;
1660 }
1661 else
1662 s++;
1663 }
1664 } else {
1665 if (s > startpos)
1666 s--;
1667 while (s < end) {
1668 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1669 if (regtry(prog, s))
1670 goto got_it;
1671 }
1672 }
1673 }
1674 }
1675 goto phooey;
1676 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1677 if (regtry(prog, PL_reg_ganch))
1678 goto got_it;
1679 goto phooey;
1680 }
1681
1682 /* Messy cases: unanchored match. */
9041c2e3 1683 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1684 /* we have /x+whatever/ */
1685 /* it must be a one character string (XXXX Except UTF?) */
1686 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1687#ifdef DEBUGGING
1688 int did_match = 0;
1689#endif
1690
1aa99e6b 1691 if (do_utf8) {
6eb5f6b9
JH
1692 while (s < strend) {
1693 if (*s == ch) {
bf93d4cc 1694 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1695 if (regtry(prog, s)) goto got_it;
1696 s += UTF8SKIP(s);
1697 while (s < strend && *s == ch)
1698 s += UTF8SKIP(s);
1699 }
1700 s += UTF8SKIP(s);
1701 }
1702 }
1703 else {
1704 while (s < strend) {
1705 if (*s == ch) {
bf93d4cc 1706 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1707 if (regtry(prog, s)) goto got_it;
1708 s++;
1709 while (s < strend && *s == ch)
1710 s++;
1711 }
1712 s++;
1713 }
1714 }
b7953727 1715 DEBUG_r(if (!did_match)
bf93d4cc 1716 PerlIO_printf(Perl_debug_log,
b7953727
JH
1717 "Did not find anchored character...\n")
1718 );
6eb5f6b9
JH
1719 }
1720 /*SUPPRESS 560*/
1aa99e6b
IH
1721 else if (do_utf8 == (UTF!=0) &&
1722 (prog->anchored_substr != Nullsv
9041c2e3 1723 || (prog->float_substr != Nullsv
1aa99e6b 1724 && prog->float_max_offset < strend - s))) {
9041c2e3 1725 SV *must = prog->anchored_substr
6eb5f6b9 1726 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1727 I32 back_max =
6eb5f6b9 1728 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1729 I32 back_min =
6eb5f6b9 1730 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1731 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1732 -(I32)(CHR_SVLEN(must)
1aa99e6b 1733 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1734 char *last1; /* Last position checked before */
bf93d4cc
GS
1735#ifdef DEBUGGING
1736 int did_match = 0;
1737#endif
6eb5f6b9
JH
1738
1739 if (s > PL_bostr)
1740 last1 = HOPc(s, -1);
1741 else
1742 last1 = s - 1; /* bogus */
1743
1744 /* XXXX check_substr already used to find `s', can optimize if
1745 check_substr==must. */
1746 scream_pos = -1;
1747 dontbother = end_shift;
1748 strend = HOPc(strend, -dontbother);
1749 while ( (s <= last) &&
9041c2e3 1750 ((flags & REXEC_SCREAM)
1aa99e6b 1751 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1752 end_shift, &scream_pos, 0))
1aa99e6b 1753 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1754 (unsigned char*)strend, must,
6eb5f6b9 1755 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1756 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1757 if (HOPc(s, -back_max) > last1) {
1758 last1 = HOPc(s, -back_min);
1759 s = HOPc(s, -back_max);
1760 }
1761 else {
1762 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1763
1764 last1 = HOPc(s, -back_min);
1765 s = t;
1766 }
1aa99e6b 1767 if (do_utf8) {
6eb5f6b9
JH
1768 while (s <= last1) {
1769 if (regtry(prog, s))
1770 goto got_it;
1771 s += UTF8SKIP(s);
1772 }
1773 }
1774 else {
1775 while (s <= last1) {
1776 if (regtry(prog, s))
1777 goto got_it;
1778 s++;
1779 }
1780 }
1781 }
b7953727
JH
1782 DEBUG_r(if (!did_match)
1783 PerlIO_printf(Perl_debug_log,
1784 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1785 ((must == prog->anchored_substr)
1786 ? "anchored" : "floating"),
1787 PL_colors[0],
1788 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1789 SvPVX(must),
b7953727
JH
1790 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1791 );
6eb5f6b9
JH
1792 goto phooey;
1793 }
155aba94 1794 else if ((c = prog->regstclass)) {
66e933ab
GS
1795 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1796 /* don't bother with what can't match */
6eb5f6b9 1797 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1798 DEBUG_r({
1799 SV *prop = sv_newmortal();
1800 regprop(prop, c);
2a782b5b 1801 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1802 });
6eb5f6b9
JH
1803 if (find_byclass(prog, c, s, strend, startpos, 0))
1804 goto got_it;
bf93d4cc 1805 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1806 }
1807 else {
1808 dontbother = 0;
1809 if (prog->float_substr != Nullsv) { /* Trim the end. */
1810 char *last;
d6a28714
JH
1811
1812 if (flags & REXEC_SCREAM) {
1813 last = screaminstr(sv, prog->float_substr, s - strbeg,
1814 end_shift, &scream_pos, 1); /* last one */
1815 if (!last)
ffc61ed2 1816 last = scream_olds; /* Only one occurrence. */
b8c5462f 1817 }
d6a28714
JH
1818 else {
1819 STRLEN len;
1820 char *little = SvPV(prog->float_substr, len);
1821
1822 if (SvTAIL(prog->float_substr)) {
1823 if (memEQ(strend - len + 1, little, len - 1))
1824 last = strend - len + 1;
1825 else if (!PL_multiline)
9041c2e3 1826 last = memEQ(strend - len, little, len)
d6a28714 1827 ? strend - len : Nullch;
b8c5462f 1828 else
d6a28714
JH
1829 goto find_last;
1830 } else {
1831 find_last:
9041c2e3 1832 if (len)
d6a28714 1833 last = rninstr(s, strend, little, little + len);
b8c5462f 1834 else
d6a28714 1835 last = strend; /* matching `$' */
b8c5462f 1836 }
b8c5462f 1837 }
bf93d4cc
GS
1838 if (last == NULL) {
1839 DEBUG_r(PerlIO_printf(Perl_debug_log,
1840 "%sCan't trim the tail, match fails (should not happen)%s\n",
1841 PL_colors[4],PL_colors[5]));
1842 goto phooey; /* Should not happen! */
1843 }
d6a28714
JH
1844 dontbother = strend - last + prog->float_min_offset;
1845 }
1846 if (minlen && (dontbother < minlen))
1847 dontbother = minlen - 1;
1848 strend -= dontbother; /* this one's always in bytes! */
1849 /* We don't know much -- general case. */
1aa99e6b 1850 if (do_utf8) {
d6a28714
JH
1851 for (;;) {
1852 if (regtry(prog, s))
1853 goto got_it;
1854 if (s >= strend)
1855 break;
b8c5462f 1856 s += UTF8SKIP(s);
d6a28714
JH
1857 };
1858 }
1859 else {
1860 do {
1861 if (regtry(prog, s))
1862 goto got_it;
1863 } while (s++ < strend);
1864 }
1865 }
1866
1867 /* Failure. */
1868 goto phooey;
1869
1870got_it:
1871 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1872
1873 if (PL_reg_eval_set) {
1874 /* Preserve the current value of $^R */
1875 if (oreplsv != GvSV(PL_replgv))
1876 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1877 restored, the value remains
1878 the same. */
acfe0abc 1879 restore_pos(aTHX_ 0);
d6a28714
JH
1880 }
1881
1882 /* make sure $`, $&, $', and $digit will work later */
1883 if ( !(flags & REXEC_NOT_FIRST) ) {
1884 if (RX_MATCH_COPIED(prog)) {
1885 Safefree(prog->subbeg);
1886 RX_MATCH_COPIED_off(prog);
1887 }
1888 if (flags & REXEC_COPY_STR) {
1889 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1890
1891 s = savepvn(strbeg, i);
1892 prog->subbeg = s;
1893 prog->sublen = i;
1894 RX_MATCH_COPIED_on(prog);
1895 }
1896 else {
1897 prog->subbeg = strbeg;
1898 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1899 }
1900 }
9041c2e3 1901
d6a28714
JH
1902 return 1;
1903
1904phooey:
bf93d4cc
GS
1905 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1906 PL_colors[4],PL_colors[5]));
d6a28714 1907 if (PL_reg_eval_set)
acfe0abc 1908 restore_pos(aTHX_ 0);
d6a28714
JH
1909 return 0;
1910}
1911
1912/*
1913 - regtry - try match at specific point
1914 */
1915STATIC I32 /* 0 failure, 1 success */
1916S_regtry(pTHX_ regexp *prog, char *startpos)
1917{
d6a28714
JH
1918 register I32 i;
1919 register I32 *sp;
1920 register I32 *ep;
1921 CHECKPOINT lastcp;
1922
02db2b7b
IZ
1923#ifdef DEBUGGING
1924 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1925#endif
d6a28714
JH
1926 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1927 MAGIC *mg;
1928
1929 PL_reg_eval_set = RS_init;
1930 DEBUG_r(DEBUG_s(
b900a521
JH
1931 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1932 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1933 ));
e8347627 1934 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1935 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1936 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1937 SAVETMPS;
1938 /* Apparently this is not needed, judging by wantarray. */
e8347627 1939 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1940 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1941
1942 if (PL_reg_sv) {
1943 /* Make $_ available to executed code. */
1944 if (PL_reg_sv != DEFSV) {
4d1ff10f 1945 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1946 SAVESPTR(DEFSV);
1947 DEFSV = PL_reg_sv;
b8c5462f 1948 }
d6a28714 1949
9041c2e3 1950 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1951 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1952 /* prepare for quick setting of pos */
14befaf4
DM
1953 sv_magic(PL_reg_sv, (SV*)0,
1954 PERL_MAGIC_regex_global, Nullch, 0);
1955 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1956 mg->mg_len = -1;
b8c5462f 1957 }
d6a28714
JH
1958 PL_reg_magic = mg;
1959 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1960 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1961 }
09687e5a 1962 if (!PL_reg_curpm) {
0f79a09d 1963 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1964#ifdef USE_ITHREADS
1965 {
1966 SV* repointer = newSViv(0);
577e12cc 1967 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1968 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1969 av_push(PL_regex_padav,repointer);
1970 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1971 PL_regex_pad = AvARRAY(PL_regex_padav);
1972 }
1973#endif
1974 }
aaa362c4 1975 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1976 PL_reg_oldcurpm = PL_curpm;
1977 PL_curpm = PL_reg_curpm;
1978 if (RX_MATCH_COPIED(prog)) {
1979 /* Here is a serious problem: we cannot rewrite subbeg,
1980 since it may be needed if this match fails. Thus
1981 $` inside (?{}) could fail... */
1982 PL_reg_oldsaved = prog->subbeg;
1983 PL_reg_oldsavedlen = prog->sublen;
1984 RX_MATCH_COPIED_off(prog);
1985 }
1986 else
1987 PL_reg_oldsaved = Nullch;
1988 prog->subbeg = PL_bostr;
1989 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1990 }
1991 prog->startp[0] = startpos - PL_bostr;
1992 PL_reginput = startpos;
1993 PL_regstartp = prog->startp;
1994 PL_regendp = prog->endp;
1995 PL_reglastparen = &prog->lastparen;
a01268b5 1996 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1997 prog->lastparen = 0;
1998 PL_regsize = 0;
1999 DEBUG_r(PL_reg_starttry = startpos);
2000 if (PL_reg_start_tmpl <= prog->nparens) {
2001 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2002 if(PL_reg_start_tmp)
2003 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2004 else
2005 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2006 }
2007
128e8167
JH
2008#ifdef DEBUGGING
2009 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2010 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2011 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2012#endif
2013
d6a28714
JH
2014 /* XXXX What this code is doing here?!!! There should be no need
2015 to do this again and again, PL_reglastparen should take care of
3dd2943c 2016 this! --ilya*/
dafc8851
JH
2017
2018 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2019 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2020 * PL_reglastparen), is not needed at all by the test suite
2021 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2022 * enough, for building DynaLoader, or otherwise this
2023 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2024 * will happen. Meanwhile, this code *is* needed for the
2025 * above-mentioned test suite tests to succeed. The common theme
2026 * on those tests seems to be returning null fields from matches.
2027 * --jhi */
dafc8851 2028#if 1
d6a28714
JH
2029 sp = prog->startp;
2030 ep = prog->endp;
2031 if (prog->nparens) {
09e8ae3b 2032 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2033 *++sp = -1;
2034 *++ep = -1;
2035 }
2036 }
dafc8851 2037#endif
02db2b7b 2038 REGCP_SET(lastcp);
d6a28714
JH
2039 if (regmatch(prog->program + 1)) {
2040 prog->endp[0] = PL_reginput - PL_bostr;
2041 return 1;
2042 }
02db2b7b 2043 REGCP_UNWIND(lastcp);
d6a28714
JH
2044 return 0;
2045}
2046
02db2b7b
IZ
2047#define RE_UNWIND_BRANCH 1
2048#define RE_UNWIND_BRANCHJ 2
2049
2050union re_unwind_t;
2051
2052typedef struct { /* XX: makes sense to enlarge it... */
2053 I32 type;
2054 I32 prev;
2055 CHECKPOINT lastcp;
2056} re_unwind_generic_t;
2057
2058typedef struct {
2059 I32 type;
2060 I32 prev;
2061 CHECKPOINT lastcp;
2062 I32 lastparen;
2063 regnode *next;
2064 char *locinput;
2065 I32 nextchr;
2066#ifdef DEBUGGING
2067 int regindent;
2068#endif
2069} re_unwind_branch_t;
2070
2071typedef union re_unwind_t {
2072 I32 type;
2073 re_unwind_generic_t generic;
2074 re_unwind_branch_t branch;
2075} re_unwind_t;
2076
8ba1375e
MJD
2077#define sayYES goto yes
2078#define sayNO goto no
2079#define sayYES_FINAL goto yes_final
2080#define sayYES_LOUD goto yes_loud
2081#define sayNO_FINAL goto no_final
2082#define sayNO_SILENT goto do_no
2083#define saySAME(x) if (x) goto yes; else goto no
2084
2085#define REPORT_CODE_OFF 24
2086
d6a28714
JH
2087/*
2088 - regmatch - main matching routine
2089 *
2090 * Conceptually the strategy is simple: check to see whether the current
2091 * node matches, call self recursively to see whether the rest matches,
2092 * and then act accordingly. In practice we make some effort to avoid
2093 * recursion, in particular by going through "ordinary" nodes (that don't
2094 * need to know whether the rest of the match failed) by a loop instead of
2095 * by recursion.
2096 */
2097/* [lwall] I've hoisted the register declarations to the outer block in order to
2098 * maybe save a little bit of pushing and popping on the stack. It also takes
2099 * advantage of machines that use a register save mask on subroutine entry.
2100 */
2101STATIC I32 /* 0 failure, 1 success */
2102S_regmatch(pTHX_ regnode *prog)
2103{
d6a28714
JH
2104 register regnode *scan; /* Current node. */
2105 regnode *next; /* Next node. */
2106 regnode *inner; /* Next node in internal branch. */
2107 register I32 nextchr; /* renamed nextchr - nextchar colides with
2108 function of same name */
2109 register I32 n; /* no or next */
b7953727
JH
2110 register I32 ln = 0; /* len or last */
2111 register char *s = Nullch; /* operand or save */
d6a28714 2112 register char *locinput = PL_reginput;
b7953727 2113 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2114 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2115 I32 unwind = 0;
b7953727 2116#if 0
02db2b7b 2117 I32 firstcp = PL_savestack_ix;
b7953727 2118#endif
53c4c00c 2119 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2120#ifdef DEBUGGING
ce333219
JH
2121 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2122 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2123 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2124#endif
02db2b7b 2125
d6a28714
JH
2126#ifdef DEBUGGING
2127 PL_regindent++;
2128#endif
2129
2130 /* Note that nextchr is a byte even in UTF */
2131 nextchr = UCHARAT(locinput);
2132 scan = prog;
2133 while (scan != NULL) {
8ba1375e 2134
2a782b5b 2135 DEBUG_r( {
d6a28714
JH
2136 SV *prop = sv_newmortal();
2137 int docolor = *PL_colors[0];
2138 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2139 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2140 /* The part of the string before starttry has one color
2141 (pref0_len chars), between starttry and current
2142 position another one (pref_len - pref0_len chars),
2143 after the current position the third one.
2144 We assume that pref0_len <= pref_len, otherwise we
2145 decrease pref0_len. */
9041c2e3 2146 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2147 ? (5 + taill) - l : locinput - PL_bostr;
2148 int pref0_len;
d6a28714 2149
df1ffd02 2150 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2151 pref_len++;
2152 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2153 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2154 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2155 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2156 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2157 l--;
d6a28714
JH
2158 if (pref0_len < 0)
2159 pref0_len = 0;
2160 if (pref0_len > pref_len)
2161 pref0_len = pref_len;
2162 regprop(prop, scan);
2a782b5b
JH
2163 {
2164 char *s0 =
df1ffd02 2165 do_utf8 ?
2a782b5b
JH
2166 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2167 pref0_len, 60, 0) :
2168 locinput - pref_len;
df1ffd02
JH
2169 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2170 char *s1 = do_utf8 ?
2a782b5b
JH
2171 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2172 pref_len - pref0_len, 60, 0) :
2173 locinput - pref_len + pref0_len;
df1ffd02
JH
2174 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2175 char *s2 = do_utf8 ?
2a782b5b
JH
2176 pv_uni_display(dsv2, (U8*)locinput,
2177 PL_regeol - locinput, 60, 0) :
2178 locinput;
df1ffd02 2179 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2180 PerlIO_printf(Perl_debug_log,
2181 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2182 (IV)(locinput - PL_bostr),
2183 PL_colors[4],
2184 len0, s0,
2185 PL_colors[5],
2186 PL_colors[2],
2187 len1, s1,
2188 PL_colors[3],
2189 (docolor ? "" : "> <"),
2190 PL_colors[0],
2191 len2, s2,
2192 PL_colors[1],
2193 15 - l - pref_len + 1,
2194 "",
2195 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2196 SvPVX(prop));
2197 }
2198 });
d6a28714
JH
2199
2200 next = scan + NEXT_OFF(scan);
2201 if (next == scan)
2202 next = NULL;
2203
2204 switch (OP(scan)) {
2205 case BOL:
12d33761
HS
2206 if (locinput == PL_bostr || (PL_multiline &&
2207 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2208 {
2209 /* regtill = regbol; */
b8c5462f
JH
2210 break;
2211 }
d6a28714
JH
2212 sayNO;
2213 case MBOL:
12d33761
HS
2214 if (locinput == PL_bostr ||
2215 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2216 {
b8c5462f
JH
2217 break;
2218 }
d6a28714
JH
2219 sayNO;
2220 case SBOL:
c2a73568 2221 if (locinput == PL_bostr)
b8c5462f 2222 break;
d6a28714
JH
2223 sayNO;
2224 case GPOS:
2225 if (locinput == PL_reg_ganch)
2226 break;
2227 sayNO;
2228 case EOL:
2229 if (PL_multiline)
2230 goto meol;
2231 else
2232 goto seol;
2233 case MEOL:
2234 meol:
2235 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2236 sayNO;
b8c5462f 2237 break;
d6a28714
JH
2238 case SEOL:
2239 seol:
2240 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2241 sayNO;
d6a28714 2242 if (PL_regeol - locinput > 1)
b8c5462f 2243 sayNO;
b8c5462f 2244 break;
d6a28714
JH
2245 case EOS:
2246 if (PL_regeol != locinput)
b8c5462f 2247 sayNO;
d6a28714 2248 break;
ffc61ed2 2249 case SANY:
d6a28714 2250 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2251 sayNO;
f33976b4
DB
2252 if (do_utf8) {
2253 locinput += PL_utf8skip[nextchr];
2254 if (locinput > PL_regeol)
2255 sayNO;
2256 nextchr = UCHARAT(locinput);
2257 }
2258 else
2259 nextchr = UCHARAT(++locinput);
2260 break;
2261 case CANY:
2262 if (!nextchr && locinput >= PL_regeol)
2263 sayNO;
b8c5462f 2264 nextchr = UCHARAT(++locinput);
a0d0e21e 2265 break;
ffc61ed2 2266 case REG_ANY:
1aa99e6b
IH
2267 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2268 sayNO;
2269 if (do_utf8) {
b8c5462f 2270 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2271 if (locinput > PL_regeol)
2272 sayNO;
a0ed51b3 2273 nextchr = UCHARAT(locinput);
a0ed51b3 2274 }
1aa99e6b
IH
2275 else
2276 nextchr = UCHARAT(++locinput);
a0ed51b3 2277 break;
d6a28714 2278 case EXACT:
cd439c50
IZ
2279 s = STRING(scan);
2280 ln = STR_LEN(scan);
1aa99e6b 2281 if (do_utf8 != (UTF!=0)) {
bc517b45 2282 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2283 char *l = locinput;
2284 char *e = s + ln;
bc517b45 2285 STRLEN ulen;
a72c7584 2286
5ff6fc6d
JH
2287 if (do_utf8) {
2288 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2289 while (s < e) {
2290 if (l >= PL_regeol)
5ff6fc6d
JH
2291 sayNO;
2292 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2293 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2294 sayNO;
bc517b45 2295 l += ulen;
5ff6fc6d 2296 s ++;
1aa99e6b 2297 }
5ff6fc6d
JH
2298 }
2299 else {
2300 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2301 while (s < e) {
2302 if (l >= PL_regeol)
2303 sayNO;
5ff6fc6d 2304 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2305 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2306 sayNO;
bc517b45 2307 s += ulen;
a72c7584 2308 l ++;
1aa99e6b 2309 }
5ff6fc6d 2310 }
1aa99e6b
IH
2311 locinput = l;
2312 nextchr = UCHARAT(locinput);
2313 break;
2314 }
bc517b45 2315 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2316 /* Inline the first character, for speed. */
2317 if (UCHARAT(s) != nextchr)
2318 sayNO;
2319 if (PL_regeol - locinput < ln)
2320 sayNO;
2321 if (ln > 1 && memNE(s, locinput, ln))
2322 sayNO;
2323 locinput += ln;
2324 nextchr = UCHARAT(locinput);
2325 break;
2326 case EXACTFL:
b8c5462f
JH
2327 PL_reg_flags |= RF_tainted;
2328 /* FALL THROUGH */
d6a28714 2329 case EXACTF:
cd439c50
IZ
2330 s = STRING(scan);
2331 ln = STR_LEN(scan);
d6a28714 2332
bc517b45 2333 {
d6a28714 2334 char *l = locinput;
bc517b45
JH
2335 char *e = s + ln;
2336 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2337
2338 if (do_utf8 != (UTF!=0)) {
2339 /* The target and the pattern have differing utf8ness. */
2340 STRLEN ulen1, ulen2;
2341 UV cs, cl;
2342
2343 if (do_utf8) {
2344 /* The target is utf8, the pattern is not utf8. */
2345 while (s < e) {
2346 if (l >= PL_regeol)
2347 sayNO;
2348
2349 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2350 (U8*)s, &ulen1);
2351 cl = utf8_to_uvchr((U8*)l, &ulen2);
2352
2353 if (cs != cl) {
2354 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2355 if (ulen1 != ulen2 || cs != cl)
2356 sayNO;
2357 }
2358 l += ulen1;
2359 s ++;
2360 }
2361 }
2362 else {
2363 /* The target is not utf8, the pattern is utf8. */
2364 while (s < e) {
2365 if (l >= PL_regeol)
2366 sayNO;
2367
2368 cs = utf8_to_uvchr((U8*)s, &ulen1);
2369
2370 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2371 (U8*)l, &ulen2);
2372
2373 if (cs != cl) {
2374 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2375 if (ulen1 != ulen2 || cs != cl)
2376 sayNO;
2377 }
2378 l ++;
2379 s += ulen1;
2380 }
2381 }
2382 locinput = l;
2383 nextchr = UCHARAT(locinput);
2384 break;
2385 }
2386
2387 if (do_utf8 && UTF) {
2388 /* Both the target and the pattern are utf8. */
2389 STRLEN ulen;
2390
2391 while (s < e) {
2392 if (l >= PL_regeol)
2393 sayNO;
2394 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2395 memNE(s, (char*)l, UTF8SKIP(s))) {
80aecb99
JH
2396 U8 lfoldbuf[UTF8_MAXLEN_FOLD+1];
2397 STRLEN lfoldlen;
2398
2399 to_utf8_fold((U8*)l, lfoldbuf, &lfoldlen);
2400 if (UTF8SKIP(s) != lfoldlen ||
2401 memNE(s, (char*)lfoldbuf, lfoldlen)) {
2402 U8 sfoldbuf[UTF8_MAXLEN_FOLD+1];
2403 STRLEN sfoldlen;
2404
2405 to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
2406 if (sfoldlen != lfoldlen ||
2407 memNE((char*)sfoldbuf,
2408 (char*)lfoldbuf, lfoldlen))
2409 sayNO;
2410 }
bc517b45
JH
2411 }
2412 l += UTF8SKIP(l);
2413 s += UTF8SKIP(s);
2414 }
2415 locinput = l;
2416 nextchr = UCHARAT(locinput);
2417 break;
b8c5462f 2418 }
a0ed51b3 2419 }
d6a28714 2420
bc517b45
JH
2421 /* Neither the target and the pattern are utf8. */
2422
d6a28714
JH
2423 /* Inline the first character, for speed. */
2424 if (UCHARAT(s) != nextchr &&
2425 UCHARAT(s) != ((OP(scan) == EXACTF)
2426 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2427 sayNO;
d6a28714 2428 if (PL_regeol - locinput < ln)
b8c5462f 2429 sayNO;
d6a28714
JH
2430 if (ln > 1 && (OP(scan) == EXACTF
2431 ? ibcmp(s, locinput, ln)
2432 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2433 sayNO;
d6a28714
JH
2434 locinput += ln;
2435 nextchr = UCHARAT(locinput);
a0d0e21e 2436 break;
d6a28714 2437 case ANYOF:
ffc61ed2
JH
2438 if (do_utf8) {
2439 if (!reginclass(scan, (U8*)locinput, do_utf8))
2440 sayNO;
2441 if (locinput >= PL_regeol)
2442 sayNO;
2443 locinput += PL_utf8skip[nextchr];
b8c5462f 2444 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2445 }
2446 else {
2447 if (nextchr < 0)
2448 nextchr = UCHARAT(locinput);
2449 if (!reginclass(scan, (U8*)locinput, do_utf8))
2450 sayNO;
2451 if (!nextchr && locinput >= PL_regeol)
2452 sayNO;
2453 nextchr = UCHARAT(++locinput);
2454 }
b8c5462f 2455 break;
d6a28714 2456 case ALNUML:
b8c5462f
JH
2457 PL_reg_flags |= RF_tainted;
2458 /* FALL THROUGH */
d6a28714 2459 case ALNUM:
b8c5462f 2460 if (!nextchr)
4633a7c4 2461 sayNO;
ffc61ed2 2462 if (do_utf8) {
ad24be35 2463 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2464 if (!(OP(scan) == ALNUM
3568d838 2465 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2466 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2467 {
2468 sayNO;
a0ed51b3 2469 }
b8c5462f 2470 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2471 nextchr = UCHARAT(locinput);
2472 break;
2473 }
ffc61ed2 2474 if (!(OP(scan) == ALNUM
d6a28714 2475 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2476 sayNO;
b8c5462f 2477 nextchr = UCHARAT(++locinput);
a0d0e21e 2478 break;
d6a28714 2479 case NALNUML:
b8c5462f
JH
2480 PL_reg_flags |= RF_tainted;
2481 /* FALL THROUGH */
d6a28714
JH
2482 case NALNUM:
2483 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2484 sayNO;
ffc61ed2 2485 if (do_utf8) {
8269fa76 2486 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2487 if (OP(scan) == NALNUM
3568d838 2488 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2489 : isALNUM_LC_utf8((U8*)locinput))
2490 {
b8c5462f 2491 sayNO;
d6a28714 2492 }
b8c5462f
JH
2493 locinput += PL_utf8skip[nextchr];
2494 nextchr = UCHARAT(locinput);
2495 break;
2496 }
ffc61ed2 2497 if (OP(scan) == NALNUM
d6a28714 2498 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2499 sayNO;
76e3520e 2500 nextchr = UCHARAT(++locinput);
a0d0e21e 2501 break;
d6a28714
JH
2502 case BOUNDL:
2503 case NBOUNDL:
3280af22 2504 PL_reg_flags |= RF_tainted;
bbce6d69 2505 /* FALL THROUGH */
d6a28714
JH
2506 case BOUND:
2507 case NBOUND:
2508 /* was last char in word? */
ffc61ed2 2509 if (do_utf8) {
12d33761
HS
2510 if (locinput == PL_bostr)
2511 ln = '\n';
ffc61ed2
JH
2512 else {
2513 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2514
2b9d42f0 2515 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2516 }
2517 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2518 ln = isALNUM_uni(ln);
8269fa76 2519 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2520 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2521 }
2522 else {
9041c2e3 2523 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2524 n = isALNUM_LC_utf8((U8*)locinput);
2525 }
a0ed51b3 2526 }
d6a28714 2527 else {
12d33761
HS
2528 ln = (locinput != PL_bostr) ?
2529 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2530 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2531 ln = isALNUM(ln);
2532 n = isALNUM(nextchr);
2533 }
2534 else {
2535 ln = isALNUM_LC(ln);
2536 n = isALNUM_LC(nextchr);
2537 }
d6a28714 2538 }
ffc61ed2
JH
2539 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2540 OP(scan) == BOUNDL))
2541 sayNO;
a0ed51b3 2542 break;
d6a28714 2543 case SPACEL:
3280af22 2544 PL_reg_flags |= RF_tainted;
bbce6d69 2545 /* FALL THROUGH */
d6a28714 2546 case SPACE:
9442cb0e 2547 if (!nextchr)
4633a7c4 2548 sayNO;
1aa99e6b 2549 if (do_utf8) {
fd400ab9 2550 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2551 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2552 if (!(OP(scan) == SPACE
3568d838 2553 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2554 : isSPACE_LC_utf8((U8*)locinput)))
2555 {
2556 sayNO;
2557 }
2558 locinput += PL_utf8skip[nextchr];
2559 nextchr = UCHARAT(locinput);
2560 break;
d6a28714 2561 }
ffc61ed2
JH
2562 if (!(OP(scan) == SPACE
2563 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2564 sayNO;
2565 nextchr = UCHARAT(++locinput);
2566 }
2567 else {
2568 if (!(OP(scan) == SPACE
2569 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2570 sayNO;
2571 nextchr = UCHARAT(++locinput);
a0ed51b3 2572 }
a0ed51b3 2573 break;
d6a28714 2574 case NSPACEL:
3280af22 2575 PL_reg_flags |= RF_tainted;
bbce6d69 2576 /* FALL THROUGH */
d6a28714 2577 case NSPACE:
9442cb0e 2578 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2579 sayNO;
1aa99e6b 2580 if (do_utf8) {
8269fa76 2581 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2582 if (OP(scan) == NSPACE
3568d838 2583 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2584 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2585 {
2586 sayNO;
2587 }
2588 locinput += PL_utf8skip[nextchr];
2589 nextchr = UCHARAT(locinput);
2590 break;
a0ed51b3 2591 }
ffc61ed2 2592 if (OP(scan) == NSPACE
d6a28714 2593 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2594 sayNO;
76e3520e 2595 nextchr = UCHARAT(++locinput);
a0d0e21e 2596 break;
d6a28714 2597 case DIGITL:
a0ed51b3
LW
2598 PL_reg_flags |= RF_tainted;
2599 /* FALL THROUGH */
d6a28714 2600 case DIGIT:
9442cb0e 2601 if (!nextchr)
a0ed51b3 2602 sayNO;
1aa99e6b 2603 if (do_utf8) {
8269fa76 2604 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2605 if (!(OP(scan) == DIGIT
3568d838 2606 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2607 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2608 {
a0ed51b3 2609 sayNO;
dfe13c55 2610 }
6f06b55f 2611 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2612 nextchr = UCHARAT(locinput);
2613 break;
2614 }
ffc61ed2 2615 if (!(OP(scan) == DIGIT
9442cb0e 2616 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2617 sayNO;
2618 nextchr = UCHARAT(++locinput);
2619 break;
d6a28714 2620 case NDIGITL:
b8c5462f
JH
2621 PL_reg_flags |= RF_tainted;
2622 /* FALL THROUGH */
d6a28714 2623 case NDIGIT:
9442cb0e 2624 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2625 sayNO;
1aa99e6b 2626 if (do_utf8) {
8269fa76 2627 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2628 if (OP(scan) == NDIGIT
3568d838 2629 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2630 : isDIGIT_LC_utf8((U8*)locinput))
2631 {
a0ed51b3 2632 sayNO;
9442cb0e 2633 }
6f06b55f 2634 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2635 nextchr = UCHARAT(locinput);
2636 break;
2637 }
ffc61ed2 2638 if (OP(scan) == NDIGIT
9442cb0e 2639 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2640 sayNO;
2641 nextchr = UCHARAT(++locinput);
2642 break;
2643 case CLUMP:
b7c83a7e 2644 if (locinput >= PL_regeol)
a0ed51b3 2645 sayNO;
b7c83a7e
JH
2646 if (do_utf8) {
2647 LOAD_UTF8_CHARCLASS(mark,"~");
2648 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2649 sayNO;
2650 locinput += PL_utf8skip[nextchr];
2651 while (locinput < PL_regeol &&
2652 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2653 locinput += UTF8SKIP(locinput);
2654 if (locinput > PL_regeol)
2655 sayNO;
eb08e2da
JH
2656 }
2657 else
2658 locinput++;
a0ed51b3
LW
2659 nextchr = UCHARAT(locinput);
2660 break;
c8756f30 2661 case REFFL:
3280af22 2662 PL_reg_flags |= RF_tainted;
c8756f30 2663 /* FALL THROUGH */
c277df42 2664 case REF:
c8756f30 2665 case REFF:
c277df42 2666 n = ARG(scan); /* which paren pair */
cf93c79d 2667 ln = PL_regstartp[n];
2c2d71f5 2668 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2669 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2670 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2671 if (ln == PL_regendp[n])
a0d0e21e 2672 break;
a0ed51b3 2673
cf93c79d 2674 s = PL_bostr + ln;
1aa99e6b 2675 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2676 char *l = locinput;
cf93c79d 2677 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2678 /*
2679 * Note that we can't do the "other character" lookup trick as
2680 * in the 8-bit case (no pun intended) because in Unicode we
2681 * have to map both upper and title case to lower case.
2682 */
2683 if (OP(scan) == REFF) {
a2a2844f 2684 STRLEN ulen1, ulen2;
e7ae6809
JH
2685 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2686 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2687 while (s < e) {
2688 if (l >= PL_regeol)
2689 sayNO;
a2a2844f
JH
2690 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2691 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2692 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2693 sayNO;
a2a2844f
JH
2694 s += ulen1;
2695 l += ulen2;
a0ed51b3
LW
2696 }
2697 }
2698 locinput = l;
2699 nextchr = UCHARAT(locinput);
2700 break;
2701 }
2702
a0d0e21e 2703 /* Inline the first character, for speed. */
76e3520e 2704 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2705 (OP(scan) == REF ||
2706 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2707 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2708 sayNO;
cf93c79d 2709 ln = PL_regendp[n] - ln;
3280af22 2710 if (locinput + ln > PL_regeol)
4633a7c4 2711 sayNO;
c8756f30
AK
2712 if (ln > 1 && (OP(scan) == REF
2713 ? memNE(s, locinput, ln)
2714 : (OP(scan) == REFF
2715 ? ibcmp(s, locinput, ln)
2716 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2717 sayNO;
a0d0e21e 2718 locinput += ln;
76e3520e 2719 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2720 break;
2721
2722 case NOTHING:
c277df42 2723 case TAIL:
a0d0e21e
LW
2724 break;
2725 case BACK:
2726 break;
c277df42
IZ
2727 case EVAL:
2728 {
2729 dSP;
533c011a 2730 OP_4tree *oop = PL_op;
3280af22
NIS
2731 COP *ocurcop = PL_curcop;
2732 SV **ocurpad = PL_curpad;
c277df42 2733 SV *ret;
9041c2e3 2734
c277df42 2735 n = ARG(scan);
533c011a 2736 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2737 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2738 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2739 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2740
8e5e9ebe
RGS
2741 {
2742 SV **before = SP;
2743 CALLRUNOPS(aTHX); /* Scalar context. */
2744 SPAGAIN;
2745 if (SP == before)
2746 ret = Nullsv; /* protect against empty (?{}) blocks. */
2747 else {
2748 ret = POPs;
2749 PUTBACK;
2750 }
2751 }
2752
0f5d15d6
IZ
2753 PL_op = oop;
2754 PL_curpad = ocurpad;
2755 PL_curcop = ocurcop;
c277df42 2756 if (logical) {
0f5d15d6
IZ
2757 if (logical == 2) { /* Postponed subexpression. */
2758 regexp *re;
22c35a8c 2759 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2760 re_cc_state state;
0f5d15d6
IZ
2761 CHECKPOINT cp, lastcp;
2762
2763 if(SvROK(ret) || SvRMAGICAL(ret)) {
2764 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2765
2766 if(SvMAGICAL(sv))
14befaf4 2767 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2768 }
2769 if (mg) {
2770 re = (regexp *)mg->mg_obj;
df0003d4 2771 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2772 }
2773 else {
2774 STRLEN len;
2775 char *t = SvPV(ret, len);
2776 PMOP pm;
2777 char *oprecomp = PL_regprecomp;
2778 I32 osize = PL_regsize;
2779 I32 onpar = PL_regnpar;
2780
5fcd1c1b 2781 Zero(&pm, 1, PMOP);
cea2e8a9 2782 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2783 if (!(SvFLAGS(ret)
0f5d15d6 2784 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2785 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2786 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2787 PL_regprecomp = oprecomp;
2788 PL_regsize = osize;
2789 PL_regnpar = onpar;
2790 }
2791 DEBUG_r(
9041c2e3 2792 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2793 "Entering embedded `%s%.60s%s%s'\n",
2794 PL_colors[0],
2795 re->precomp,
2796 PL_colors[1],
2797 (strlen(re->precomp) > 60 ? "..." : ""))
2798 );
2799 state.node = next;
2800 state.prev = PL_reg_call_cc;
2801 state.cc = PL_regcc;
2802 state.re = PL_reg_re;
2803
2ab05381 2804 PL_regcc = 0;
9041c2e3 2805
0f5d15d6 2806 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2807 REGCP_SET(lastcp);
0f5d15d6
IZ
2808 cache_re(re);
2809 state.ss = PL_savestack_ix;
2810 *PL_reglastparen = 0;
a01268b5 2811 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2812 PL_reg_call_cc = &state;
2813 PL_reginput = locinput;
2c2d71f5
JH
2814
2815 /* XXXX This is too dramatic a measure... */
2816 PL_reg_maxiter = 0;
2817
0f5d15d6 2818 if (regmatch(re->program + 1)) {
2c914db6
IZ
2819 /* Even though we succeeded, we need to restore
2820 global variables, since we may be wrapped inside
2821 SUSPEND, thus the match may be not finished yet. */
2822
2823 /* XXXX Do this only if SUSPENDed? */
2824 PL_reg_call_cc = state.prev;
2825 PL_regcc = state.cc;
2826 PL_reg_re = state.re;
2827 cache_re(PL_reg_re);
2828
2829 /* XXXX This is too dramatic a measure... */
2830 PL_reg_maxiter = 0;
2831
2832 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2833 ReREFCNT_dec(re);
2834 regcpblow(cp);
2835 sayYES;
2836 }
0f5d15d6 2837 ReREFCNT_dec(re);
02db2b7b 2838 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2839 regcppop();
2840 PL_reg_call_cc = state.prev;
2841 PL_regcc = state.cc;
2842 PL_reg_re = state.re;
d3790889 2843 cache_re(PL_reg_re);
2c2d71f5
JH
2844
2845 /* XXXX This is too dramatic a measure... */
2846 PL_reg_maxiter = 0;
2847
8e514ae6 2848 logical = 0;
0f5d15d6
IZ
2849 sayNO;
2850 }
c277df42 2851 sw = SvTRUE(ret);
0f5d15d6 2852 logical = 0;
a0ed51b3
LW
2853 }
2854 else
3280af22 2855 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2856 break;
2857 }
a0d0e21e 2858 case OPEN:
c277df42 2859 n = ARG(scan); /* which paren pair */
3280af22
NIS
2860 PL_reg_start_tmp[n] = locinput;
2861 if (n > PL_regsize)
2862 PL_regsize = n;
a0d0e21e
LW
2863 break;
2864 case CLOSE:
c277df42 2865 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2866 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2867 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2868 if (n > *PL_reglastparen)
2869 *PL_reglastparen = n;
a01268b5 2870 *PL_reglastcloseparen = n;
a0d0e21e 2871 break;
c277df42
IZ
2872 case GROUPP:
2873 n = ARG(scan); /* which paren pair */
cf93c79d 2874 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2875 break;
2876 case IFTHEN:
2c2d71f5 2877 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2878 if (sw)
2879 next = NEXTOPER(NEXTOPER(scan));
2880 else {
2881 next = scan + ARG(scan);
2882 if (OP(next) == IFTHEN) /* Fake one. */
2883 next = NEXTOPER(NEXTOPER(next));
2884 }
2885 break;
2886 case LOGICAL:
0f5d15d6 2887 logical = scan->flags;
c277df42 2888 break;
2ab05381
IZ
2889/*******************************************************************
2890 PL_regcc contains infoblock about the innermost (...)* loop, and
2891 a pointer to the next outer infoblock.
2892
2893 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2894
2895 1) After matching X, regnode for CURLYX is processed;
2896
9041c2e3 2897 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2898 regmatch() recursively with the starting point at WHILEM node;
2899
2900 3) Each hit of WHILEM node tries to match A and Z (in the order
2901 depending on the current iteration, min/max of {min,max} and
2902 greediness). The information about where are nodes for "A"
2903 and "Z" is read from the infoblock, as is info on how many times "A"
2904 was already matched, and greediness.
2905
2906 4) After A matches, the same WHILEM node is hit again.
2907
2908 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2909 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2910 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2911 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2912 of the external loop.
2913
2914 Currently present infoblocks form a tree with a stem formed by PL_curcc
2915 and whatever it mentions via ->next, and additional attached trees
2916 corresponding to temporarily unset infoblocks as in "5" above.
2917
9041c2e3 2918 In the following picture infoblocks for outer loop of
2ab05381
IZ
2919 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2920 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2921 infoblocks are drawn below the "reset" infoblock.
2922
2923 In fact in the picture below we do not show failed matches for Z and T
2924 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2925 more obvious *why* one needs to *temporary* unset infoblocks.]
2926
2927 Matched REx position InfoBlocks Comment
2928 (Y(A)*?Z)*?T x
2929 Y(A)*?Z)*?T x <- O
2930 Y (A)*?Z)*?T x <- O
2931 Y A)*?Z)*?T x <- O <- I
2932 YA )*?Z)*?T x <- O <- I
2933 YA A)*?Z)*?T x <- O <- I
2934 YAA )*?Z)*?T x <- O <- I
2935 YAA Z)*?T x <- O # Temporary unset I
2936 I
2937
2938 YAAZ Y(A)*?Z)*?T x <- O
2939 I
2940
2941 YAAZY (A)*?Z)*?T x <- O
2942 I
2943
2944 YAAZY A)*?Z)*?T x <- O <- I
2945 I
2946
2947 YAAZYA )*?Z)*?T x <- O <- I
2948 I
2949
2950 YAAZYA Z)*?T x <- O # Temporary unset I
2951 I,I
2952
2953 YAAZYAZ )*?T x <- O
2954 I,I
2955
2956 YAAZYAZ T x # Temporary unset O
2957 O
2958 I,I
2959
2960 YAAZYAZT x
2961 O
2962 I,I
2963 *******************************************************************/
a0d0e21e
LW
2964 case CURLYX: {
2965 CURCUR cc;
3280af22 2966 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2967 /* No need to save/restore up to this paren */
2968 I32 parenfloor = scan->flags;
c277df42
IZ
2969
2970 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2971 next += ARG(next);
3280af22
NIS
2972 cc.oldcc = PL_regcc;
2973 PL_regcc = &cc;
cb434fcc
IZ
2974 /* XXXX Probably it is better to teach regpush to support
2975 parenfloor > PL_regsize... */
2976 if (parenfloor > *PL_reglastparen)
2977 parenfloor = *PL_reglastparen; /* Pessimization... */
2978 cc.parenfloor = parenfloor;
a0d0e21e
LW
2979 cc.cur = -1;
2980 cc.min = ARG1(scan);
2981 cc.max = ARG2(scan);
c277df42 2982 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2983 cc.next = next;
2984 cc.minmod = minmod;
2985 cc.lastloc = 0;
3280af22 2986 PL_reginput = locinput;
a0d0e21e
LW
2987 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2988 regcpblow(cp);
3280af22 2989 PL_regcc = cc.oldcc;
4633a7c4 2990 saySAME(n);
a0d0e21e
LW
2991 }
2992 /* NOT REACHED */
2993 case WHILEM: {
2994 /*
2995 * This is really hard to understand, because after we match
2996 * what we're trying to match, we must make sure the rest of
2c2d71f5 2997 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2998 * to go back UP the parse tree by recursing ever deeper. And
2999 * if it fails, we have to reset our parent's current state
3000 * that we can try again after backing off.
3001 */
3002
c277df42 3003 CHECKPOINT cp, lastcp;
3280af22 3004 CURCUR* cc = PL_regcc;
c277df42
IZ
3005 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3006
4633a7c4 3007 n = cc->cur + 1; /* how many we know we matched */
3280af22 3008 PL_reginput = locinput;
a0d0e21e 3009
c277df42 3010 DEBUG_r(
9041c2e3
NIS
3011 PerlIO_printf(Perl_debug_log,
3012 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3013 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3014 (long)n, (long)cc->min,
c277df42
IZ
3015 (long)cc->max, (long)cc)
3016 );
4633a7c4 3017
a0d0e21e
LW
3018 /* If degenerate scan matches "", assume scan done. */
3019
579cf2c3 3020 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3021 PL_regcc = cc->oldcc;
2ab05381
IZ
3022 if (PL_regcc)
3023 ln = PL_regcc->cur;
c277df42 3024 DEBUG_r(
c3464db5
DD
3025 PerlIO_printf(Perl_debug_log,
3026 "%*s empty match detected, try continuation...\n",
3280af22 3027 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3028 );
a0d0e21e 3029 if (regmatch(cc->next))
4633a7c4 3030 sayYES;
2ab05381
IZ
3031 if (PL_regcc)
3032 PL_regcc->cur = ln;
3280af22 3033 PL_regcc = cc;
4633a7c4 3034 sayNO;
a0d0e21e
LW
3035 }
3036
3037 /* First just match a string of min scans. */
3038
3039 if (n < cc->min) {
3040 cc->cur = n;
3041 cc->lastloc = locinput;
4633a7c4
LW
3042 if (regmatch(cc->scan))
3043 sayYES;
3044 cc->cur = n - 1;
c277df42 3045 cc->lastloc = lastloc;
4633a7c4 3046 sayNO;
a0d0e21e
LW
3047 }
3048
2c2d71f5
JH
3049 if (scan->flags) {
3050 /* Check whether we already were at this position.
3051 Postpone detection until we know the match is not
3052 *that* much linear. */
3053 if (!PL_reg_maxiter) {
3054 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3055 PL_reg_leftiter = PL_reg_maxiter;
3056 }
3057 if (PL_reg_leftiter-- == 0) {
3058 I32 size = (PL_reg_maxiter + 7)/8;
3059 if (PL_reg_poscache) {
3060 if (PL_reg_poscache_size < size) {
3061 Renew(PL_reg_poscache, size, char);
3062 PL_reg_poscache_size = size;
3063 }
3064 Zero(PL_reg_poscache, size, char);
3065 }
3066 else {
3067 PL_reg_poscache_size = size;
3068 Newz(29, PL_reg_poscache, size, char);
3069 }
3070 DEBUG_r(
3071 PerlIO_printf(Perl_debug_log,
3072 "%sDetected a super-linear match, switching on caching%s...\n",
3073 PL_colors[4], PL_colors[5])
3074 );
3075 }
3076 if (PL_reg_leftiter < 0) {
3077 I32 o = locinput - PL_bostr, b;
3078
3079 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3080 b = o % 8;
3081 o /= 8;
3082 if (PL_reg_poscache[o] & (1<<b)) {
3083 DEBUG_r(
3084 PerlIO_printf(Perl_debug_log,
3085 "%*s already tried at this position...\n",
3086 REPORT_CODE_OFF+PL_regindent*2, "")
3087 );
7821416a 3088 sayNO_SILENT;
2c2d71f5
JH
3089 }
3090 PL_reg_poscache[o] |= (1<<b);
3091 }
3092 }
3093
a0d0e21e
LW
3094 /* Prefer next over scan for minimal matching. */
3095
3096 if (cc->minmod) {
3280af22 3097 PL_regcc = cc->oldcc;
2ab05381
IZ
3098 if (PL_regcc)
3099 ln = PL_regcc->cur;
5f05dabc 3100 cp = regcppush(cc->parenfloor);
02db2b7b 3101 REGCP_SET(lastcp);
5f05dabc 3102 if (regmatch(cc->next)) {
c277df42 3103 regcpblow(cp);
4633a7c4 3104 sayYES; /* All done. */
5f05dabc 3105 }
02db2b7b 3106 REGCP_UNWIND(lastcp);
5f05dabc 3107 regcppop();
2ab05381
IZ
3108 if (PL_regcc)
3109 PL_regcc->cur = ln;
3280af22 3110 PL_regcc = cc;
a0d0e21e 3111
c277df42 3112 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3113 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3114 && !(PL_reg_flags & RF_warned)) {
3115 PL_reg_flags |= RF_warned;
e476b1b5 3116 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3117 "Complex regular subexpression recursion",
3118 REG_INFTY - 1);
c277df42 3119 }
4633a7c4 3120 sayNO;
c277df42 3121 }
a687059c 3122
c277df42 3123 DEBUG_r(
c3464db5
DD
3124 PerlIO_printf(Perl_debug_log,
3125 "%*s trying longer...\n",
3280af22 3126 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3127 );
a0d0e21e 3128 /* Try scanning more and see if it helps. */
3280af22 3129 PL_reginput = locinput;
a0d0e21e
LW
3130 cc->cur = n;
3131 cc->lastloc = locinput;
5f05dabc 3132 cp = regcppush(cc->parenfloor);
02db2b7b 3133 REGCP_SET(lastcp);
5f05dabc 3134 if (regmatch(cc->scan)) {
c277df42 3135 regcpblow(cp);
4633a7c4 3136 sayYES;
5f05dabc 3137 }
02db2b7b 3138 REGCP_UNWIND(lastcp);
5f05dabc 3139 regcppop();
4633a7c4 3140 cc->cur = n - 1;
c277df42 3141 cc->lastloc = lastloc;
4633a7c4 3142 sayNO;
a0d0e21e
LW
3143 }
3144
3145 /* Prefer scan over next for maximal matching. */
3146
3147 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3148 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3149 cc->cur = n;
3150 cc->lastloc = locinput;
02db2b7b 3151 REGCP_SET(lastcp);
5f05dabc 3152 if (regmatch(cc->scan)) {
c277df42 3153 regcpblow(cp);
4633a7c4 3154 sayYES;
5f05dabc 3155 }
02db2b7b 3156 REGCP_UNWIND(lastcp);
a0d0e21e 3157 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3158 PL_reginput = locinput;
c277df42 3159 DEBUG_r(
c3464db5
DD
3160 PerlIO_printf(Perl_debug_log,
3161 "%*s failed, try continuation...\n",
3280af22 3162 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3163 );
3164 }
9041c2e3 3165 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3166 && !(PL_reg_flags & RF_warned)) {
3280af22 3167 PL_reg_flags |= RF_warned;
e476b1b5 3168 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3169 "Complex regular subexpression recursion",
3170 REG_INFTY - 1);
a0d0e21e
LW
3171 }
3172
3173 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3174 PL_regcc = cc->oldcc;
2ab05381
IZ
3175 if (PL_regcc)
3176 ln = PL_regcc->cur;
a0d0e21e 3177 if (regmatch(cc->next))
4633a7c4 3178 sayYES;
2ab05381
IZ
3179 if (PL_regcc)
3180 PL_regcc->cur = ln;
3280af22 3181 PL_regcc = cc;
4633a7c4 3182 cc->cur = n - 1;
c277df42 3183 cc->lastloc = lastloc;
4633a7c4 3184 sayNO;
a0d0e21e
LW
3185 }
3186 /* NOT REACHED */
9041c2e3 3187 case BRANCHJ:
c277df42
IZ
3188 next = scan + ARG(scan);
3189 if (next == scan)
3190 next = NULL;
3191 inner = NEXTOPER(NEXTOPER(scan));
3192 goto do_branch;
9041c2e3 3193 case BRANCH:
c277df42
IZ
3194 inner = NEXTOPER(scan);
3195 do_branch:
3196 {
c277df42
IZ
3197 c1 = OP(scan);
3198 if (OP(next) != c1) /* No choice. */
3199 next = inner; /* Avoid recursion. */
a0d0e21e 3200 else {
02db2b7b
IZ
3201 I32 lastparen = *PL_reglastparen;
3202 I32 unwind1;
3203 re_unwind_branch_t *uw;
3204
3205 /* Put unwinding data on stack */
3206 unwind1 = SSNEWt(1,re_unwind_branch_t);
3207 uw = SSPTRt(unwind1,re_unwind_branch_t);
3208 uw->prev = unwind;
3209 unwind = unwind1;
3210 uw->type = ((c1 == BRANCH)
3211 ? RE_UNWIND_BRANCH
3212 : RE_UNWIND_BRANCHJ);
3213 uw->lastparen = lastparen;
3214 uw->next = next;
3215 uw->locinput = locinput;
3216 uw->nextchr = nextchr;
3217#ifdef DEBUGGING
3218 uw->regindent = ++PL_regindent;
3219#endif
c277df42 3220
02db2b7b
IZ
3221 REGCP_SET(uw->lastcp);
3222
3223 /* Now go into the first branch */
3224 next = inner;
a687059c 3225 }
a0d0e21e
LW
3226 }
3227 break;
3228 case MINMOD:
3229 minmod = 1;
3230 break;
c277df42
IZ
3231 case CURLYM:
3232 {
00db4c45 3233 I32 l = 0;
c277df42 3234 CHECKPOINT lastcp;
9041c2e3 3235
c277df42
IZ
3236 /* We suppose that the next guy does not need
3237 backtracking: in particular, it is of constant length,
3238 and has no parenths to influence future backrefs. */
3239 ln = ARG1(scan); /* min to match */
3240 n = ARG2(scan); /* max to match */
c277df42
IZ
3241 paren = scan->flags;
3242 if (paren) {
3280af22
NIS
3243 if (paren > PL_regsize)
3244 PL_regsize = paren;
3245 if (paren > *PL_reglastparen)
3246 *PL_reglastparen = paren;
c277df42 3247 }
dc45a647 3248 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3249 if (paren)
3250 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3251 PL_reginput = locinput;
c277df42
IZ
3252 if (minmod) {
3253 minmod = 0;
3254 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3255 sayNO;
f31a99c8
HS
3256 /* if we matched something zero-length we don't need to
3257 backtrack - capturing parens are already defined, so
3258 the caveat in the maximal case doesn't apply
3259
3260 XXXX if ln == 0, we can redo this check first time
3261 through the following loop
3262 */
3263 if (ln && l == 0)
3264 n = ln; /* don't backtrack */
3280af22 3265 locinput = PL_reginput;
cca55fe3 3266 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3267 regnode *text_node = next;
3268
cca55fe3 3269 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3270
cca55fe3 3271 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3272 else {
cca55fe3
JP
3273 if (PL_regkind[(U8)OP(text_node)] == REF) {
3274 I32 n, ln;
3275 n = ARG(text_node); /* which paren pair */
3276 ln = PL_regstartp[n];
3277 /* assume yes if we haven't seen CLOSEn */
3278 if (
3279 *PL_reglastparen < n ||
3280 ln == -1 ||
3281 ln == PL_regendp[n]
3282 ) {
3283 c1 = c2 = -1000;
3284 goto assume_ok_MM;
3285 }
3286 c1 = *(PL_bostr + ln);
3287 }
3288 else { c1 = (U8)*STRING(text_node); }
af5decee 3289 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3290 c2 = PL_fold[c1];
af5decee 3291 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3292 c2 = PL_fold_locale[c1];
3293 else
3294 c2 = c1;
3295 }
a0ed51b3
LW
3296 }
3297 else
c277df42 3298 c1 = c2 = -1000;
cca55fe3 3299 assume_ok_MM:
02db2b7b 3300 REGCP_SET(lastcp);
5f4b28b2 3301 /* This may be improved if l == 0. */
c277df42
IZ
3302 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3303 /* If it could work, try it. */
3304 if (c1 == -1000 ||
3280af22
NIS
3305 UCHARAT(PL_reginput) == c1 ||
3306 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3307 {
3308 if (paren) {
f31a99c8 3309 if (ln) {
cf93c79d
IZ
3310 PL_regstartp[paren] =
3311 HOPc(PL_reginput, -l) - PL_bostr;
3312 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3313 }
3314 else
cf93c79d 3315 PL_regendp[paren] = -1;
c277df42
IZ
3316 }
3317 if (regmatch(next))
3318 sayYES;
02db2b7b 3319 REGCP_UNWIND(lastcp);
c277df42
IZ
3320 }
3321 /* Couldn't or didn't -- move forward. */
3280af22 3322 PL_reginput = locinput;
c277df42
IZ
3323 if (regrepeat_hard(scan, 1, &l)) {
3324 ln++;
3280af22 3325 locinput = PL_reginput;
c277df42
IZ
3326 }
3327 else
3328 sayNO;
3329 }
a0ed51b3
LW
3330 }
3331 else {
c277df42 3332 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3333 /* if we matched something zero-length we don't need to
3334 backtrack, unless the minimum count is zero and we
3335 are capturing the result - in that case the capture
3336 being defined or not may affect later execution
3337 */
3338 if (n != 0 && l == 0 && !(paren && ln == 0))
3339 ln = n; /* don't backtrack */
3280af22 3340 locinput = PL_reginput;
c277df42 3341 DEBUG_r(
5c0ca799 3342 PerlIO_printf(Perl_debug_log,
faccc32b 3343 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3344 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3345 (IV) n, (IV)l)
c277df42
IZ
3346 );
3347 if (n >= ln) {
cca55fe3 3348 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3349 regnode *text_node = next;
3350
cca55fe3 3351 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3352
cca55fe3 3353 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3354 else {
cca55fe3
JP
3355 if (PL_regkind[(U8)OP(text_node)] == REF) {
3356 I32 n, ln;
3357 n = ARG(text_node); /* which paren pair */
3358 ln = PL_regstartp[n];
3359 /* assume yes if we haven't seen CLOSEn */
3360 if (
3361 *PL_reglastparen < n ||
3362 ln == -1 ||
3363 ln == PL_regendp[n]
3364 ) {
3365 c1 = c2 = -1000;
3366 goto assume_ok_REG;
3367 }
3368 c1 = *(PL_bostr + ln);
3369 }
3370 else { c1 = (U8)*STRING(text_node); }
3371
af5decee 3372 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3373 c2 = PL_fold[c1];
af5decee 3374 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3375 c2 = PL_fold_locale[c1];
3376 else
3377 c2 = c1;
3378 }
a0ed51b3
LW
3379 }
3380 else
c277df42
IZ
3381 c1 = c2 = -1000;
3382 }
cca55fe3 3383 assume_ok_REG:
02db2b7b 3384 REGCP_SET(lastcp);
c277df42
IZ
3385 while (n >= ln) {
3386 /* If it could work, try it. */
3387 if (c1 == -1000 ||
3280af22
NIS
3388 UCHARAT(PL_reginput) == c1 ||
3389 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3390 {
3391 DEBUG_r(
c3464db5 3392 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3393 "%*s trying tail with n=%"IVdf"...\n",
3394 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3395 );
3396 if (paren) {
3397 if (n) {
cf93c79d
IZ
3398 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3399 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3400 }
a0ed51b3 3401 else
cf93c79d 3402 PL_regendp[paren] = -1;
c277df42 3403 }
a0ed51b3
LW
3404 if (regmatch(next))
3405 sayYES;
02db2b7b 3406 REGCP_UNWIND(lastcp);
a0ed51b3 3407 }
c277df42
IZ
3408 /* Couldn't or didn't -- back up. */
3409 n--;
dfe13c55 3410 locinput = HOPc(locinput, -l);
3280af22 3411 PL_reginput = locinput;
c277df42
IZ
3412 }
3413 }
3414 sayNO;
3415 break;
3416 }
3417 case CURLYN:
3418 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3419 if (paren > PL_regsize)
3420 PL_regsize = paren;
3421 if (paren > *PL_reglastparen)
3422 *PL_reglastparen = paren;
c277df42
IZ
3423 ln = ARG1(scan); /* min to match */
3424 n = ARG2(scan); /* max to match */
dc45a647 3425 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3426 goto repeat;
a0d0e21e 3427 case CURLY:
c277df42 3428 paren = 0;
a0d0e21e
LW
3429 ln = ARG1(scan); /* min to match */
3430 n = ARG2(scan); /* max to match */
dc45a647 3431 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3432 goto repeat;
3433 case STAR:
3434 ln = 0;
c277df42 3435 n = REG_INFTY;
a0d0e21e 3436 scan = NEXTOPER(scan);
c277df42 3437 paren = 0;
a0d0e21e
LW
3438 goto repeat;
3439 case PLUS:
c277df42
IZ
3440 ln = 1;
3441 n = REG_INFTY;
3442 scan = NEXTOPER(scan);
3443 paren = 0;
3444 repeat:
a0d0e21e
LW
3445 /*
3446 * Lookahead to avoid useless match attempts
3447 * when we know what character comes next.
3448 */
5f80c4cf
JP
3449
3450 /*
3451 * Used to only do .*x and .*?x, but now it allows
3452 * for )'s, ('s and (?{ ... })'s to be in the way
3453 * of the quantifier and the EXACT-like node. -- japhy
3454 */
3455
cca55fe3 3456 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3457 U8 *s;
3458 regnode *text_node = next;
3459
cca55fe3 3460 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3461
cca55fe3 3462 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3463 else {
cca55fe3
JP
3464 if (PL_regkind[(U8)OP(text_node)] == REF) {
3465 I32 n, ln;
3466 n = ARG(text_node); /* which paren pair */
3467 ln = PL_regstartp[n];
3468 /* assume yes if we haven't seen CLOSEn */
3469 if (
3470 *PL_reglastparen < n ||
3471 ln == -1 ||
3472 ln == PL_regendp[n]
3473 ) {
3474 c1 = c2 = -1000;
3475 goto assume_ok_easy;
3476 }
9246c65e 3477 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3478 }
3479 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3480
3481 if (!UTF) {
3482 c2 = c1 = *s;
f65d3ee7 3483 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3484 c2 = PL_fold[c1];
f65d3ee7 3485 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3486 c2 = PL_fold_locale[c1];
1aa99e6b 3487 }
5f80c4cf 3488 else { /* UTF */
f65d3ee7 3489 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3490 STRLEN ulen1, ulen2;
e7ae6809
JH
3491 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3492 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3493
3494 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3495 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3496
3497 c1 = utf8_to_uvuni(tmpbuf1, 0);
3498 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3499 }
3500 else {
3501 c2 = c1 = utf8_to_uvchr(s, NULL);
3502 }
1aa99e6b
IH
3503 }
3504 }
bbce6d69 3505 }
a0d0e21e 3506 else
bbce6d69 3507 c1 = c2 = -1000;
cca55fe3 3508 assume_ok_easy:
3280af22 3509 PL_reginput = locinput;
a0d0e21e 3510 if (minmod) {
c277df42 3511 CHECKPOINT lastcp;
a0d0e21e
LW
3512 minmod = 0;
3513 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3514 sayNO;
a0ed51b3 3515 locinput = PL_reginput;
02db2b7b 3516 REGCP_SET(lastcp);
0fe9bf95 3517 if (c1 != -1000) {
1aa99e6b 3518 char *e; /* Should not check after this */
0fe9bf95
IZ
3519 char *old = locinput;
3520
1aa99e6b 3521 if (n == REG_INFTY) {
0fe9bf95 3522 e = PL_regeol - 1;
1aa99e6b
IH
3523 if (do_utf8)
3524 while (UTF8_IS_CONTINUATION(*(U8*)e))
3525 e--;
3526 }
3527 else if (do_utf8) {
3528 int m = n - ln;
3529 for (e = locinput;
3530 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3531 e += UTF8SKIP(e);
3532 }
3533 else {
3534 e = locinput + n - ln;
3535 if (e >= PL_regeol)
3536 e = PL_regeol - 1;
3537 }
0fe9bf95 3538 while (1) {
1aa99e6b 3539 int count;
0fe9bf95 3540 /* Find place 'next' could work */
1aa99e6b
IH
3541 if (!do_utf8) {
3542 if (c1 == c2) {
a8e8ab15
JH
3543 while (locinput <= e &&
3544 UCHARAT(locinput) != c1)
1aa99e6b
IH
3545 locinput++;
3546 } else {
9041c2e3 3547 while (locinput <= e
a8e8ab15
JH
3548 && UCHARAT(locinput) != c1
3549 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3550 locinput++;
3551 }
3552 count = locinput - old;
3553 }
3554 else {
3555 STRLEN len;
3556 if (c1 == c2) {
3557 for (count = 0;
3558 locinput <= e &&
9041c2e3 3559 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3560 count++)
3561 locinput += len;
3562
3563 } else {
3564 for (count = 0; locinput <= e; count++) {
9041c2e3 3565 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3566 if (c == c1 || c == c2)
3567 break;
9041c2e3 3568 locinput += len;
1aa99e6b
IH
3569 }
3570 }
0fe9bf95 3571 }
9041c2e3 3572 if (locinput > e)
0fe9bf95
IZ
3573 sayNO;
3574 /* PL_reginput == old now */
3575 if (locinput != old) {
3576 ln = 1; /* Did some */
1aa99e6b 3577 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3578 sayNO;
3579 }
3580 /* PL_reginput == locinput now */
29d1e993 3581 TRYPAREN(paren, ln, locinput);
0fe9bf95 3582 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3583 REGCP_UNWIND(lastcp);
0fe9bf95 3584 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3585 old = locinput;
3586 if (do_utf8)
3587 locinput += UTF8SKIP(locinput);
3588 else
3589 locinput++;
0fe9bf95
IZ
3590 }
3591 }
3592 else
c277df42 3593 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3594 UV c;
3595 if (c1 != -1000) {
3596 if (do_utf8)
9041c2e3 3597 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3598 else
9041c2e3 3599 c = UCHARAT(PL_reginput);
2390ecbc
PP
3600 /* If it could work, try it. */
3601 if (c == c1 || c == c2)
3602 {
3603 TRYPAREN(paren, n, PL_reginput);
3604 REGCP_UNWIND(lastcp);
3605 }
1aa99e6b 3606 }
a0d0e21e 3607 /* If it could work, try it. */
2390ecbc 3608 else if (c1 == -1000)
bbce6d69 3609 {
29d1e993 3610 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3611 REGCP_UNWIND(lastcp);
bbce6d69 3612 }
c277df42 3613 /* Couldn't or didn't -- move forward. */
a0ed51b3 3614 PL_reginput = locinput;
a0d0e21e
LW
3615 if (regrepeat(scan, 1)) {
3616 ln++;
a0ed51b3
LW
3617 locinput = PL_reginput;
3618 }
3619 else
4633a7c4 3620 sayNO;
a0d0e21e
LW
3621 }
3622 }
3623 else {
c277df42 3624 CHECKPOINT lastcp;
a0d0e21e 3625 n = regrepeat(scan, n);
a0ed51b3 3626 locinput = PL_reginput;
22c35a8c 3627 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3628 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3629 ln = n; /* why back off? */
1aeab75a
GS
3630 /* ...because $ and \Z can match before *and* after
3631 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3632 We should back off by one in this case. */
3633 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3634 ln--;
3635 }
02db2b7b 3636 REGCP_SET(lastcp);
c277df42 3637 if (paren) {
8fa7f367 3638 UV c = 0;
c277df42 3639 while (n >= ln) {
1aa99e6b
IH
3640 if (c1 != -1000) {
3641 if (do_utf8)
9041c2e3 3642 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3643 else
9041c2e3 3644 c = UCHARAT(PL_reginput);
1aa99e6b 3645 }
c277df42 3646 /* If it could work, try it. */
1aa99e6b 3647 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3648 {
29d1e993 3649 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3650 REGCP_UNWIND(lastcp);
c277df42
IZ
3651 }
3652 /* Couldn't or didn't -- back up. */
3653 n--;
dfe13c55 3654 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3655 }
a0ed51b3
LW
3656 }
3657 else {
8fa7f367 3658 UV c = 0;
c277df42 3659 while (n >= ln) {
1aa99e6b
IH
3660 if (c1 != -1000) {
3661 if (do_utf8)
9041c2e3 3662 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3663 else
9041c2e3 3664 c = UCHARAT(PL_reginput);
1aa99e6b 3665 }
c277df42 3666 /* If it could work, try it. */
1aa99e6b 3667 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3668 {
29d1e993 3669 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3670 REGCP_UNWIND(lastcp);
c277df42
IZ
3671 }
3672 /* Couldn't or didn't -- back up. */
3673 n--;
dfe13c55 3674 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3675 }
a0d0e21e
LW
3676 }
3677 }
4633a7c4 3678 sayNO;
c277df42 3679 break;
a0d0e21e 3680 case END:
0f5d15d6
IZ
3681 if (PL_reg_call_cc) {
3682 re_cc_state *cur_call_cc = PL_reg_call_cc;
3683 CURCUR *cctmp = PL_regcc;
3684 regexp *re = PL_reg_re;
3685 CHECKPOINT cp, lastcp;
3686
3687 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3688 REGCP_SET(lastcp);
0f5d15d6
IZ
3689 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3690 the caller. */
3691 PL_reginput = locinput; /* Make position available to
3692 the callcc. */
3693 cache_re(PL_reg_call_cc->re);
3694 PL_regcc = PL_reg_call_cc->cc;
3695 PL_reg_call_cc = PL_reg_call_cc->prev;
3696 if (regmatch(cur_call_cc->node)) {
3697 PL_reg_call_cc = cur_call_cc;
3698 regcpblow(cp);
3699 sayYES;
3700 }
02db2b7b 3701 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3702 regcppop();
3703 PL_reg_call_cc = cur_call_cc;
3704 PL_regcc = cctmp;
3705 PL_reg_re = re;
3706 cache_re(re);
3707
3708 DEBUG_r(
3709 PerlIO_printf(Perl_debug_log,
3710 "%*s continuation failed...\n",
3711 REPORT_CODE_OFF+PL_regindent*2, "")
3712 );
7821416a 3713 sayNO_SILENT;
0f5d15d6 3714 }
7821416a
IZ
3715 if (locinput < PL_regtill) {
3716 DEBUG_r(PerlIO_printf(Perl_debug_log,
3717 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3718 PL_colors[4],
3719 (long)(locinput - PL_reg_starttry),
3720 (long)(PL_regtill - PL_reg_starttry),
3721 PL_colors[5]));
3722 sayNO_FINAL; /* Cannot match: too short. */
3723 }
3724 PL_reginput = locinput; /* put where regtry can find it */
3725 sayYES_FINAL; /* Success! */
7e5428c5 3726 case SUCCEED:
3280af22 3727 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3728 sayYES_LOUD; /* Success! */
c277df42
IZ
3729 case SUSPEND:
3730 n = 1;
9fe1d20c 3731 PL_reginput = locinput;
9041c2e3 3732 goto do_ifmatch;
a0d0e21e 3733 case UNLESSM:
c277df42 3734 n = 0;
a0ed51b3 3735 if (scan->flags) {
efb30f32
HS
3736 s = HOPBACKc(locinput, scan->flags);
3737 if (!s)
3738 goto say_yes;
3739 PL_reginput = s;
a0ed51b3
LW
3740 }
3741 else
3742 PL_reginput = locinput;
c277df42
IZ
3743 goto do_ifmatch;
3744 case IFMATCH:
3745 n = 1;
a0ed51b3 3746 if (scan->flags) {
efb30f32
HS
3747 s = HOPBACKc(locinput, scan->flags);
3748 if (!s)
3749 goto say_no;
3750 PL_reginput = s;
a0ed51b3
LW
3751 }
3752 else
3753 PL_reginput = locinput;
3754
c277df42 3755 do_ifmatch:
c277df42
IZ
3756 inner = NEXTOPER(NEXTOPER(scan));
3757 if (regmatch(inner) != n) {
3758 say_no:
3759 if (logical) {
3760 logical = 0;
3761 sw = 0;
3762 goto do_longjump;
a0ed51b3
LW
3763 }
3764 else
c277df42
IZ
3765 sayNO;
3766 }
3767 say_yes:
3768 if (logical) {
3769 logical = 0;
3770 sw = 1;
3771 }
fe44a5e8 3772 if (OP(scan) == SUSPEND) {
3280af22 3773 locinput = PL_reginput;
565764a8 3774 nextchr = UCHARAT(locinput);
fe44a5e8 3775 }
c277df42
IZ
3776 /* FALL THROUGH. */
3777 case LONGJMP:
3778 do_longjump:
3779 next = scan + ARG(scan);
3780 if (next == scan)
3781 next = NULL;
a0d0e21e
LW
3782 break;
3783 default:
b900a521 3784 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3785 PTR2UV(scan), OP(scan));
cea2e8a9 3786 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3787 }
02db2b7b 3788 reenter:
a0d0e21e
LW
3789 scan = next;
3790 }
a687059c 3791
a0d0e21e
LW
3792 /*
3793 * We get here only if there's trouble -- normally "case END" is
3794 * the terminating point.
3795 */
cea2e8a9 3796 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3797 /*NOTREACHED*/
4633a7c4
LW
3798 sayNO;
3799
7821416a
IZ
3800yes_loud:
3801 DEBUG_r(
3802 PerlIO_printf(Perl_debug_log,
3803 "%*s %scould match...%s\n",
3804 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3805 );
3806 goto yes;
3807yes_final:
3808 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3809 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3810yes:
3811#ifdef DEBUGGING
3280af22 3812 PL_regindent--;
4633a7c4 3813#endif
02db2b7b
IZ
3814
3815#if 0 /* Breaks $^R */
3816 if (unwind)
3817 regcpblow(firstcp);
3818#endif
4633a7c4
LW
3819 return 1;
3820
3821no:
7821416a
IZ
3822 DEBUG_r(
3823 PerlIO_printf(Perl_debug_log,
3824 "%*s %sfailed...%s\n",
3825 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3826 );
3827 goto do_no;
3828no_final:
3829do_no:
02db2b7b
IZ
3830 if (unwind) {
3831 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3832
3833 switch (uw->type) {
3834 case RE_UNWIND_BRANCH:
3835 case RE_UNWIND_BRANCHJ:
3836 {
3837 re_unwind_branch_t *uwb = &(uw->branch);
3838 I32 lastparen = uwb->lastparen;
9041c2e3 3839
02db2b7b
IZ
3840 REGCP_UNWIND(uwb->lastcp);
3841 for (n = *PL_reglastparen; n > lastparen; n--)
3842 PL_regendp[n] = -1;
3843 *PL_reglastparen = n;
3844 scan = next = uwb->next;
9041c2e3
NIS
3845 if ( !scan ||
3846 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3847 ? BRANCH : BRANCHJ) ) { /* Failure */
3848 unwind = uwb->prev;
3849#ifdef DEBUGGING
3850 PL_regindent--;
3851#endif
3852 goto do_no;
3853 }
3854 /* Have more choice yet. Reuse the same uwb. */
3855 /*SUPPRESS 560*/
3856 if ((n = (uwb->type == RE_UNWIND_BRANCH
3857 ? NEXT_OFF(next) : ARG(next))))
3858 next += n;
3859 else
3860 next = NULL; /* XXXX Needn't unwinding in this case... */
3861 uwb->next = next;
3862 next = NEXTOPER(scan);
3863 if (uwb->type == RE_UNWIND_BRANCHJ)
3864 next = NEXTOPER(next);
3865 locinput = uwb->locinput;
3866 nextchr = uwb->nextchr;
3867#ifdef DEBUGGING
3868 PL_regindent = uwb->regindent;
3869#endif
3870
3871 goto reenter;
3872 }
3873 /* NOT REACHED */
3874 default:
3875 Perl_croak(aTHX_ "regexp unwind memory corruption");
3876 }
3877 /* NOT REACHED */
3878 }
4633a7c4 3879#ifdef DEBUGGING
3280af22 3880 PL_regindent--;
4633a7c4 3881#endif
a0d0e21e 3882 return 0;
a687059c
LW
3883}
3884
3885/*
3886 - regrepeat - repeatedly match something simple, report how many
3887 */
3888/*
3889 * [This routine now assumes that it will only match on things of length 1.
3890 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3891 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3892 */
76e3520e 3893STATIC I32
cea2e8a9 3894S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3895{
a0d0e21e 3896 register char *scan;
a0d0e21e 3897 register I32 c;
3280af22 3898 register char *loceol = PL_regeol;
a0ed51b3 3899 register I32 hardcount = 0;
53c4c00c 3900 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3901
3280af22 3902 scan = PL_reginput;
c277df42 3903 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3904 loceol = scan + max;
a0d0e21e 3905 switch (OP(p)) {
22c35a8c 3906 case REG_ANY:
1aa99e6b 3907 if (do_utf8) {
ffc61ed2 3908 loceol = PL_regeol;
1aa99e6b 3909 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3910 scan += UTF8SKIP(scan);
3911 hardcount++;
3912 }
3913 } else {
3914 while (scan < loceol && *scan != '\n')
3915 scan++;
a0ed51b3
LW
3916 }
3917 break;
ffc61ed2 3918 case SANY:
3baa4c62 3919 scan = loceol;
a0ed51b3 3920 break;
f33976b4
DB
3921 case CANY:
3922 scan = loceol;
3923 break;
bbce6d69 3924 case EXACT: /* length of string is 1 */
cd439c50 3925 c = (U8)*STRING(p);
bbce6d69 3926 while (scan < loceol && UCHARAT(scan) == c)
3927 scan++;
3928 break;
3929 case EXACTF: /* length of string is 1 */
cd439c50 3930 c = (U8)*STRING(p);
bbce6d69 3931 while (scan < loceol &&
22c35a8c 3932 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3933 scan++;
3934 break;
3935 case EXACTFL: /* length of string is 1 */
3280af22 3936 PL_reg_flags |= RF_tainted;
cd439c50 3937 c = (U8)*STRING(p);
bbce6d69 3938 while (scan < loceol &&
22c35a8c 3939 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3940 scan++;
3941 break;
3942 case ANYOF:
ffc61ed2
JH
3943 if (do_utf8) {
3944 loceol = PL_regeol;
1aa99e6b
IH
3945 while (hardcount < max && scan < loceol &&
3946 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3947 scan += UTF8SKIP(scan);
3948 hardcount++;
3949 }
3950 } else {
3951 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3952 scan++;
3953 }
a0d0e21e
LW
3954 break;
3955 case ALNUM:
1aa99e6b 3956 if (do_utf8) {
ffc61ed2 3957 loceol = PL_regeol;
8269fa76 3958 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3959 while (hardcount < max && scan < loceol &&
3568d838 3960 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3961 scan += UTF8SKIP(scan);
3962 hardcount++;
3963 }
3964 } else {
3965 while (scan < loceol && isALNUM(*scan))
3966 scan++;
a0ed51b3
LW
3967 }
3968 break;
bbce6d69 3969 case ALNUML:
3280af22 3970 PL_reg_flags |= RF_tainted;
1aa99e6b 3971 if (do_utf8) {
ffc61ed2 3972 loceol = PL_regeol;
1aa99e6b
IH
3973 while (hardcount < max && scan < loceol &&
3974 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3975 scan += UTF8SKIP(scan);
3976 hardcount++;
3977 }
3978 } else {
3979 while (scan < loceol && isALNUM_LC(*scan))
3980 scan++;
a0ed51b3
LW
3981 }
3982 break;
a0d0e21e 3983 case NALNUM:
1aa99e6b 3984 if (do_utf8) {
ffc61ed2 3985 loceol = PL_regeol;
8269fa76 3986 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3987 while (hardcount < max && scan < loceol &&
3568d838 3988 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3989 scan += UTF8SKIP(scan);
3990 hardcount++;
3991 }
3992 } else {
3993 while (scan < loceol && !isALNUM(*scan))
3994 scan++;
a0ed51b3
LW
3995 }
3996 break;
bbce6d69 3997 case NALNUML:
3280af22 3998 PL_reg_flags |= RF_tainted;
1aa99e6b 3999 if (do_utf8) {
ffc61ed2 4000 loceol = PL_regeol;
1aa99e6b
IH
4001 while (hardcount < max && scan < loceol &&
4002 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4003 scan += UTF8SKIP(scan);
4004 hardcount++;
4005 }
4006 } else {
4007 while (scan < loceol && !isALNUM_LC(*scan))
4008 scan++;
a0ed51b3
LW
4009 }
4010 break;
a0d0e21e 4011 case SPACE:
1aa99e6b 4012 if (do_utf8) {
ffc61ed2 4013 loceol = PL_regeol;
8269fa76 4014 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4015 while (hardcount < max && scan < loceol &&
3568d838
JH
4016 (*scan == ' ' ||
4017 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4018 scan += UTF8SKIP(scan);
4019 hardcount++;
4020 }
4021 } else {
4022 while (scan < loceol && isSPACE(*scan))
4023 scan++;
a0ed51b3
LW
4024 }
4025 break;
bbce6d69 4026 case SPACEL:
3280af22 4027 PL_reg_flags |= RF_tainted;
1aa99e6b 4028 if (do_utf8) {
ffc61ed2 4029 loceol = PL_regeol;
1aa99e6b 4030 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4031 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4032 scan += UTF8SKIP(scan);
4033 hardcount++;
4034 }
4035 } else {
4036 while (scan < loceol && isSPACE_LC(*scan))
4037 scan++;
a0ed51b3
LW
4038 }
4039 break;
a0d0e21e 4040 case NSPACE:
1aa99e6b 4041 if (do_utf8) {
ffc61ed2 4042 loceol = PL_regeol;
8269fa76 4043 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4044 while (hardcount < max && scan < loceol &&
3568d838
JH
4045 !(*scan == ' ' ||
4046 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4047 scan += UTF8SKIP(scan);
4048 hardcount++;
4049 }
4050 } else {
4051 while (scan < loceol && !isSPACE(*scan))
4052 scan++;
4053 break;
a0ed51b3 4054 }
bbce6d69 4055 case NSPACEL:
3280af22 4056 PL_reg_flags |= RF_tainted;
1aa99e6b 4057 if (do_utf8) {
ffc61ed2 4058 loceol = PL_regeol;
1aa99e6b 4059 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4060 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4061 scan += UTF8SKIP(scan);
4062 hardcount++;
4063 }
4064 } else {
4065 while (scan < loceol && !isSPACE_LC(*scan))
4066 scan++;
a0ed51b3
LW
4067 }
4068 break;
a0d0e21e 4069 case DIGIT:
1aa99e6b 4070 if (do_utf8) {
ffc61ed2 4071 loceol = PL_regeol;
8269fa76 4072 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4073 while (hardcount < max && scan < loceol &&
3568d838 4074 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4075 scan += UTF8SKIP(scan);
4076 hardcount++;
4077 }
4078 } else {
4079 while (scan < loceol && isDIGIT(*scan))
4080 scan++;
a0ed51b3
LW
4081 }
4082 break;
a0d0e21e 4083 case NDIGIT:
1aa99e6b 4084 if (do_utf8) {
ffc61ed2 4085 loceol = PL_regeol;
8269fa76 4086 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4087 while (hardcount < max && scan < loceol &&
3568d838 4088 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4089 scan += UTF8SKIP(scan);
4090 hardcount++;
4091 }
4092 } else {
4093 while (scan < loceol && !isDIGIT(*scan))
4094 scan++;
a0ed51b3
LW
4095 }
4096 break;
a0d0e21e
LW
4097 default: /* Called on something of 0 width. */
4098 break; /* So match right here or not at all. */
4099 }
a687059c 4100
a0ed51b3
LW
4101 if (hardcount)
4102 c = hardcount;
4103 else
4104 c = scan - PL_reginput;
3280af22 4105 PL_reginput = scan;
a687059c 4106
9041c2e3 4107 DEBUG_r(
c277df42
IZ
4108 {
4109 SV *prop = sv_newmortal();
4110
4111 regprop(prop, p);
9041c2e3
NIS
4112 PerlIO_printf(Perl_debug_log,
4113 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4114 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4115 });
9041c2e3 4116
a0d0e21e 4117 return(c);
a687059c
LW
4118}
4119
4120/*
c277df42 4121 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4122 *
c277df42
IZ
4123 * The repeater is supposed to have constant length.
4124 */
4125
76e3520e 4126STATIC I32
cea2e8a9 4127S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4128{
b7953727 4129 register char *scan = Nullch;
c277df42 4130 register char *start;
3280af22 4131 register char *loceol = PL_regeol;
a0ed51b3 4132 I32 l = 0;
708e3b05 4133 I32 count = 0, res = 1;
a0ed51b3
LW
4134
4135 if (!max)
4136 return 0;
c277df42 4137
3280af22 4138 start = PL_reginput;
53c4c00c 4139 if (PL_reg_match_utf8) {
708e3b05 4140 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4141 if (!count++) {
4142 l = 0;
4143 while (start < PL_reginput) {
4144 l++;
4145 start += UTF8SKIP(start);
4146 }
4147 *lp = l;
4148 if (l == 0)
4149 return max;
4150 }
4151 if (count == max)
4152 return count;
4153 }
4154 }
4155 else {
708e3b05 4156 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4157 if (!count++) {
4158 *lp = l = PL_reginput - start;
4159 if (max != REG_INFTY && l*max < loceol - scan)
4160 loceol = scan + l*max;
4161 if (l == 0)
4162 return max;
c277df42
IZ
4163 }
4164 }
4165 }
708e3b05 4166 if (!res)
3280af22 4167 PL_reginput = scan;
9041c2e3 4168
a0ed51b3 4169 return count;
c277df42
IZ
4170}
4171
4172/*
ffc61ed2
JH
4173- regclass_swash - prepare the utf8 swash
4174*/
4175
4176SV *
4177Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
4178{
4179 SV *sw = NULL;
4180 SV *si = NULL;
4181
4182 if (PL_regdata && PL_regdata->count) {
4183 U32 n = ARG(node);
4184
4185 if (PL_regdata->what[n] == 's') {
4186 SV *rv = (SV*)PL_regdata->data[n];
4187 AV *av = (AV*)SvRV((SV*)rv);
4188 SV **a;
9041c2e3 4189
ffc61ed2
JH
4190 si = *av_fetch(av, 0, FALSE);
4191 a = av_fetch(av, 1, FALSE);
9041c2e3 4192
ffc61ed2
JH
4193 if (a)
4194 sw = *a;
4195 else if (si && doinit) {
4196 sw = swash_init("utf8", "", si, 1, 0);
4197 (void)av_store(av, 1, sw);
4198 }
4199 }
4200 }
4201
4202 if (initsvp)
4203 *initsvp = si;
4204
4205 return sw;
4206}
4207
4208/*
cb8d8820 4209 - reginclass - determine if a character falls into a character class
bbce6d69 4210 */
4211
76e3520e 4212STATIC bool
ffc61ed2 4213S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4214{
ffc61ed2 4215 char flags = ANYOF_FLAGS(n);
bbce6d69 4216 bool match = FALSE;
1aa99e6b 4217 UV c;
3568d838 4218 STRLEN len = 0;
1aa99e6b 4219
3568d838 4220 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4221
ffc61ed2
JH
4222 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4223 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4224 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4225 match = TRUE;
bbce6d69 4226 }
3568d838 4227 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4228 match = TRUE;
ffc61ed2
JH
4229 if (!match) {
4230 SV *sw = regclass_swash(n, TRUE, 0);
4231
4232 if (sw) {
3568d838 4233 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4234 match = TRUE;
4235 else if (flags & ANYOF_FOLD) {
a2a2844f 4236 STRLEN ulen;
a5961de5 4237 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
a2a2844f 4238
a5961de5
JH
4239 to_utf8_fold(p, tmpbuf, &ulen);
4240 if (swash_fetch(sw, tmpbuf, do_utf8))
4241 match = TRUE;
4242 to_utf8_upper(p, tmpbuf, &ulen);
3568d838 4243 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4244 match = TRUE;
4245 }
4246 }
bbce6d69 4247 }
4248 }
1aa99e6b 4249 if (!match && c < 256) {
ffc61ed2
JH
4250 if (ANYOF_BITMAP_TEST(n, c))
4251 match = TRUE;
4252 else if (flags & ANYOF_FOLD) {
3568d838 4253 I32 f;
a0ed51b3 4254
ffc61ed2
JH
4255 if (flags & ANYOF_LOCALE) {
4256 PL_reg_flags |= RF_tainted;
4257 f = PL_fold_locale[c];
4258 }
4259 else
4260 f = PL_fold[c];
4261 if (f != c && ANYOF_BITMAP_TEST(n, f))
4262 match = TRUE;
4263 }
4264
4265 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4266 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4267 if (
4268 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4269 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4270 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4271 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4272 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4273 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4274 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4275 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4276 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4277 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4278 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4279 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4280 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4281 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4282 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4283 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4284 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4285 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4286 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4287 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4288 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4289 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4290 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4291 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4292 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4293 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4294 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4295 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4296 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4297 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4298 ) /* How's that for a conditional? */
4299 {
4300 match = TRUE;
4301 }
a0ed51b3 4302 }
a0ed51b3
LW
4303 }
4304
a0ed51b3
LW
4305 return (flags & ANYOF_INVERT) ? !match : match;
4306}
161b471a 4307
dfe13c55 4308STATIC U8 *
cea2e8a9 4309S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4310{
1aa99e6b
IH
4311 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4312}
4313
4314STATIC U8 *
4315S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4316{
a0ed51b3 4317 if (off >= 0) {
1aa99e6b 4318 while (off-- && s < lim) {
ffc61ed2 4319 /* XXX could check well-formedness here */
a0ed51b3 4320 s += UTF8SKIP(s);
ffc61ed2 4321 }
a0ed51b3
LW
4322 }
4323 else {
4324 while (off++) {
1aa99e6b 4325 if (s > lim) {
a0ed51b3 4326 s--;
ffc61ed2 4327 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4328 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4329 s--;
ffc61ed2
JH
4330 }
4331 /* XXX could check well-formedness here */
a0ed51b3
LW
4332 }
4333 }
4334 }
4335 return s;
4336}
161b471a 4337
dfe13c55 4338STATIC U8 *
1aa99e6b 4339S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4340{
1aa99e6b
IH
4341 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4342}
4343
4344STATIC U8 *
4345S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4346{
4347 if (off >= 0) {
1aa99e6b 4348 while (off-- && s < lim) {
ffc61ed2 4349 /* XXX could check well-formedness here */
a0ed51b3 4350 s += UTF8SKIP(s);
ffc61ed2 4351 }
a0ed51b3
LW
4352 if (off >= 0)
4353 return 0;
4354 }
4355 else {
4356 while (off++) {
1aa99e6b 4357 if (s > lim) {
a0ed51b3 4358 s--;
ffc61ed2 4359 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4360 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4361 s--;
ffc61ed2
JH
4362 }
4363 /* XXX could check well-formedness here */
a0ed51b3
LW
4364 }
4365 else
4366 break;
4367 }
4368 if (off <= 0)
4369 return 0;
4370 }
4371 return s;
4372}
51371543 4373
51371543 4374static void
acfe0abc 4375restore_pos(pTHX_ void *arg)
51371543 4376{
51371543
GS
4377 if (PL_reg_eval_set) {
4378 if (PL_reg_oldsaved) {
4379 PL_reg_re->subbeg = PL_reg_oldsaved;
4380 PL_reg_re->sublen = PL_reg_oldsavedlen;
4381 RX_MATCH_COPIED_on(PL_reg_re);
4382 }
4383 PL_reg_magic->mg_len = PL_reg_oldpos;
4384 PL_reg_eval_set = 0;
4385 PL_curpm = PL_reg_oldcurpm;
4386 }
4387}