This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make that six wrongs.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23
G
907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
963 e = strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
967
968 if (do_utf8) {
969 STRLEN len;
cadb39a9
JH
970 /* The ibcmp_utf8() uses to_uni_fold() which is more
971 * correct folding for Unicode than using lowercase.
972 * However, it doesn't work quite fully since the folding
973 * is a one-to-many mapping and the regex optimizer is
974 * unaware of this, so it may throw out good matches.
975 * Fortunately, not getting this right is allowed
976 * for Unicode Regular Expression Support level 1,
977 * only one-to-one matching is required. --jhi */
09091399 978 if (c1 == c2) {
1aa99e6b 979 while (s <= e) {
9041c2e3 980 if ( utf8_to_uvchr((U8*)s, &len) == c1
bc517b45 981 && (ln == len ||
2ad154fe
JH
982 !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
983 m, UTF, (I32)ln))
55da9344 984 && (norun || regtry(prog, s)) )
1aa99e6b
IH
985 goto got_it;
986 s += len;
987 }
09091399
JH
988 }
989 else {
1aa99e6b 990 while (s <= e) {
254ba52a
JH
991 U8 tmpbuf [UTF8_MAXLEN+1];
992 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
993 STRLEN foldlen;
9041c2e3 994 UV c = utf8_to_uvchr((U8*)s, &len);
254ba52a
JH
995 UV f;
996
997 uvchr_to_utf8(tmpbuf, c);
2ddfca77
JH
998 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
999 f = utf8_to_uvchr(foldbuf, 0);
1000
880bd946
JH
1001 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1002 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1003 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
254ba52a 1004 if ( (c == c1 || c == c2 || f == c1 || f == c2)
92c9c481 1005 && (ln == len || ln == foldlen ||
2ad154fe
JH
1006 !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
1007 m, UTF, (I32)ln))
55da9344 1008 && (norun || regtry(prog, s)) )
1aa99e6b
IH
1009 goto got_it;
1010 s += len;
1011 }
09091399 1012 }
1aa99e6b
IH
1013 }
1014 else {
1015 if (c1 == c2)
1016 while (s <= e) {
1017 if ( *(U8*)s == c1
1018 && (ln == 1 || !(OP(c) == EXACTF
1019 ? ibcmp(s, m, ln)
1020 : ibcmp_locale(s, m, ln)))
1021 && (norun || regtry(prog, s)) )
1022 goto got_it;
1023 s++;
1024 }
1025 else
1026 while (s <= e) {
1027 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1028 && (ln == 1 || !(OP(c) == EXACTF
1029 ? ibcmp(s, m, ln)
1030 : ibcmp_locale(s, m, ln)))
1031 && (norun || regtry(prog, s)) )
1032 goto got_it;
1033 s++;
1034 }
b3c9acc1
IZ
1035 }
1036 break;
bbce6d69 1037 case BOUNDL:
3280af22 1038 PL_reg_flags |= RF_tainted;
bbce6d69 1039 /* FALL THROUGH */
a0d0e21e 1040 case BOUND:
ffc61ed2 1041 if (do_utf8) {
12d33761 1042 if (s == PL_bostr)
ffc61ed2
JH
1043 tmp = '\n';
1044 else {
1aa99e6b 1045 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1046
0064a8a9
JH
1047 if (s > (char*)r)
1048 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1049 }
1050 tmp = ((OP(c) == BOUND ?
9041c2e3 1051 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1052 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1053 while (s < strend) {
1054 if (tmp == !(OP(c) == BOUND ?
3568d838 1055 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1056 isALNUM_LC_utf8((U8*)s)))
1057 {
1058 tmp = !tmp;
1059 if ((norun || regtry(prog, s)))
1060 goto got_it;
1061 }
1062 s += UTF8SKIP(s);
a687059c 1063 }
a0d0e21e 1064 }
667bb95a 1065 else {
12d33761 1066 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1067 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1068 while (s < strend) {
1069 if (tmp ==
1070 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1071 tmp = !tmp;
1072 if ((norun || regtry(prog, s)))
1073 goto got_it;
1074 }
1075 s++;
a0ed51b3 1076 }
a0ed51b3 1077 }
6eb5f6b9 1078 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1079 goto got_it;
1080 break;
bbce6d69 1081 case NBOUNDL:
3280af22 1082 PL_reg_flags |= RF_tainted;
bbce6d69 1083 /* FALL THROUGH */
a0d0e21e 1084 case NBOUND:
ffc61ed2 1085 if (do_utf8) {
12d33761 1086 if (s == PL_bostr)
ffc61ed2
JH
1087 tmp = '\n';
1088 else {
1aa99e6b 1089 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1090
0064a8a9
JH
1091 if (s > (char*)r)
1092 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1093 }
1094 tmp = ((OP(c) == NBOUND ?
9041c2e3 1095 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1096 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1097 while (s < strend) {
1098 if (tmp == !(OP(c) == NBOUND ?
3568d838 1099 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1100 isALNUM_LC_utf8((U8*)s)))
1101 tmp = !tmp;
1102 else if ((norun || regtry(prog, s)))
1103 goto got_it;
1104 s += UTF8SKIP(s);
1105 }
a0d0e21e 1106 }
667bb95a 1107 else {
12d33761 1108 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1109 tmp = ((OP(c) == NBOUND ?
1110 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1111 while (s < strend) {
1112 if (tmp ==
1113 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1114 tmp = !tmp;
1115 else if ((norun || regtry(prog, s)))
1116 goto got_it;
1117 s++;
1118 }
a0ed51b3 1119 }
6eb5f6b9 1120 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1121 goto got_it;
1122 break;
a0d0e21e 1123 case ALNUM:
ffc61ed2 1124 if (do_utf8) {
8269fa76 1125 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1126 while (s < strend) {
3568d838 1127 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1128 if (tmp && (norun || regtry(prog, s)))
1129 goto got_it;
1130 else
1131 tmp = doevery;
1132 }
bbce6d69 1133 else
ffc61ed2
JH
1134 tmp = 1;
1135 s += UTF8SKIP(s);
bbce6d69 1136 }
bbce6d69 1137 }
ffc61ed2
JH
1138 else {
1139 while (s < strend) {
1140 if (isALNUM(*s)) {
1141 if (tmp && (norun || regtry(prog, s)))
1142 goto got_it;
1143 else
1144 tmp = doevery;
1145 }
a0ed51b3 1146 else
ffc61ed2
JH
1147 tmp = 1;
1148 s++;
a0ed51b3 1149 }
a0ed51b3
LW
1150 }
1151 break;
bbce6d69 1152 case ALNUML:
3280af22 1153 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1154 if (do_utf8) {
1155 while (s < strend) {
1156 if (isALNUM_LC_utf8((U8*)s)) {
1157 if (tmp && (norun || regtry(prog, s)))
1158 goto got_it;
1159 else
1160 tmp = doevery;
1161 }
a687059c 1162 else
ffc61ed2
JH
1163 tmp = 1;
1164 s += UTF8SKIP(s);
a0d0e21e 1165 }
a0d0e21e 1166 }
ffc61ed2
JH
1167 else {
1168 while (s < strend) {
1169 if (isALNUM_LC(*s)) {
1170 if (tmp && (norun || regtry(prog, s)))
1171 goto got_it;
1172 else
1173 tmp = doevery;
1174 }
a0ed51b3 1175 else
ffc61ed2
JH
1176 tmp = 1;
1177 s++;
a0ed51b3 1178 }
a0ed51b3
LW
1179 }
1180 break;
a0d0e21e 1181 case NALNUM:
ffc61ed2 1182 if (do_utf8) {
8269fa76 1183 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1184 while (s < strend) {
3568d838 1185 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1186 if (tmp && (norun || regtry(prog, s)))
1187 goto got_it;
1188 else
1189 tmp = doevery;
1190 }
bbce6d69 1191 else
ffc61ed2
JH
1192 tmp = 1;
1193 s += UTF8SKIP(s);
bbce6d69 1194 }
bbce6d69 1195 }
ffc61ed2
JH
1196 else {
1197 while (s < strend) {
1198 if (!isALNUM(*s)) {
1199 if (tmp && (norun || regtry(prog, s)))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
a0ed51b3 1204 else
ffc61ed2
JH
1205 tmp = 1;
1206 s++;
a0ed51b3 1207 }
a0ed51b3
LW
1208 }
1209 break;
bbce6d69 1210 case NALNUML:
3280af22 1211 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1212 if (do_utf8) {
1213 while (s < strend) {
1214 if (!isALNUM_LC_utf8((U8*)s)) {
1215 if (tmp && (norun || regtry(prog, s)))
1216 goto got_it;
1217 else
1218 tmp = doevery;
1219 }
a687059c 1220 else
ffc61ed2
JH
1221 tmp = 1;
1222 s += UTF8SKIP(s);
a687059c 1223 }
a0d0e21e 1224 }
ffc61ed2
JH
1225 else {
1226 while (s < strend) {
1227 if (!isALNUM_LC(*s)) {
1228 if (tmp && (norun || regtry(prog, s)))
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
a0ed51b3 1233 else
ffc61ed2
JH
1234 tmp = 1;
1235 s++;
a0ed51b3 1236 }
a0ed51b3
LW
1237 }
1238 break;
a0d0e21e 1239 case SPACE:
ffc61ed2 1240 if (do_utf8) {
8269fa76 1241 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1242 while (s < strend) {
3568d838 1243 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1244 if (tmp && (norun || regtry(prog, s)))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
a0d0e21e 1249 else
ffc61ed2
JH
1250 tmp = 1;
1251 s += UTF8SKIP(s);
2304df62 1252 }
a0d0e21e 1253 }
ffc61ed2
JH
1254 else {
1255 while (s < strend) {
1256 if (isSPACE(*s)) {
1257 if (tmp && (norun || regtry(prog, s)))
1258 goto got_it;
1259 else
1260 tmp = doevery;
1261 }
a0ed51b3 1262 else
ffc61ed2
JH
1263 tmp = 1;
1264 s++;
a0ed51b3 1265 }
a0ed51b3
LW
1266 }
1267 break;
bbce6d69 1268 case SPACEL:
3280af22 1269 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1270 if (do_utf8) {
1271 while (s < strend) {
1272 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1273 if (tmp && (norun || regtry(prog, s)))
1274 goto got_it;
1275 else
1276 tmp = doevery;
1277 }
bbce6d69 1278 else
ffc61ed2
JH
1279 tmp = 1;
1280 s += UTF8SKIP(s);
bbce6d69 1281 }
bbce6d69 1282 }
ffc61ed2
JH
1283 else {
1284 while (s < strend) {
1285 if (isSPACE_LC(*s)) {
1286 if (tmp && (norun || regtry(prog, s)))
1287 goto got_it;
1288 else
1289 tmp = doevery;
1290 }
a0ed51b3 1291 else
ffc61ed2
JH
1292 tmp = 1;
1293 s++;
a0ed51b3 1294 }
a0ed51b3
LW
1295 }
1296 break;
a0d0e21e 1297 case NSPACE:
ffc61ed2 1298 if (do_utf8) {
8269fa76 1299 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1300 while (s < strend) {
3568d838 1301 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1302 if (tmp && (norun || regtry(prog, s)))
1303 goto got_it;
1304 else
1305 tmp = doevery;
1306 }
a0d0e21e 1307 else
ffc61ed2
JH
1308 tmp = 1;
1309 s += UTF8SKIP(s);
a687059c 1310 }
a0d0e21e 1311 }
ffc61ed2
JH
1312 else {
1313 while (s < strend) {
1314 if (!isSPACE(*s)) {
1315 if (tmp && (norun || regtry(prog, s)))
1316 goto got_it;
1317 else
1318 tmp = doevery;
1319 }
a0ed51b3 1320 else
ffc61ed2
JH
1321 tmp = 1;
1322 s++;
a0ed51b3 1323 }
a0ed51b3
LW
1324 }
1325 break;
bbce6d69 1326 case NSPACEL:
3280af22 1327 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1328 if (do_utf8) {
1329 while (s < strend) {
1330 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1331 if (tmp && (norun || regtry(prog, s)))
1332 goto got_it;
1333 else
1334 tmp = doevery;
1335 }
bbce6d69 1336 else
ffc61ed2
JH
1337 tmp = 1;
1338 s += UTF8SKIP(s);
bbce6d69 1339 }
bbce6d69 1340 }
ffc61ed2
JH
1341 else {
1342 while (s < strend) {
1343 if (!isSPACE_LC(*s)) {
1344 if (tmp && (norun || regtry(prog, s)))
1345 goto got_it;
1346 else
1347 tmp = doevery;
1348 }
a0ed51b3 1349 else
ffc61ed2
JH
1350 tmp = 1;
1351 s++;
a0ed51b3 1352 }
a0ed51b3
LW
1353 }
1354 break;
a0d0e21e 1355 case DIGIT:
ffc61ed2 1356 if (do_utf8) {
8269fa76 1357 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1358 while (s < strend) {
3568d838 1359 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1360 if (tmp && (norun || regtry(prog, s)))
1361 goto got_it;
1362 else
1363 tmp = doevery;
1364 }
a0d0e21e 1365 else
ffc61ed2
JH
1366 tmp = 1;
1367 s += UTF8SKIP(s);
2b69d0c2 1368 }
a0d0e21e 1369 }
ffc61ed2
JH
1370 else {
1371 while (s < strend) {
1372 if (isDIGIT(*s)) {
1373 if (tmp && (norun || regtry(prog, s)))
1374 goto got_it;
1375 else
1376 tmp = doevery;
1377 }
a0ed51b3 1378 else
ffc61ed2
JH
1379 tmp = 1;
1380 s++;
a0ed51b3 1381 }
a0ed51b3
LW
1382 }
1383 break;
b8c5462f
JH
1384 case DIGITL:
1385 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1386 if (do_utf8) {
1387 while (s < strend) {
1388 if (isDIGIT_LC_utf8((U8*)s)) {
1389 if (tmp && (norun || regtry(prog, s)))
1390 goto got_it;
1391 else
1392 tmp = doevery;
1393 }
b8c5462f 1394 else
ffc61ed2
JH
1395 tmp = 1;
1396 s += UTF8SKIP(s);
b8c5462f 1397 }
b8c5462f 1398 }
ffc61ed2
JH
1399 else {
1400 while (s < strend) {
1401 if (isDIGIT_LC(*s)) {
1402 if (tmp && (norun || regtry(prog, s)))
1403 goto got_it;
1404 else
1405 tmp = doevery;
1406 }
b8c5462f 1407 else
ffc61ed2
JH
1408 tmp = 1;
1409 s++;
b8c5462f 1410 }
b8c5462f
JH
1411 }
1412 break;
a0d0e21e 1413 case NDIGIT:
ffc61ed2 1414 if (do_utf8) {
8269fa76 1415 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1416 while (s < strend) {
3568d838 1417 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1418 if (tmp && (norun || regtry(prog, s)))
1419 goto got_it;
1420 else
1421 tmp = doevery;
1422 }
a0d0e21e 1423 else
ffc61ed2
JH
1424 tmp = 1;
1425 s += UTF8SKIP(s);
a687059c 1426 }
a0d0e21e 1427 }
ffc61ed2
JH
1428 else {
1429 while (s < strend) {
1430 if (!isDIGIT(*s)) {
1431 if (tmp && (norun || regtry(prog, s)))
1432 goto got_it;
1433 else
1434 tmp = doevery;
1435 }
a0ed51b3 1436 else
ffc61ed2
JH
1437 tmp = 1;
1438 s++;
a0ed51b3 1439 }
a0ed51b3
LW
1440 }
1441 break;
b8c5462f
JH
1442 case NDIGITL:
1443 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1444 if (do_utf8) {
1445 while (s < strend) {
1446 if (!isDIGIT_LC_utf8((U8*)s)) {
1447 if (tmp && (norun || regtry(prog, s)))
1448 goto got_it;
1449 else
1450 tmp = doevery;
1451 }
b8c5462f 1452 else
ffc61ed2
JH
1453 tmp = 1;
1454 s += UTF8SKIP(s);
b8c5462f 1455 }
a0ed51b3 1456 }
ffc61ed2
JH
1457 else {
1458 while (s < strend) {
1459 if (!isDIGIT_LC(*s)) {
1460 if (tmp && (norun || regtry(prog, s)))
1461 goto got_it;
1462 else
1463 tmp = doevery;
1464 }
cf93c79d 1465 else
ffc61ed2
JH
1466 tmp = 1;
1467 s++;
b8c5462f 1468 }
b8c5462f
JH
1469 }
1470 break;
b3c9acc1 1471 default:
3c3eec57
GS
1472 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1473 break;
d6a28714 1474 }
6eb5f6b9
JH
1475 return 0;
1476 got_it:
1477 return s;
1478}
1479
1480/*
1481 - regexec_flags - match a regexp against a string
1482 */
1483I32
1484Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1485 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1486/* strend: pointer to null at end of string */
1487/* strbeg: real beginning of string */
1488/* minend: end of match must be >=minend after stringarg. */
1489/* data: May be used for some additional optimizations. */
1490/* nosave: For optimizations. */
1491{
6eb5f6b9
JH
1492 register char *s;
1493 register regnode *c;
1494 register char *startpos = stringarg;
6eb5f6b9
JH
1495 I32 minlen; /* must match at least this many chars */
1496 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1497 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1498 constant substr. */ /* CC */
1499 I32 end_shift = 0; /* Same for the end. */ /* CC */
1500 I32 scream_pos = -1; /* Internal iterator of scream. */
1501 char *scream_olds;
1502 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1503 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1504#ifdef DEBUGGING
ce333219 1505 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1506#endif
6eb5f6b9
JH
1507
1508 PL_regcc = 0;
1509
1510 cache_re(prog);
1511#ifdef DEBUGGING
aea4f609 1512 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1513#endif
1514
1515 /* Be paranoid... */
1516 if (prog == NULL || startpos == NULL) {
1517 Perl_croak(aTHX_ "NULL regexp parameter");
1518 return 0;
1519 }
1520
1521 minlen = prog->minlen;
a72c7584
JH
1522 if (strend - startpos < minlen) {
1523 DEBUG_r(PerlIO_printf(Perl_debug_log,
1524 "String too short [regexec_flags]...\n"));
1525 goto phooey;
1aa99e6b 1526 }
6eb5f6b9 1527
6eb5f6b9
JH
1528 /* Check validity of program. */
1529 if (UCHARAT(prog->program) != REG_MAGIC) {
1530 Perl_croak(aTHX_ "corrupted regexp program");
1531 }
1532
1533 PL_reg_flags = 0;
1534 PL_reg_eval_set = 0;
1535 PL_reg_maxiter = 0;
1536
1537 if (prog->reganch & ROPT_UTF8)
1538 PL_reg_flags |= RF_utf8;
1539
1540 /* Mark beginning of line for ^ and lookbehind. */
1541 PL_regbol = startpos;
1542 PL_bostr = strbeg;
1543 PL_reg_sv = sv;
1544
1545 /* Mark end of line for $ (and such) */
1546 PL_regeol = strend;
1547
1548 /* see how far we have to get to not match where we matched before */
1549 PL_regtill = startpos+minend;
1550
1551 /* We start without call_cc context. */
1552 PL_reg_call_cc = 0;
1553
1554 /* If there is a "must appear" string, look for it. */
1555 s = startpos;
1556
1557 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1558 MAGIC *mg;
1559
1560 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1561 PL_reg_ganch = startpos;
1562 else if (sv && SvTYPE(sv) >= SVt_PVMG
1563 && SvMAGIC(sv)
14befaf4
DM
1564 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1565 && mg->mg_len >= 0) {
6eb5f6b9
JH
1566 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1567 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1568 if (s > PL_reg_ganch)
6eb5f6b9
JH
1569 goto phooey;
1570 s = PL_reg_ganch;
1571 }
1572 }
1573 else /* pos() not defined */
1574 PL_reg_ganch = strbeg;
1575 }
1576
699c3c34
JH
1577 if (do_utf8 == (UTF!=0) &&
1578 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1579 re_scream_pos_data d;
1580
1581 d.scream_olds = &scream_olds;
1582 d.scream_pos = &scream_pos;
1583 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1584 if (!s) {
1585 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1586 goto phooey; /* not present */
3fa9c3d7 1587 }
6eb5f6b9
JH
1588 }
1589
2a782b5b 1590 DEBUG_r({
df1ffd02
JH
1591 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1592 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1593 if (!PL_colorset)
1594 reginitcolors();
1595 PerlIO_printf(Perl_debug_log,
1596 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1597 PL_colors[4],PL_colors[5],PL_colors[0],
1598 prog->precomp,
1599 PL_colors[1],
1600 (strlen(prog->precomp) > 60 ? "..." : ""),
1601 PL_colors[0],
1602 (int)(len > 60 ? 60 : len),
1603 s, PL_colors[1],
1604 (len > 60 ? "..." : "")
1605 );
1606 });
6eb5f6b9
JH
1607
1608 /* Simplest case: anchored match need be tried only once. */
1609 /* [unless only anchor is BOL and multiline is set] */
1610 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1611 if (s == startpos && regtry(prog, startpos))
1612 goto got_it;
1613 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1614 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1615 {
1616 char *end;
1617
1618 if (minlen)
1619 dontbother = minlen - 1;
1aa99e6b 1620 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1621 /* for multiline we only have to try after newlines */
1622 if (prog->check_substr) {
1623 if (s == startpos)
1624 goto after_try;
1625 while (1) {
1626 if (regtry(prog, s))
1627 goto got_it;
1628 after_try:
1629 if (s >= end)
1630 goto phooey;
1631 if (prog->reganch & RE_USE_INTUIT) {
1632 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1633 if (!s)
1634 goto phooey;
1635 }
1636 else
1637 s++;
1638 }
1639 } else {
1640 if (s > startpos)
1641 s--;
1642 while (s < end) {
1643 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1644 if (regtry(prog, s))
1645 goto got_it;
1646 }
1647 }
1648 }
1649 }
1650 goto phooey;
1651 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1652 if (regtry(prog, PL_reg_ganch))
1653 goto got_it;
1654 goto phooey;
1655 }
1656
1657 /* Messy cases: unanchored match. */
9041c2e3 1658 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1659 /* we have /x+whatever/ */
1660 /* it must be a one character string (XXXX Except UTF?) */
1661 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1662#ifdef DEBUGGING
1663 int did_match = 0;
1664#endif
1665
1aa99e6b 1666 if (do_utf8) {
6eb5f6b9
JH
1667 while (s < strend) {
1668 if (*s == ch) {
bf93d4cc 1669 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1670 if (regtry(prog, s)) goto got_it;
1671 s += UTF8SKIP(s);
1672 while (s < strend && *s == ch)
1673 s += UTF8SKIP(s);
1674 }
1675 s += UTF8SKIP(s);
1676 }
1677 }
1678 else {
1679 while (s < strend) {
1680 if (*s == ch) {
bf93d4cc 1681 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1682 if (regtry(prog, s)) goto got_it;
1683 s++;
1684 while (s < strend && *s == ch)
1685 s++;
1686 }
1687 s++;
1688 }
1689 }
b7953727 1690 DEBUG_r(if (!did_match)
bf93d4cc 1691 PerlIO_printf(Perl_debug_log,
b7953727
JH
1692 "Did not find anchored character...\n")
1693 );
6eb5f6b9
JH
1694 }
1695 /*SUPPRESS 560*/
1aa99e6b
IH
1696 else if (do_utf8 == (UTF!=0) &&
1697 (prog->anchored_substr != Nullsv
9041c2e3 1698 || (prog->float_substr != Nullsv
1aa99e6b 1699 && prog->float_max_offset < strend - s))) {
9041c2e3 1700 SV *must = prog->anchored_substr
6eb5f6b9 1701 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1702 I32 back_max =
6eb5f6b9 1703 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1704 I32 back_min =
6eb5f6b9 1705 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1706 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1707 -(I32)(CHR_SVLEN(must)
1aa99e6b 1708 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1709 char *last1; /* Last position checked before */
bf93d4cc
GS
1710#ifdef DEBUGGING
1711 int did_match = 0;
1712#endif
6eb5f6b9
JH
1713
1714 if (s > PL_bostr)
1715 last1 = HOPc(s, -1);
1716 else
1717 last1 = s - 1; /* bogus */
1718
1719 /* XXXX check_substr already used to find `s', can optimize if
1720 check_substr==must. */
1721 scream_pos = -1;
1722 dontbother = end_shift;
1723 strend = HOPc(strend, -dontbother);
1724 while ( (s <= last) &&
9041c2e3 1725 ((flags & REXEC_SCREAM)
1aa99e6b 1726 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1727 end_shift, &scream_pos, 0))
1aa99e6b 1728 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1729 (unsigned char*)strend, must,
6eb5f6b9 1730 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1731 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1732 if (HOPc(s, -back_max) > last1) {
1733 last1 = HOPc(s, -back_min);
1734 s = HOPc(s, -back_max);
1735 }
1736 else {
1737 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1738
1739 last1 = HOPc(s, -back_min);
1740 s = t;
1741 }
1aa99e6b 1742 if (do_utf8) {
6eb5f6b9
JH
1743 while (s <= last1) {
1744 if (regtry(prog, s))
1745 goto got_it;
1746 s += UTF8SKIP(s);
1747 }
1748 }
1749 else {
1750 while (s <= last1) {
1751 if (regtry(prog, s))
1752 goto got_it;
1753 s++;
1754 }
1755 }
1756 }
b7953727
JH
1757 DEBUG_r(if (!did_match)
1758 PerlIO_printf(Perl_debug_log,
1759 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1760 ((must == prog->anchored_substr)
1761 ? "anchored" : "floating"),
1762 PL_colors[0],
1763 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1764 SvPVX(must),
b7953727
JH
1765 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1766 );
6eb5f6b9
JH
1767 goto phooey;
1768 }
155aba94 1769 else if ((c = prog->regstclass)) {
66e933ab
GS
1770 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1771 /* don't bother with what can't match */
6eb5f6b9 1772 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1773 DEBUG_r({
1774 SV *prop = sv_newmortal();
1775 regprop(prop, c);
2a782b5b 1776 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1777 });
6eb5f6b9
JH
1778 if (find_byclass(prog, c, s, strend, startpos, 0))
1779 goto got_it;
bf93d4cc 1780 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1781 }
1782 else {
1783 dontbother = 0;
1784 if (prog->float_substr != Nullsv) { /* Trim the end. */
1785 char *last;
d6a28714
JH
1786
1787 if (flags & REXEC_SCREAM) {
1788 last = screaminstr(sv, prog->float_substr, s - strbeg,
1789 end_shift, &scream_pos, 1); /* last one */
1790 if (!last)
ffc61ed2 1791 last = scream_olds; /* Only one occurrence. */
b8c5462f 1792 }
d6a28714
JH
1793 else {
1794 STRLEN len;
1795 char *little = SvPV(prog->float_substr, len);
1796
1797 if (SvTAIL(prog->float_substr)) {
1798 if (memEQ(strend - len + 1, little, len - 1))
1799 last = strend - len + 1;
1800 else if (!PL_multiline)
9041c2e3 1801 last = memEQ(strend - len, little, len)
d6a28714 1802 ? strend - len : Nullch;
b8c5462f 1803 else
d6a28714
JH
1804 goto find_last;
1805 } else {
1806 find_last:
9041c2e3 1807 if (len)
d6a28714 1808 last = rninstr(s, strend, little, little + len);
b8c5462f 1809 else
d6a28714 1810 last = strend; /* matching `$' */
b8c5462f 1811 }
b8c5462f 1812 }
bf93d4cc
GS
1813 if (last == NULL) {
1814 DEBUG_r(PerlIO_printf(Perl_debug_log,
1815 "%sCan't trim the tail, match fails (should not happen)%s\n",
1816 PL_colors[4],PL_colors[5]));
1817 goto phooey; /* Should not happen! */
1818 }
d6a28714
JH
1819 dontbother = strend - last + prog->float_min_offset;
1820 }
1821 if (minlen && (dontbother < minlen))
1822 dontbother = minlen - 1;
1823 strend -= dontbother; /* this one's always in bytes! */
1824 /* We don't know much -- general case. */
1aa99e6b 1825 if (do_utf8) {
d6a28714
JH
1826 for (;;) {
1827 if (regtry(prog, s))
1828 goto got_it;
1829 if (s >= strend)
1830 break;
b8c5462f 1831 s += UTF8SKIP(s);
d6a28714
JH
1832 };
1833 }
1834 else {
1835 do {
1836 if (regtry(prog, s))
1837 goto got_it;
1838 } while (s++ < strend);
1839 }
1840 }
1841
1842 /* Failure. */
1843 goto phooey;
1844
1845got_it:
1846 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1847
1848 if (PL_reg_eval_set) {
1849 /* Preserve the current value of $^R */
1850 if (oreplsv != GvSV(PL_replgv))
1851 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1852 restored, the value remains
1853 the same. */
acfe0abc 1854 restore_pos(aTHX_ 0);
d6a28714
JH
1855 }
1856
1857 /* make sure $`, $&, $', and $digit will work later */
1858 if ( !(flags & REXEC_NOT_FIRST) ) {
1859 if (RX_MATCH_COPIED(prog)) {
1860 Safefree(prog->subbeg);
1861 RX_MATCH_COPIED_off(prog);
1862 }
1863 if (flags & REXEC_COPY_STR) {
1864 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1865
1866 s = savepvn(strbeg, i);
1867 prog->subbeg = s;
1868 prog->sublen = i;
1869 RX_MATCH_COPIED_on(prog);
1870 }
1871 else {
1872 prog->subbeg = strbeg;
1873 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1874 }
1875 }
9041c2e3 1876
d6a28714
JH
1877 return 1;
1878
1879phooey:
bf93d4cc
GS
1880 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1881 PL_colors[4],PL_colors[5]));
d6a28714 1882 if (PL_reg_eval_set)
acfe0abc 1883 restore_pos(aTHX_ 0);
d6a28714
JH
1884 return 0;
1885}
1886
1887/*
1888 - regtry - try match at specific point
1889 */
1890STATIC I32 /* 0 failure, 1 success */
1891S_regtry(pTHX_ regexp *prog, char *startpos)
1892{
d6a28714
JH
1893 register I32 i;
1894 register I32 *sp;
1895 register I32 *ep;
1896 CHECKPOINT lastcp;
1897
02db2b7b
IZ
1898#ifdef DEBUGGING
1899 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1900#endif
d6a28714
JH
1901 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1902 MAGIC *mg;
1903
1904 PL_reg_eval_set = RS_init;
1905 DEBUG_r(DEBUG_s(
b900a521
JH
1906 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1907 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1908 ));
e8347627 1909 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1910 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1911 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1912 SAVETMPS;
1913 /* Apparently this is not needed, judging by wantarray. */
e8347627 1914 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1915 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1916
1917 if (PL_reg_sv) {
1918 /* Make $_ available to executed code. */
1919 if (PL_reg_sv != DEFSV) {
4d1ff10f 1920 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1921 SAVESPTR(DEFSV);
1922 DEFSV = PL_reg_sv;
b8c5462f 1923 }
d6a28714 1924
9041c2e3 1925 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1926 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1927 /* prepare for quick setting of pos */
14befaf4
DM
1928 sv_magic(PL_reg_sv, (SV*)0,
1929 PERL_MAGIC_regex_global, Nullch, 0);
1930 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1931 mg->mg_len = -1;
b8c5462f 1932 }
d6a28714
JH
1933 PL_reg_magic = mg;
1934 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1935 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1936 }
09687e5a 1937 if (!PL_reg_curpm) {
0f79a09d 1938 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1939#ifdef USE_ITHREADS
1940 {
1941 SV* repointer = newSViv(0);
577e12cc 1942 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1943 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1944 av_push(PL_regex_padav,repointer);
1945 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1946 PL_regex_pad = AvARRAY(PL_regex_padav);
1947 }
1948#endif
1949 }
aaa362c4 1950 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1951 PL_reg_oldcurpm = PL_curpm;
1952 PL_curpm = PL_reg_curpm;
1953 if (RX_MATCH_COPIED(prog)) {
1954 /* Here is a serious problem: we cannot rewrite subbeg,
1955 since it may be needed if this match fails. Thus
1956 $` inside (?{}) could fail... */
1957 PL_reg_oldsaved = prog->subbeg;
1958 PL_reg_oldsavedlen = prog->sublen;
1959 RX_MATCH_COPIED_off(prog);
1960 }
1961 else
1962 PL_reg_oldsaved = Nullch;
1963 prog->subbeg = PL_bostr;
1964 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1965 }
1966 prog->startp[0] = startpos - PL_bostr;
1967 PL_reginput = startpos;
1968 PL_regstartp = prog->startp;
1969 PL_regendp = prog->endp;
1970 PL_reglastparen = &prog->lastparen;
a01268b5 1971 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1972 prog->lastparen = 0;
1973 PL_regsize = 0;
1974 DEBUG_r(PL_reg_starttry = startpos);
1975 if (PL_reg_start_tmpl <= prog->nparens) {
1976 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1977 if(PL_reg_start_tmp)
1978 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1979 else
1980 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1981 }
1982
128e8167
JH
1983#ifdef DEBUGGING
1984 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1985 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1986 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1987#endif
1988
d6a28714
JH
1989 /* XXXX What this code is doing here?!!! There should be no need
1990 to do this again and again, PL_reglastparen should take care of
3dd2943c 1991 this! --ilya*/
dafc8851
JH
1992
1993 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1994 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1995 * PL_reglastparen), is not needed at all by the test suite
1996 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1997 * enough, for building DynaLoader, or otherwise this
1998 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1999 * will happen. Meanwhile, this code *is* needed for the
2000 * above-mentioned test suite tests to succeed. The common theme
2001 * on those tests seems to be returning null fields from matches.
2002 * --jhi */
dafc8851 2003#if 1
d6a28714
JH
2004 sp = prog->startp;
2005 ep = prog->endp;
2006 if (prog->nparens) {
09e8ae3b 2007 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2008 *++sp = -1;
2009 *++ep = -1;
2010 }
2011 }
dafc8851 2012#endif
02db2b7b 2013 REGCP_SET(lastcp);
d6a28714
JH
2014 if (regmatch(prog->program + 1)) {
2015 prog->endp[0] = PL_reginput - PL_bostr;
2016 return 1;
2017 }
02db2b7b 2018 REGCP_UNWIND(lastcp);
d6a28714
JH
2019 return 0;
2020}
2021
02db2b7b
IZ
2022#define RE_UNWIND_BRANCH 1
2023#define RE_UNWIND_BRANCHJ 2
2024
2025union re_unwind_t;
2026
2027typedef struct { /* XX: makes sense to enlarge it... */
2028 I32 type;
2029 I32 prev;
2030 CHECKPOINT lastcp;
2031} re_unwind_generic_t;
2032
2033typedef struct {
2034 I32 type;
2035 I32 prev;
2036 CHECKPOINT lastcp;
2037 I32 lastparen;
2038 regnode *next;
2039 char *locinput;
2040 I32 nextchr;
2041#ifdef DEBUGGING
2042 int regindent;
2043#endif
2044} re_unwind_branch_t;
2045
2046typedef union re_unwind_t {
2047 I32 type;
2048 re_unwind_generic_t generic;
2049 re_unwind_branch_t branch;
2050} re_unwind_t;
2051
8ba1375e
MJD
2052#define sayYES goto yes
2053#define sayNO goto no
2054#define sayYES_FINAL goto yes_final
2055#define sayYES_LOUD goto yes_loud
2056#define sayNO_FINAL goto no_final
2057#define sayNO_SILENT goto do_no
2058#define saySAME(x) if (x) goto yes; else goto no
2059
2060#define REPORT_CODE_OFF 24
2061
d6a28714
JH
2062/*
2063 - regmatch - main matching routine
2064 *
2065 * Conceptually the strategy is simple: check to see whether the current
2066 * node matches, call self recursively to see whether the rest matches,
2067 * and then act accordingly. In practice we make some effort to avoid
2068 * recursion, in particular by going through "ordinary" nodes (that don't
2069 * need to know whether the rest of the match failed) by a loop instead of
2070 * by recursion.
2071 */
2072/* [lwall] I've hoisted the register declarations to the outer block in order to
2073 * maybe save a little bit of pushing and popping on the stack. It also takes
2074 * advantage of machines that use a register save mask on subroutine entry.
2075 */
2076STATIC I32 /* 0 failure, 1 success */
2077S_regmatch(pTHX_ regnode *prog)
2078{
d6a28714
JH
2079 register regnode *scan; /* Current node. */
2080 regnode *next; /* Next node. */
2081 regnode *inner; /* Next node in internal branch. */
2082 register I32 nextchr; /* renamed nextchr - nextchar colides with
2083 function of same name */
2084 register I32 n; /* no or next */
b7953727
JH
2085 register I32 ln = 0; /* len or last */
2086 register char *s = Nullch; /* operand or save */
d6a28714 2087 register char *locinput = PL_reginput;
b7953727 2088 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2089 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2090 I32 unwind = 0;
b7953727 2091#if 0
02db2b7b 2092 I32 firstcp = PL_savestack_ix;
b7953727 2093#endif
53c4c00c 2094 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2095#ifdef DEBUGGING
ce333219
JH
2096 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2097 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2098 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2099#endif
02db2b7b 2100
d6a28714
JH
2101#ifdef DEBUGGING
2102 PL_regindent++;
2103#endif
2104
2105 /* Note that nextchr is a byte even in UTF */
2106 nextchr = UCHARAT(locinput);
2107 scan = prog;
2108 while (scan != NULL) {
8ba1375e 2109
2a782b5b 2110 DEBUG_r( {
d6a28714
JH
2111 SV *prop = sv_newmortal();
2112 int docolor = *PL_colors[0];
2113 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2114 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2115 /* The part of the string before starttry has one color
2116 (pref0_len chars), between starttry and current
2117 position another one (pref_len - pref0_len chars),
2118 after the current position the third one.
2119 We assume that pref0_len <= pref_len, otherwise we
2120 decrease pref0_len. */
9041c2e3 2121 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2122 ? (5 + taill) - l : locinput - PL_bostr;
2123 int pref0_len;
d6a28714 2124
df1ffd02 2125 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2126 pref_len++;
2127 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2128 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2129 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2130 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2131 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2132 l--;
d6a28714
JH
2133 if (pref0_len < 0)
2134 pref0_len = 0;
2135 if (pref0_len > pref_len)
2136 pref0_len = pref_len;
2137 regprop(prop, scan);
2a782b5b
JH
2138 {
2139 char *s0 =
df1ffd02 2140 do_utf8 ?
2a782b5b
JH
2141 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2142 pref0_len, 60, 0) :
2143 locinput - pref_len;
df1ffd02
JH
2144 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2145 char *s1 = do_utf8 ?
2a782b5b
JH
2146 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2147 pref_len - pref0_len, 60, 0) :
2148 locinput - pref_len + pref0_len;
df1ffd02
JH
2149 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2150 char *s2 = do_utf8 ?
2a782b5b
JH
2151 pv_uni_display(dsv2, (U8*)locinput,
2152 PL_regeol - locinput, 60, 0) :
2153 locinput;
df1ffd02 2154 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2155 PerlIO_printf(Perl_debug_log,
2156 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2157 (IV)(locinput - PL_bostr),
2158 PL_colors[4],
2159 len0, s0,
2160 PL_colors[5],
2161 PL_colors[2],
2162 len1, s1,
2163 PL_colors[3],
2164 (docolor ? "" : "> <"),
2165 PL_colors[0],
2166 len2, s2,
2167 PL_colors[1],
2168 15 - l - pref_len + 1,
2169 "",
2170 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2171 SvPVX(prop));
2172 }
2173 });
d6a28714
JH
2174
2175 next = scan + NEXT_OFF(scan);
2176 if (next == scan)
2177 next = NULL;
2178
2179 switch (OP(scan)) {
2180 case BOL:
12d33761
HS
2181 if (locinput == PL_bostr || (PL_multiline &&
2182 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2183 {
2184 /* regtill = regbol; */
b8c5462f
JH
2185 break;
2186 }
d6a28714
JH
2187 sayNO;
2188 case MBOL:
12d33761
HS
2189 if (locinput == PL_bostr ||
2190 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2191 {
b8c5462f
JH
2192 break;
2193 }
d6a28714
JH
2194 sayNO;
2195 case SBOL:
c2a73568 2196 if (locinput == PL_bostr)
b8c5462f 2197 break;
d6a28714
JH
2198 sayNO;
2199 case GPOS:
2200 if (locinput == PL_reg_ganch)
2201 break;
2202 sayNO;
2203 case EOL:
2204 if (PL_multiline)
2205 goto meol;
2206 else
2207 goto seol;
2208 case MEOL:
2209 meol:
2210 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2211 sayNO;
b8c5462f 2212 break;
d6a28714
JH
2213 case SEOL:
2214 seol:
2215 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2216 sayNO;
d6a28714 2217 if (PL_regeol - locinput > 1)
b8c5462f 2218 sayNO;
b8c5462f 2219 break;
d6a28714
JH
2220 case EOS:
2221 if (PL_regeol != locinput)
b8c5462f 2222 sayNO;
d6a28714 2223 break;
ffc61ed2 2224 case SANY:
d6a28714 2225 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2226 sayNO;
f33976b4
DB
2227 if (do_utf8) {
2228 locinput += PL_utf8skip[nextchr];
2229 if (locinput > PL_regeol)
2230 sayNO;
2231 nextchr = UCHARAT(locinput);
2232 }
2233 else
2234 nextchr = UCHARAT(++locinput);
2235 break;
2236 case CANY:
2237 if (!nextchr && locinput >= PL_regeol)
2238 sayNO;
b8c5462f 2239 nextchr = UCHARAT(++locinput);
a0d0e21e 2240 break;
ffc61ed2 2241 case REG_ANY:
1aa99e6b
IH
2242 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2243 sayNO;
2244 if (do_utf8) {
b8c5462f 2245 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2246 if (locinput > PL_regeol)
2247 sayNO;
a0ed51b3 2248 nextchr = UCHARAT(locinput);
a0ed51b3 2249 }
1aa99e6b
IH
2250 else
2251 nextchr = UCHARAT(++locinput);
a0ed51b3 2252 break;
d6a28714 2253 case EXACT:
cd439c50
IZ
2254 s = STRING(scan);
2255 ln = STR_LEN(scan);
1aa99e6b 2256 if (do_utf8 != (UTF!=0)) {
bc517b45 2257 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2258 char *l = locinput;
2259 char *e = s + ln;
bc517b45 2260 STRLEN ulen;
a72c7584 2261
5ff6fc6d
JH
2262 if (do_utf8) {
2263 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2264 while (s < e) {
2265 if (l >= PL_regeol)
5ff6fc6d
JH
2266 sayNO;
2267 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2268 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2269 sayNO;
bc517b45 2270 l += ulen;
5ff6fc6d 2271 s ++;
1aa99e6b 2272 }
5ff6fc6d
JH
2273 }
2274 else {
2275 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2276 while (s < e) {
2277 if (l >= PL_regeol)
2278 sayNO;
5ff6fc6d 2279 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2280 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2281 sayNO;
bc517b45 2282 s += ulen;
a72c7584 2283 l ++;
1aa99e6b 2284 }
5ff6fc6d 2285 }
1aa99e6b
IH
2286 locinput = l;
2287 nextchr = UCHARAT(locinput);
2288 break;
2289 }
bc517b45 2290 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2291 /* Inline the first character, for speed. */
2292 if (UCHARAT(s) != nextchr)
2293 sayNO;
2294 if (PL_regeol - locinput < ln)
2295 sayNO;
2296 if (ln > 1 && memNE(s, locinput, ln))
2297 sayNO;
2298 locinput += ln;
2299 nextchr = UCHARAT(locinput);
2300 break;
2301 case EXACTFL:
b8c5462f
JH
2302 PL_reg_flags |= RF_tainted;
2303 /* FALL THROUGH */
d6a28714 2304 case EXACTF:
cd439c50
IZ
2305 s = STRING(scan);
2306 ln = STR_LEN(scan);
d6a28714 2307
bc517b45 2308 {
d6a28714 2309 char *l = locinput;
bc517b45
JH
2310 char *e = s + ln;
2311 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2312
2313 if (do_utf8 != (UTF!=0)) {
2314 /* The target and the pattern have differing utf8ness. */
2315 STRLEN ulen1, ulen2;
2316 UV cs, cl;
2317
2318 if (do_utf8) {
2319 /* The target is utf8, the pattern is not utf8. */
2320 while (s < e) {
2321 if (l >= PL_regeol)
2322 sayNO;
2323
2324 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2325 (U8*)s, &ulen1);
2326 cl = utf8_to_uvchr((U8*)l, &ulen2);
2327
2328 if (cs != cl) {
2329 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2330 if (ulen1 != ulen2 || cs != cl)
2331 sayNO;
2332 }
2333 l += ulen1;
2334 s ++;
2335 }
2336 }
2337 else {
2338 /* The target is not utf8, the pattern is utf8. */
2339 while (s < e) {
2340 if (l >= PL_regeol)
2341 sayNO;
2342
2343 cs = utf8_to_uvchr((U8*)s, &ulen1);
2344
2345 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2346 (U8*)l, &ulen2);
2347
2348 if (cs != cl) {
2349 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2350 if (ulen1 != ulen2 || cs != cl)
2351 sayNO;
2352 }
2353 l ++;
2354 s += ulen1;
2355 }
2356 }
2357 locinput = l;
2358 nextchr = UCHARAT(locinput);
2359 break;
2360 }
2361
2362 if (do_utf8 && UTF) {
2363 /* Both the target and the pattern are utf8. */
2364 STRLEN ulen;
2365
2366 while (s < e) {
2367 if (l >= PL_regeol)
2368 sayNO;
2369 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2370 memNE(s, (char*)l, UTF8SKIP(s))) {
2371 to_utf8_fold((U8*)l, tmpbuf, &ulen);
2372 if (UTF8SKIP(s) != ulen ||
2373 memNE(s, (char*)tmpbuf, ulen))
2374 sayNO;
2375 }
2376 l += UTF8SKIP(l);
2377 s += UTF8SKIP(s);
2378 }
2379 locinput = l;
2380 nextchr = UCHARAT(locinput);
2381 break;
b8c5462f 2382 }
a0ed51b3 2383 }
d6a28714 2384
bc517b45
JH
2385 /* Neither the target and the pattern are utf8. */
2386
d6a28714
JH
2387 /* Inline the first character, for speed. */
2388 if (UCHARAT(s) != nextchr &&
2389 UCHARAT(s) != ((OP(scan) == EXACTF)
2390 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2391 sayNO;
d6a28714 2392 if (PL_regeol - locinput < ln)
b8c5462f 2393 sayNO;
d6a28714
JH
2394 if (ln > 1 && (OP(scan) == EXACTF
2395 ? ibcmp(s, locinput, ln)
2396 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2397 sayNO;
d6a28714
JH
2398 locinput += ln;
2399 nextchr = UCHARAT(locinput);
a0d0e21e 2400 break;
d6a28714 2401 case ANYOF:
ffc61ed2
JH
2402 if (do_utf8) {
2403 if (!reginclass(scan, (U8*)locinput, do_utf8))
2404 sayNO;
2405 if (locinput >= PL_regeol)
2406 sayNO;
2407 locinput += PL_utf8skip[nextchr];
b8c5462f 2408 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2409 }
2410 else {
2411 if (nextchr < 0)
2412 nextchr = UCHARAT(locinput);
2413 if (!reginclass(scan, (U8*)locinput, do_utf8))
2414 sayNO;
2415 if (!nextchr && locinput >= PL_regeol)
2416 sayNO;
2417 nextchr = UCHARAT(++locinput);
2418 }
b8c5462f 2419 break;
d6a28714 2420 case ALNUML:
b8c5462f
JH
2421 PL_reg_flags |= RF_tainted;
2422 /* FALL THROUGH */
d6a28714 2423 case ALNUM:
b8c5462f 2424 if (!nextchr)
4633a7c4 2425 sayNO;
ffc61ed2 2426 if (do_utf8) {
ad24be35 2427 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2428 if (!(OP(scan) == ALNUM
3568d838 2429 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2430 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2431 {
2432 sayNO;
a0ed51b3 2433 }
b8c5462f 2434 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2435 nextchr = UCHARAT(locinput);
2436 break;
2437 }
ffc61ed2 2438 if (!(OP(scan) == ALNUM
d6a28714 2439 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2440 sayNO;
b8c5462f 2441 nextchr = UCHARAT(++locinput);
a0d0e21e 2442 break;
d6a28714 2443 case NALNUML:
b8c5462f
JH
2444 PL_reg_flags |= RF_tainted;
2445 /* FALL THROUGH */
d6a28714
JH
2446 case NALNUM:
2447 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2448 sayNO;
ffc61ed2 2449 if (do_utf8) {
8269fa76 2450 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2451 if (OP(scan) == NALNUM
3568d838 2452 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2453 : isALNUM_LC_utf8((U8*)locinput))
2454 {
b8c5462f 2455 sayNO;
d6a28714 2456 }
b8c5462f
JH
2457 locinput += PL_utf8skip[nextchr];
2458 nextchr = UCHARAT(locinput);
2459 break;
2460 }
ffc61ed2 2461 if (OP(scan) == NALNUM
d6a28714 2462 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2463 sayNO;
76e3520e 2464 nextchr = UCHARAT(++locinput);
a0d0e21e 2465 break;
d6a28714
JH
2466 case BOUNDL:
2467 case NBOUNDL:
3280af22 2468 PL_reg_flags |= RF_tainted;
bbce6d69 2469 /* FALL THROUGH */
d6a28714
JH
2470 case BOUND:
2471 case NBOUND:
2472 /* was last char in word? */
ffc61ed2 2473 if (do_utf8) {
12d33761
HS
2474 if (locinput == PL_bostr)
2475 ln = '\n';
ffc61ed2
JH
2476 else {
2477 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2478
2b9d42f0 2479 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2480 }
2481 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2482 ln = isALNUM_uni(ln);
8269fa76 2483 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2484 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2485 }
2486 else {
9041c2e3 2487 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2488 n = isALNUM_LC_utf8((U8*)locinput);
2489 }
a0ed51b3 2490 }
d6a28714 2491 else {
12d33761
HS
2492 ln = (locinput != PL_bostr) ?
2493 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2494 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2495 ln = isALNUM(ln);
2496 n = isALNUM(nextchr);
2497 }
2498 else {
2499 ln = isALNUM_LC(ln);
2500 n = isALNUM_LC(nextchr);
2501 }
d6a28714 2502 }
ffc61ed2
JH
2503 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2504 OP(scan) == BOUNDL))
2505 sayNO;
a0ed51b3 2506 break;
d6a28714 2507 case SPACEL:
3280af22 2508 PL_reg_flags |= RF_tainted;
bbce6d69 2509 /* FALL THROUGH */
d6a28714 2510 case SPACE:
9442cb0e 2511 if (!nextchr)
4633a7c4 2512 sayNO;
1aa99e6b 2513 if (do_utf8) {
fd400ab9 2514 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2515 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2516 if (!(OP(scan) == SPACE
3568d838 2517 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2518 : isSPACE_LC_utf8((U8*)locinput)))
2519 {
2520 sayNO;
2521 }
2522 locinput += PL_utf8skip[nextchr];
2523 nextchr = UCHARAT(locinput);
2524 break;
d6a28714 2525 }
ffc61ed2
JH
2526 if (!(OP(scan) == SPACE
2527 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2528 sayNO;
2529 nextchr = UCHARAT(++locinput);
2530 }
2531 else {
2532 if (!(OP(scan) == SPACE
2533 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2534 sayNO;
2535 nextchr = UCHARAT(++locinput);
a0ed51b3 2536 }
a0ed51b3 2537 break;
d6a28714 2538 case NSPACEL:
3280af22 2539 PL_reg_flags |= RF_tainted;
bbce6d69 2540 /* FALL THROUGH */
d6a28714 2541 case NSPACE:
9442cb0e 2542 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2543 sayNO;
1aa99e6b 2544 if (do_utf8) {
8269fa76 2545 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2546 if (OP(scan) == NSPACE
3568d838 2547 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2548 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2549 {
2550 sayNO;
2551 }
2552 locinput += PL_utf8skip[nextchr];
2553 nextchr = UCHARAT(locinput);
2554 break;
a0ed51b3 2555 }
ffc61ed2 2556 if (OP(scan) == NSPACE
d6a28714 2557 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2558 sayNO;
76e3520e 2559 nextchr = UCHARAT(++locinput);
a0d0e21e 2560 break;
d6a28714 2561 case DIGITL:
a0ed51b3
LW
2562 PL_reg_flags |= RF_tainted;
2563 /* FALL THROUGH */
d6a28714 2564 case DIGIT:
9442cb0e 2565 if (!nextchr)
a0ed51b3 2566 sayNO;
1aa99e6b 2567 if (do_utf8) {
8269fa76 2568 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2569 if (!(OP(scan) == DIGIT
3568d838 2570 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2571 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2572 {
a0ed51b3 2573 sayNO;
dfe13c55 2574 }
6f06b55f 2575 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2576 nextchr = UCHARAT(locinput);
2577 break;
2578 }
ffc61ed2 2579 if (!(OP(scan) == DIGIT
9442cb0e 2580 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2581 sayNO;
2582 nextchr = UCHARAT(++locinput);
2583 break;
d6a28714 2584 case NDIGITL:
b8c5462f
JH
2585 PL_reg_flags |= RF_tainted;
2586 /* FALL THROUGH */
d6a28714 2587 case NDIGIT:
9442cb0e 2588 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2589 sayNO;
1aa99e6b 2590 if (do_utf8) {
8269fa76 2591 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2592 if (OP(scan) == NDIGIT
3568d838 2593 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2594 : isDIGIT_LC_utf8((U8*)locinput))
2595 {
a0ed51b3 2596 sayNO;
9442cb0e 2597 }
6f06b55f 2598 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2599 nextchr = UCHARAT(locinput);
2600 break;
2601 }
ffc61ed2 2602 if (OP(scan) == NDIGIT
9442cb0e 2603 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2604 sayNO;
2605 nextchr = UCHARAT(++locinput);
2606 break;
2607 case CLUMP:
b7c83a7e 2608 if (locinput >= PL_regeol)
a0ed51b3 2609 sayNO;
b7c83a7e
JH
2610 if (do_utf8) {
2611 LOAD_UTF8_CHARCLASS(mark,"~");
2612 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2613 sayNO;
2614 locinput += PL_utf8skip[nextchr];
2615 while (locinput < PL_regeol &&
2616 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2617 locinput += UTF8SKIP(locinput);
2618 if (locinput > PL_regeol)
2619 sayNO;
eb08e2da
JH
2620 }
2621 else
2622 locinput++;
a0ed51b3
LW
2623 nextchr = UCHARAT(locinput);
2624 break;
c8756f30 2625 case REFFL:
3280af22 2626 PL_reg_flags |= RF_tainted;
c8756f30 2627 /* FALL THROUGH */
c277df42 2628 case REF:
c8756f30 2629 case REFF:
c277df42 2630 n = ARG(scan); /* which paren pair */
cf93c79d 2631 ln = PL_regstartp[n];
2c2d71f5 2632 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2633 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2634 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2635 if (ln == PL_regendp[n])
a0d0e21e 2636 break;
a0ed51b3 2637
cf93c79d 2638 s = PL_bostr + ln;
1aa99e6b 2639 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2640 char *l = locinput;
cf93c79d 2641 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2642 /*
2643 * Note that we can't do the "other character" lookup trick as
2644 * in the 8-bit case (no pun intended) because in Unicode we
2645 * have to map both upper and title case to lower case.
2646 */
2647 if (OP(scan) == REFF) {
a2a2844f 2648 STRLEN ulen1, ulen2;
e7ae6809
JH
2649 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2650 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2651 while (s < e) {
2652 if (l >= PL_regeol)
2653 sayNO;
a2a2844f
JH
2654 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2655 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2656 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2657 sayNO;
a2a2844f
JH
2658 s += ulen1;
2659 l += ulen2;
a0ed51b3
LW
2660 }
2661 }
2662 locinput = l;
2663 nextchr = UCHARAT(locinput);
2664 break;
2665 }
2666
a0d0e21e 2667 /* Inline the first character, for speed. */
76e3520e 2668 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2669 (OP(scan) == REF ||
2670 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2671 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2672 sayNO;
cf93c79d 2673 ln = PL_regendp[n] - ln;
3280af22 2674 if (locinput + ln > PL_regeol)
4633a7c4 2675 sayNO;
c8756f30
AK
2676 if (ln > 1 && (OP(scan) == REF
2677 ? memNE(s, locinput, ln)
2678 : (OP(scan) == REFF
2679 ? ibcmp(s, locinput, ln)
2680 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2681 sayNO;
a0d0e21e 2682 locinput += ln;
76e3520e 2683 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2684 break;
2685
2686 case NOTHING:
c277df42 2687 case TAIL:
a0d0e21e
LW
2688 break;
2689 case BACK:
2690 break;
c277df42
IZ
2691 case EVAL:
2692 {
2693 dSP;
533c011a 2694 OP_4tree *oop = PL_op;
3280af22
NIS
2695 COP *ocurcop = PL_curcop;
2696 SV **ocurpad = PL_curpad;
c277df42 2697 SV *ret;
9041c2e3 2698
c277df42 2699 n = ARG(scan);
533c011a 2700 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2701 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2702 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2703 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2704
8e5e9ebe
RGS
2705 {
2706 SV **before = SP;
2707 CALLRUNOPS(aTHX); /* Scalar context. */
2708 SPAGAIN;
2709 if (SP == before)
2710 ret = Nullsv; /* protect against empty (?{}) blocks. */
2711 else {
2712 ret = POPs;
2713 PUTBACK;
2714 }
2715 }
2716
0f5d15d6
IZ
2717 PL_op = oop;
2718 PL_curpad = ocurpad;
2719 PL_curcop = ocurcop;
c277df42 2720 if (logical) {
0f5d15d6
IZ
2721 if (logical == 2) { /* Postponed subexpression. */
2722 regexp *re;
22c35a8c 2723 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2724 re_cc_state state;
0f5d15d6
IZ
2725 CHECKPOINT cp, lastcp;
2726
2727 if(SvROK(ret) || SvRMAGICAL(ret)) {
2728 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2729
2730 if(SvMAGICAL(sv))
14befaf4 2731 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2732 }
2733 if (mg) {
2734 re = (regexp *)mg->mg_obj;
df0003d4 2735 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2736 }
2737 else {
2738 STRLEN len;
2739 char *t = SvPV(ret, len);
2740 PMOP pm;
2741 char *oprecomp = PL_regprecomp;
2742 I32 osize = PL_regsize;
2743 I32 onpar = PL_regnpar;
2744
5fcd1c1b 2745 Zero(&pm, 1, PMOP);
cea2e8a9 2746 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2747 if (!(SvFLAGS(ret)
0f5d15d6 2748 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2749 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2750 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2751 PL_regprecomp = oprecomp;
2752 PL_regsize = osize;
2753 PL_regnpar = onpar;
2754 }
2755 DEBUG_r(
9041c2e3 2756 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2757 "Entering embedded `%s%.60s%s%s'\n",
2758 PL_colors[0],
2759 re->precomp,
2760 PL_colors[1],
2761 (strlen(re->precomp) > 60 ? "..." : ""))
2762 );
2763 state.node = next;
2764 state.prev = PL_reg_call_cc;
2765 state.cc = PL_regcc;
2766 state.re = PL_reg_re;
2767
2ab05381 2768 PL_regcc = 0;
9041c2e3 2769
0f5d15d6 2770 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2771 REGCP_SET(lastcp);
0f5d15d6
IZ
2772 cache_re(re);
2773 state.ss = PL_savestack_ix;
2774 *PL_reglastparen = 0;
a01268b5 2775 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2776 PL_reg_call_cc = &state;
2777 PL_reginput = locinput;
2c2d71f5
JH
2778
2779 /* XXXX This is too dramatic a measure... */
2780 PL_reg_maxiter = 0;
2781
0f5d15d6 2782 if (regmatch(re->program + 1)) {
2c914db6
IZ
2783 /* Even though we succeeded, we need to restore
2784 global variables, since we may be wrapped inside
2785 SUSPEND, thus the match may be not finished yet. */
2786
2787 /* XXXX Do this only if SUSPENDed? */
2788 PL_reg_call_cc = state.prev;
2789 PL_regcc = state.cc;
2790 PL_reg_re = state.re;
2791 cache_re(PL_reg_re);
2792
2793 /* XXXX This is too dramatic a measure... */
2794 PL_reg_maxiter = 0;
2795
2796 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2797 ReREFCNT_dec(re);
2798 regcpblow(cp);
2799 sayYES;
2800 }
0f5d15d6 2801 ReREFCNT_dec(re);
02db2b7b 2802 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2803 regcppop();
2804 PL_reg_call_cc = state.prev;
2805 PL_regcc = state.cc;
2806 PL_reg_re = state.re;
d3790889 2807 cache_re(PL_reg_re);
2c2d71f5
JH
2808
2809 /* XXXX This is too dramatic a measure... */
2810 PL_reg_maxiter = 0;
2811
8e514ae6 2812 logical = 0;
0f5d15d6
IZ
2813 sayNO;
2814 }
c277df42 2815 sw = SvTRUE(ret);
0f5d15d6 2816 logical = 0;
a0ed51b3
LW
2817 }
2818 else
3280af22 2819 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2820 break;
2821 }
a0d0e21e 2822 case OPEN:
c277df42 2823 n = ARG(scan); /* which paren pair */
3280af22
NIS
2824 PL_reg_start_tmp[n] = locinput;
2825 if (n > PL_regsize)
2826 PL_regsize = n;
a0d0e21e
LW
2827 break;
2828 case CLOSE:
c277df42 2829 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2830 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2831 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2832 if (n > *PL_reglastparen)
2833 *PL_reglastparen = n;
a01268b5 2834 *PL_reglastcloseparen = n;
a0d0e21e 2835 break;
c277df42
IZ
2836 case GROUPP:
2837 n = ARG(scan); /* which paren pair */
cf93c79d 2838 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2839 break;
2840 case IFTHEN:
2c2d71f5 2841 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2842 if (sw)
2843 next = NEXTOPER(NEXTOPER(scan));
2844 else {
2845 next = scan + ARG(scan);
2846 if (OP(next) == IFTHEN) /* Fake one. */
2847 next = NEXTOPER(NEXTOPER(next));
2848 }
2849 break;
2850 case LOGICAL:
0f5d15d6 2851 logical = scan->flags;
c277df42 2852 break;
2ab05381
IZ
2853/*******************************************************************
2854 PL_regcc contains infoblock about the innermost (...)* loop, and
2855 a pointer to the next outer infoblock.
2856
2857 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2858
2859 1) After matching X, regnode for CURLYX is processed;
2860
9041c2e3 2861 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2862 regmatch() recursively with the starting point at WHILEM node;
2863
2864 3) Each hit of WHILEM node tries to match A and Z (in the order
2865 depending on the current iteration, min/max of {min,max} and
2866 greediness). The information about where are nodes for "A"
2867 and "Z" is read from the infoblock, as is info on how many times "A"
2868 was already matched, and greediness.
2869
2870 4) After A matches, the same WHILEM node is hit again.
2871
2872 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2873 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2874 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2875 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2876 of the external loop.
2877
2878 Currently present infoblocks form a tree with a stem formed by PL_curcc
2879 and whatever it mentions via ->next, and additional attached trees
2880 corresponding to temporarily unset infoblocks as in "5" above.
2881
9041c2e3 2882 In the following picture infoblocks for outer loop of
2ab05381
IZ
2883 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2884 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2885 infoblocks are drawn below the "reset" infoblock.
2886
2887 In fact in the picture below we do not show failed matches for Z and T
2888 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2889 more obvious *why* one needs to *temporary* unset infoblocks.]
2890
2891 Matched REx position InfoBlocks Comment
2892 (Y(A)*?Z)*?T x
2893 Y(A)*?Z)*?T x <- O
2894 Y (A)*?Z)*?T x <- O
2895 Y A)*?Z)*?T x <- O <- I
2896 YA )*?Z)*?T x <- O <- I
2897 YA A)*?Z)*?T x <- O <- I
2898 YAA )*?Z)*?T x <- O <- I
2899 YAA Z)*?T x <- O # Temporary unset I
2900 I
2901
2902 YAAZ Y(A)*?Z)*?T x <- O
2903 I
2904
2905 YAAZY (A)*?Z)*?T x <- O
2906 I
2907
2908 YAAZY A)*?Z)*?T x <- O <- I
2909 I
2910
2911 YAAZYA )*?Z)*?T x <- O <- I
2912 I
2913
2914 YAAZYA Z)*?T x <- O # Temporary unset I
2915 I,I
2916
2917 YAAZYAZ )*?T x <- O
2918 I,I
2919
2920 YAAZYAZ T x # Temporary unset O
2921 O
2922 I,I
2923
2924 YAAZYAZT x
2925 O
2926 I,I
2927 *******************************************************************/
a0d0e21e
LW
2928 case CURLYX: {
2929 CURCUR cc;
3280af22 2930 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2931 /* No need to save/restore up to this paren */
2932 I32 parenfloor = scan->flags;
c277df42
IZ
2933
2934 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2935 next += ARG(next);
3280af22
NIS
2936 cc.oldcc = PL_regcc;
2937 PL_regcc = &cc;
cb434fcc
IZ
2938 /* XXXX Probably it is better to teach regpush to support
2939 parenfloor > PL_regsize... */
2940 if (parenfloor > *PL_reglastparen)
2941 parenfloor = *PL_reglastparen; /* Pessimization... */
2942 cc.parenfloor = parenfloor;
a0d0e21e
LW
2943 cc.cur = -1;
2944 cc.min = ARG1(scan);
2945 cc.max = ARG2(scan);
c277df42 2946 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2947 cc.next = next;
2948 cc.minmod = minmod;
2949 cc.lastloc = 0;
3280af22 2950 PL_reginput = locinput;
a0d0e21e
LW
2951 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2952 regcpblow(cp);
3280af22 2953 PL_regcc = cc.oldcc;
4633a7c4 2954 saySAME(n);
a0d0e21e
LW
2955 }
2956 /* NOT REACHED */
2957 case WHILEM: {
2958 /*
2959 * This is really hard to understand, because after we match
2960 * what we're trying to match, we must make sure the rest of
2c2d71f5 2961 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2962 * to go back UP the parse tree by recursing ever deeper. And
2963 * if it fails, we have to reset our parent's current state
2964 * that we can try again after backing off.
2965 */
2966
c277df42 2967 CHECKPOINT cp, lastcp;
3280af22 2968 CURCUR* cc = PL_regcc;
c277df42
IZ
2969 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2970
4633a7c4 2971 n = cc->cur + 1; /* how many we know we matched */
3280af22 2972 PL_reginput = locinput;
a0d0e21e 2973
c277df42 2974 DEBUG_r(
9041c2e3
NIS
2975 PerlIO_printf(Perl_debug_log,
2976 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2977 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2978 (long)n, (long)cc->min,
c277df42
IZ
2979 (long)cc->max, (long)cc)
2980 );
4633a7c4 2981
a0d0e21e
LW
2982 /* If degenerate scan matches "", assume scan done. */
2983
579cf2c3 2984 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2985 PL_regcc = cc->oldcc;
2ab05381
IZ
2986 if (PL_regcc)
2987 ln = PL_regcc->cur;
c277df42 2988 DEBUG_r(
c3464db5
DD
2989 PerlIO_printf(Perl_debug_log,
2990 "%*s empty match detected, try continuation...\n",
3280af22 2991 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2992 );
a0d0e21e 2993 if (regmatch(cc->next))
4633a7c4 2994 sayYES;
2ab05381
IZ
2995 if (PL_regcc)
2996 PL_regcc->cur = ln;
3280af22 2997 PL_regcc = cc;
4633a7c4 2998 sayNO;
a0d0e21e
LW
2999 }
3000
3001 /* First just match a string of min scans. */
3002
3003 if (n < cc->min) {
3004 cc->cur = n;
3005 cc->lastloc = locinput;
4633a7c4
LW
3006 if (regmatch(cc->scan))
3007 sayYES;
3008 cc->cur = n - 1;
c277df42 3009 cc->lastloc = lastloc;
4633a7c4 3010 sayNO;
a0d0e21e
LW
3011 }
3012
2c2d71f5
JH
3013 if (scan->flags) {
3014 /* Check whether we already were at this position.
3015 Postpone detection until we know the match is not
3016 *that* much linear. */
3017 if (!PL_reg_maxiter) {
3018 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3019 PL_reg_leftiter = PL_reg_maxiter;
3020 }
3021 if (PL_reg_leftiter-- == 0) {
3022 I32 size = (PL_reg_maxiter + 7)/8;
3023 if (PL_reg_poscache) {
3024 if (PL_reg_poscache_size < size) {
3025 Renew(PL_reg_poscache, size, char);
3026 PL_reg_poscache_size = size;
3027 }
3028 Zero(PL_reg_poscache, size, char);
3029 }
3030 else {
3031 PL_reg_poscache_size = size;
3032 Newz(29, PL_reg_poscache, size, char);
3033 }
3034 DEBUG_r(
3035 PerlIO_printf(Perl_debug_log,
3036 "%sDetected a super-linear match, switching on caching%s...\n",
3037 PL_colors[4], PL_colors[5])
3038 );
3039 }
3040 if (PL_reg_leftiter < 0) {
3041 I32 o = locinput - PL_bostr, b;
3042
3043 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3044 b = o % 8;
3045 o /= 8;
3046 if (PL_reg_poscache[o] & (1<<b)) {
3047 DEBUG_r(
3048 PerlIO_printf(Perl_debug_log,
3049 "%*s already tried at this position...\n",
3050 REPORT_CODE_OFF+PL_regindent*2, "")
3051 );
7821416a 3052 sayNO_SILENT;
2c2d71f5
JH
3053 }
3054 PL_reg_poscache[o] |= (1<<b);
3055 }
3056 }
3057
a0d0e21e
LW
3058 /* Prefer next over scan for minimal matching. */
3059
3060 if (cc->minmod) {
3280af22 3061 PL_regcc = cc->oldcc;
2ab05381
IZ
3062 if (PL_regcc)
3063 ln = PL_regcc->cur;
5f05dabc 3064 cp = regcppush(cc->parenfloor);
02db2b7b 3065 REGCP_SET(lastcp);
5f05dabc 3066 if (regmatch(cc->next)) {
c277df42 3067 regcpblow(cp);
4633a7c4 3068 sayYES; /* All done. */
5f05dabc 3069 }
02db2b7b 3070 REGCP_UNWIND(lastcp);
5f05dabc 3071 regcppop();
2ab05381
IZ
3072 if (PL_regcc)
3073 PL_regcc->cur = ln;
3280af22 3074 PL_regcc = cc;
a0d0e21e 3075
c277df42 3076 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3077 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3078 && !(PL_reg_flags & RF_warned)) {
3079 PL_reg_flags |= RF_warned;
e476b1b5 3080 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3081 "Complex regular subexpression recursion",
3082 REG_INFTY - 1);
c277df42 3083 }
4633a7c4 3084 sayNO;
c277df42 3085 }
a687059c 3086
c277df42 3087 DEBUG_r(
c3464db5
DD
3088 PerlIO_printf(Perl_debug_log,
3089 "%*s trying longer...\n",
3280af22 3090 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3091 );
a0d0e21e 3092 /* Try scanning more and see if it helps. */
3280af22 3093 PL_reginput = locinput;
a0d0e21e
LW
3094 cc->cur = n;
3095 cc->lastloc = locinput;
5f05dabc 3096 cp = regcppush(cc->parenfloor);
02db2b7b 3097 REGCP_SET(lastcp);
5f05dabc 3098 if (regmatch(cc->scan)) {
c277df42 3099 regcpblow(cp);
4633a7c4 3100 sayYES;
5f05dabc 3101 }
02db2b7b 3102 REGCP_UNWIND(lastcp);
5f05dabc 3103 regcppop();
4633a7c4 3104 cc->cur = n - 1;
c277df42 3105 cc->lastloc = lastloc;
4633a7c4 3106 sayNO;
a0d0e21e
LW
3107 }
3108
3109 /* Prefer scan over next for maximal matching. */
3110
3111 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3112 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3113 cc->cur = n;
3114 cc->lastloc = locinput;
02db2b7b 3115 REGCP_SET(lastcp);
5f05dabc 3116 if (regmatch(cc->scan)) {
c277df42 3117 regcpblow(cp);
4633a7c4 3118 sayYES;
5f05dabc 3119 }
02db2b7b 3120 REGCP_UNWIND(lastcp);
a0d0e21e 3121 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3122 PL_reginput = locinput;
c277df42 3123 DEBUG_r(
c3464db5
DD
3124 PerlIO_printf(Perl_debug_log,
3125 "%*s failed, try continuation...\n",
3280af22 3126 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3127 );
3128 }
9041c2e3 3129 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3130 && !(PL_reg_flags & RF_warned)) {
3280af22 3131 PL_reg_flags |= RF_warned;
e476b1b5 3132 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3133 "Complex regular subexpression recursion",
3134 REG_INFTY - 1);
a0d0e21e
LW
3135 }
3136
3137 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3138 PL_regcc = cc->oldcc;
2ab05381
IZ
3139 if (PL_regcc)
3140 ln = PL_regcc->cur;
a0d0e21e 3141 if (regmatch(cc->next))
4633a7c4 3142 sayYES;
2ab05381
IZ
3143 if (PL_regcc)
3144 PL_regcc->cur = ln;
3280af22 3145 PL_regcc = cc;
4633a7c4 3146 cc->cur = n - 1;
c277df42 3147 cc->lastloc = lastloc;
4633a7c4 3148 sayNO;
a0d0e21e
LW
3149 }
3150 /* NOT REACHED */
9041c2e3 3151 case BRANCHJ:
c277df42
IZ
3152 next = scan + ARG(scan);
3153 if (next == scan)
3154 next = NULL;
3155 inner = NEXTOPER(NEXTOPER(scan));
3156 goto do_branch;
9041c2e3 3157 case BRANCH:
c277df42
IZ
3158 inner = NEXTOPER(scan);
3159 do_branch:
3160 {
c277df42
IZ
3161 c1 = OP(scan);
3162 if (OP(next) != c1) /* No choice. */
3163 next = inner; /* Avoid recursion. */
a0d0e21e 3164 else {
02db2b7b
IZ
3165 I32 lastparen = *PL_reglastparen;
3166 I32 unwind1;
3167 re_unwind_branch_t *uw;
3168
3169 /* Put unwinding data on stack */
3170 unwind1 = SSNEWt(1,re_unwind_branch_t);
3171 uw = SSPTRt(unwind1,re_unwind_branch_t);
3172 uw->prev = unwind;
3173 unwind = unwind1;
3174 uw->type = ((c1 == BRANCH)
3175 ? RE_UNWIND_BRANCH
3176 : RE_UNWIND_BRANCHJ);
3177 uw->lastparen = lastparen;
3178 uw->next = next;
3179 uw->locinput = locinput;
3180 uw->nextchr = nextchr;
3181#ifdef DEBUGGING
3182 uw->regindent = ++PL_regindent;
3183#endif
c277df42 3184
02db2b7b
IZ
3185 REGCP_SET(uw->lastcp);
3186
3187 /* Now go into the first branch */
3188 next = inner;
a687059c 3189 }
a0d0e21e
LW
3190 }
3191 break;
3192 case MINMOD:
3193 minmod = 1;
3194 break;
c277df42
IZ
3195 case CURLYM:
3196 {
00db4c45 3197 I32 l = 0;
c277df42 3198 CHECKPOINT lastcp;
9041c2e3 3199
c277df42
IZ
3200 /* We suppose that the next guy does not need
3201 backtracking: in particular, it is of constant length,
3202 and has no parenths to influence future backrefs. */
3203 ln = ARG1(scan); /* min to match */
3204 n = ARG2(scan); /* max to match */
c277df42
IZ
3205 paren = scan->flags;
3206 if (paren) {
3280af22
NIS
3207 if (paren > PL_regsize)
3208 PL_regsize = paren;
3209 if (paren > *PL_reglastparen)
3210 *PL_reglastparen = paren;
c277df42 3211 }
dc45a647 3212 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3213 if (paren)
3214 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3215 PL_reginput = locinput;
c277df42
IZ
3216 if (minmod) {
3217 minmod = 0;
3218 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3219 sayNO;
f31a99c8
HS
3220 /* if we matched something zero-length we don't need to
3221 backtrack - capturing parens are already defined, so
3222 the caveat in the maximal case doesn't apply
3223
3224 XXXX if ln == 0, we can redo this check first time
3225 through the following loop
3226 */
3227 if (ln && l == 0)
3228 n = ln; /* don't backtrack */
3280af22 3229 locinput = PL_reginput;
cca55fe3 3230 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3231 regnode *text_node = next;
3232
cca55fe3 3233 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3234
cca55fe3 3235 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3236 else {
cca55fe3
JP
3237 if (PL_regkind[(U8)OP(text_node)] == REF) {
3238 I32 n, ln;
3239 n = ARG(text_node); /* which paren pair */
3240 ln = PL_regstartp[n];
3241 /* assume yes if we haven't seen CLOSEn */
3242 if (
3243 *PL_reglastparen < n ||
3244 ln == -1 ||
3245 ln == PL_regendp[n]
3246 ) {
3247 c1 = c2 = -1000;
3248 goto assume_ok_MM;
3249 }
3250 c1 = *(PL_bostr + ln);
3251 }
3252 else { c1 = (U8)*STRING(text_node); }
af5decee 3253 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3254 c2 = PL_fold[c1];
af5decee 3255 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3256 c2 = PL_fold_locale[c1];
3257 else
3258 c2 = c1;
3259 }
a0ed51b3
LW
3260 }
3261 else
c277df42 3262 c1 = c2 = -1000;
cca55fe3 3263 assume_ok_MM:
02db2b7b 3264 REGCP_SET(lastcp);
5f4b28b2 3265 /* This may be improved if l == 0. */
c277df42
IZ
3266 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3267 /* If it could work, try it. */
3268 if (c1 == -1000 ||
3280af22
NIS
3269 UCHARAT(PL_reginput) == c1 ||
3270 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3271 {
3272 if (paren) {
f31a99c8 3273 if (ln) {
cf93c79d
IZ
3274 PL_regstartp[paren] =
3275 HOPc(PL_reginput, -l) - PL_bostr;
3276 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3277 }
3278 else
cf93c79d 3279 PL_regendp[paren] = -1;
c277df42
IZ
3280 }
3281 if (regmatch(next))
3282 sayYES;
02db2b7b 3283 REGCP_UNWIND(lastcp);
c277df42
IZ
3284 }
3285 /* Couldn't or didn't -- move forward. */
3280af22 3286 PL_reginput = locinput;
c277df42
IZ
3287 if (regrepeat_hard(scan, 1, &l)) {
3288 ln++;
3280af22 3289 locinput = PL_reginput;
c277df42
IZ
3290 }
3291 else
3292 sayNO;
3293 }
a0ed51b3
LW
3294 }
3295 else {
c277df42 3296 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3297 /* if we matched something zero-length we don't need to
3298 backtrack, unless the minimum count is zero and we
3299 are capturing the result - in that case the capture
3300 being defined or not may affect later execution
3301 */
3302 if (n != 0 && l == 0 && !(paren && ln == 0))
3303 ln = n; /* don't backtrack */
3280af22 3304 locinput = PL_reginput;
c277df42 3305 DEBUG_r(
5c0ca799 3306 PerlIO_printf(Perl_debug_log,
faccc32b 3307 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3308 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3309 (IV) n, (IV)l)
c277df42
IZ
3310 );
3311 if (n >= ln) {
cca55fe3 3312 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3313 regnode *text_node = next;
3314
cca55fe3 3315 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3316
cca55fe3 3317 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3318 else {
cca55fe3
JP
3319 if (PL_regkind[(U8)OP(text_node)] == REF) {
3320 I32 n, ln;
3321 n = ARG(text_node); /* which paren pair */
3322 ln = PL_regstartp[n];
3323 /* assume yes if we haven't seen CLOSEn */
3324 if (
3325 *PL_reglastparen < n ||
3326 ln == -1 ||
3327 ln == PL_regendp[n]
3328 ) {
3329 c1 = c2 = -1000;
3330 goto assume_ok_REG;
3331 }
3332 c1 = *(PL_bostr + ln);
3333 }
3334 else { c1 = (U8)*STRING(text_node); }
3335
af5decee 3336 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3337 c2 = PL_fold[c1];
af5decee 3338 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3339 c2 = PL_fold_locale[c1];
3340 else
3341 c2 = c1;
3342 }
a0ed51b3
LW
3343 }
3344 else
c277df42
IZ
3345 c1 = c2 = -1000;
3346 }
cca55fe3 3347 assume_ok_REG:
02db2b7b 3348 REGCP_SET(lastcp);
c277df42
IZ
3349 while (n >= ln) {
3350 /* If it could work, try it. */
3351 if (c1 == -1000 ||
3280af22
NIS
3352 UCHARAT(PL_reginput) == c1 ||
3353 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3354 {
3355 DEBUG_r(
c3464db5 3356 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3357 "%*s trying tail with n=%"IVdf"...\n",
3358 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3359 );
3360 if (paren) {
3361 if (n) {
cf93c79d
IZ
3362 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3363 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3364 }
a0ed51b3 3365 else
cf93c79d 3366 PL_regendp[paren] = -1;
c277df42 3367 }
a0ed51b3
LW
3368 if (regmatch(next))
3369 sayYES;
02db2b7b 3370 REGCP_UNWIND(lastcp);
a0ed51b3 3371 }
c277df42
IZ
3372 /* Couldn't or didn't -- back up. */
3373 n--;
dfe13c55 3374 locinput = HOPc(locinput, -l);
3280af22 3375 PL_reginput = locinput;
c277df42
IZ
3376 }
3377 }
3378 sayNO;
3379 break;
3380 }
3381 case CURLYN:
3382 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3383 if (paren > PL_regsize)
3384 PL_regsize = paren;
3385 if (paren > *PL_reglastparen)
3386 *PL_reglastparen = paren;
c277df42
IZ
3387 ln = ARG1(scan); /* min to match */
3388 n = ARG2(scan); /* max to match */
dc45a647 3389 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3390 goto repeat;
a0d0e21e 3391 case CURLY:
c277df42 3392 paren = 0;
a0d0e21e
LW
3393 ln = ARG1(scan); /* min to match */
3394 n = ARG2(scan); /* max to match */
dc45a647 3395 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3396 goto repeat;
3397 case STAR:
3398 ln = 0;
c277df42 3399 n = REG_INFTY;
a0d0e21e 3400 scan = NEXTOPER(scan);
c277df42 3401 paren = 0;
a0d0e21e
LW
3402 goto repeat;
3403 case PLUS:
c277df42
IZ
3404 ln = 1;
3405 n = REG_INFTY;
3406 scan = NEXTOPER(scan);
3407 paren = 0;
3408 repeat:
a0d0e21e
LW
3409 /*
3410 * Lookahead to avoid useless match attempts
3411 * when we know what character comes next.
3412 */
5f80c4cf
JP
3413
3414 /*
3415 * Used to only do .*x and .*?x, but now it allows
3416 * for )'s, ('s and (?{ ... })'s to be in the way
3417 * of the quantifier and the EXACT-like node. -- japhy
3418 */
3419
cca55fe3 3420 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3421 U8 *s;
3422 regnode *text_node = next;
3423
cca55fe3 3424 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3425
cca55fe3 3426 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3427 else {
cca55fe3
JP
3428 if (PL_regkind[(U8)OP(text_node)] == REF) {
3429 I32 n, ln;
3430 n = ARG(text_node); /* which paren pair */
3431 ln = PL_regstartp[n];
3432 /* assume yes if we haven't seen CLOSEn */
3433 if (
3434 *PL_reglastparen < n ||
3435 ln == -1 ||
3436 ln == PL_regendp[n]
3437 ) {
3438 c1 = c2 = -1000;
3439 goto assume_ok_easy;
3440 }
9246c65e 3441 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3442 }
3443 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3444
3445 if (!UTF) {
3446 c2 = c1 = *s;
f65d3ee7 3447 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3448 c2 = PL_fold[c1];
f65d3ee7 3449 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3450 c2 = PL_fold_locale[c1];
1aa99e6b 3451 }
5f80c4cf 3452 else { /* UTF */
f65d3ee7 3453 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3454 STRLEN ulen1, ulen2;
e7ae6809
JH
3455 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3456 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3457
3458 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3459 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3460
3461 c1 = utf8_to_uvuni(tmpbuf1, 0);
3462 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3463 }
3464 else {
3465 c2 = c1 = utf8_to_uvchr(s, NULL);
3466 }
1aa99e6b
IH
3467 }
3468 }
bbce6d69 3469 }
a0d0e21e 3470 else
bbce6d69 3471 c1 = c2 = -1000;
cca55fe3 3472 assume_ok_easy:
3280af22 3473 PL_reginput = locinput;
a0d0e21e 3474 if (minmod) {
c277df42 3475 CHECKPOINT lastcp;
a0d0e21e
LW
3476 minmod = 0;
3477 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3478 sayNO;
a0ed51b3 3479 locinput = PL_reginput;
02db2b7b 3480 REGCP_SET(lastcp);
0fe9bf95 3481 if (c1 != -1000) {
1aa99e6b 3482 char *e; /* Should not check after this */
0fe9bf95
IZ
3483 char *old = locinput;
3484
1aa99e6b 3485 if (n == REG_INFTY) {
0fe9bf95 3486 e = PL_regeol - 1;
1aa99e6b
IH
3487 if (do_utf8)
3488 while (UTF8_IS_CONTINUATION(*(U8*)e))
3489 e--;
3490 }
3491 else if (do_utf8) {
3492 int m = n - ln;
3493 for (e = locinput;
3494 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3495 e += UTF8SKIP(e);
3496 }
3497 else {
3498 e = locinput + n - ln;
3499 if (e >= PL_regeol)
3500 e = PL_regeol - 1;
3501 }
0fe9bf95 3502 while (1) {
1aa99e6b 3503 int count;
0fe9bf95 3504 /* Find place 'next' could work */
1aa99e6b
IH
3505 if (!do_utf8) {
3506 if (c1 == c2) {
a8e8ab15
JH
3507 while (locinput <= e &&
3508 UCHARAT(locinput) != c1)
1aa99e6b
IH
3509 locinput++;
3510 } else {
9041c2e3 3511 while (locinput <= e
a8e8ab15
JH
3512 && UCHARAT(locinput) != c1
3513 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3514 locinput++;
3515 }
3516 count = locinput - old;
3517 }
3518 else {
3519 STRLEN len;
3520 if (c1 == c2) {
3521 for (count = 0;
3522 locinput <= e &&
9041c2e3 3523 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3524 count++)
3525 locinput += len;
3526
3527 } else {
3528 for (count = 0; locinput <= e; count++) {
9041c2e3 3529 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3530 if (c == c1 || c == c2)
3531 break;
9041c2e3 3532 locinput += len;
1aa99e6b
IH
3533 }
3534 }
0fe9bf95 3535 }
9041c2e3 3536 if (locinput > e)
0fe9bf95
IZ
3537 sayNO;
3538 /* PL_reginput == old now */
3539 if (locinput != old) {
3540 ln = 1; /* Did some */
1aa99e6b 3541 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3542 sayNO;
3543 }
3544 /* PL_reginput == locinput now */
29d1e993 3545 TRYPAREN(paren, ln, locinput);
0fe9bf95 3546 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3547 REGCP_UNWIND(lastcp);
0fe9bf95 3548 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3549 old = locinput;
3550 if (do_utf8)
3551 locinput += UTF8SKIP(locinput);
3552 else
3553 locinput++;
0fe9bf95
IZ
3554 }
3555 }
3556 else
c277df42 3557 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3558 UV c;
3559 if (c1 != -1000) {
3560 if (do_utf8)
9041c2e3 3561 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3562 else
9041c2e3 3563 c = UCHARAT(PL_reginput);
2390ecbc
PP
3564 /* If it could work, try it. */
3565 if (c == c1 || c == c2)
3566 {
3567 TRYPAREN(paren, n, PL_reginput);
3568 REGCP_UNWIND(lastcp);
3569 }
1aa99e6b 3570 }
a0d0e21e 3571 /* If it could work, try it. */
2390ecbc 3572 else if (c1 == -1000)
bbce6d69 3573 {
29d1e993 3574 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3575 REGCP_UNWIND(lastcp);
bbce6d69 3576 }
c277df42 3577 /* Couldn't or didn't -- move forward. */
a0ed51b3 3578 PL_reginput = locinput;
a0d0e21e
LW
3579 if (regrepeat(scan, 1)) {
3580 ln++;
a0ed51b3
LW
3581 locinput = PL_reginput;
3582 }
3583 else
4633a7c4 3584 sayNO;
a0d0e21e
LW
3585 }
3586 }
3587 else {
c277df42 3588 CHECKPOINT lastcp;
a0d0e21e 3589 n = regrepeat(scan, n);
a0ed51b3 3590 locinput = PL_reginput;
22c35a8c 3591 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3592 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3593 ln = n; /* why back off? */
1aeab75a
GS
3594 /* ...because $ and \Z can match before *and* after
3595 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3596 We should back off by one in this case. */
3597 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3598 ln--;
3599 }
02db2b7b 3600 REGCP_SET(lastcp);
c277df42 3601 if (paren) {
8fa7f367 3602 UV c = 0;
c277df42 3603 while (n >= ln) {
1aa99e6b
IH
3604 if (c1 != -1000) {
3605 if (do_utf8)
9041c2e3 3606 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3607 else
9041c2e3 3608 c = UCHARAT(PL_reginput);
1aa99e6b 3609 }
c277df42 3610 /* If it could work, try it. */
1aa99e6b 3611 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3612 {
29d1e993 3613 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3614 REGCP_UNWIND(lastcp);
c277df42
IZ
3615 }
3616 /* Couldn't or didn't -- back up. */
3617 n--;
dfe13c55 3618 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3619 }
a0ed51b3
LW
3620 }
3621 else {
8fa7f367 3622 UV c = 0;
c277df42 3623 while (n >= ln) {
1aa99e6b
IH
3624 if (c1 != -1000) {
3625 if (do_utf8)
9041c2e3 3626 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3627 else
9041c2e3 3628 c = UCHARAT(PL_reginput);
1aa99e6b 3629 }
c277df42 3630 /* If it could work, try it. */
1aa99e6b 3631 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3632 {
29d1e993 3633 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3634 REGCP_UNWIND(lastcp);
c277df42
IZ
3635 }
3636 /* Couldn't or didn't -- back up. */
3637 n--;
dfe13c55 3638 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3639 }
a0d0e21e
LW
3640 }
3641 }
4633a7c4 3642 sayNO;
c277df42 3643 break;
a0d0e21e 3644 case END:
0f5d15d6
IZ
3645 if (PL_reg_call_cc) {
3646 re_cc_state *cur_call_cc = PL_reg_call_cc;
3647 CURCUR *cctmp = PL_regcc;
3648 regexp *re = PL_reg_re;
3649 CHECKPOINT cp, lastcp;
3650
3651 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3652 REGCP_SET(lastcp);
0f5d15d6
IZ
3653 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3654 the caller. */
3655 PL_reginput = locinput; /* Make position available to
3656 the callcc. */
3657 cache_re(PL_reg_call_cc->re);
3658 PL_regcc = PL_reg_call_cc->cc;
3659 PL_reg_call_cc = PL_reg_call_cc->prev;
3660 if (regmatch(cur_call_cc->node)) {
3661 PL_reg_call_cc = cur_call_cc;
3662 regcpblow(cp);
3663 sayYES;
3664 }
02db2b7b 3665 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3666 regcppop();
3667 PL_reg_call_cc = cur_call_cc;
3668 PL_regcc = cctmp;
3669 PL_reg_re = re;
3670 cache_re(re);
3671
3672 DEBUG_r(
3673 PerlIO_printf(Perl_debug_log,
3674 "%*s continuation failed...\n",
3675 REPORT_CODE_OFF+PL_regindent*2, "")
3676 );
7821416a 3677 sayNO_SILENT;
0f5d15d6 3678 }
7821416a
IZ
3679 if (locinput < PL_regtill) {
3680 DEBUG_r(PerlIO_printf(Perl_debug_log,
3681 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3682 PL_colors[4],
3683 (long)(locinput - PL_reg_starttry),
3684 (long)(PL_regtill - PL_reg_starttry),
3685 PL_colors[5]));
3686 sayNO_FINAL; /* Cannot match: too short. */
3687 }
3688 PL_reginput = locinput; /* put where regtry can find it */
3689 sayYES_FINAL; /* Success! */
7e5428c5 3690 case SUCCEED:
3280af22 3691 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3692 sayYES_LOUD; /* Success! */
c277df42
IZ
3693 case SUSPEND:
3694 n = 1;
9fe1d20c 3695 PL_reginput = locinput;
9041c2e3 3696 goto do_ifmatch;
a0d0e21e 3697 case UNLESSM:
c277df42 3698 n = 0;
a0ed51b3 3699 if (scan->flags) {
efb30f32
HS
3700 s = HOPBACKc(locinput, scan->flags);
3701 if (!s)
3702 goto say_yes;
3703 PL_reginput = s;
a0ed51b3
LW
3704 }
3705 else
3706 PL_reginput = locinput;
c277df42
IZ
3707 goto do_ifmatch;
3708 case IFMATCH:
3709 n = 1;
a0ed51b3 3710 if (scan->flags) {
efb30f32
HS
3711 s = HOPBACKc(locinput, scan->flags);
3712 if (!s)
3713 goto say_no;
3714 PL_reginput = s;
a0ed51b3
LW
3715 }
3716 else
3717 PL_reginput = locinput;
3718
c277df42 3719 do_ifmatch:
c277df42
IZ
3720 inner = NEXTOPER(NEXTOPER(scan));
3721 if (regmatch(inner) != n) {
3722 say_no:
3723 if (logical) {
3724 logical = 0;
3725 sw = 0;
3726 goto do_longjump;
a0ed51b3
LW
3727 }
3728 else
c277df42
IZ
3729 sayNO;
3730 }
3731 say_yes:
3732 if (logical) {
3733 logical = 0;
3734 sw = 1;
3735 }
fe44a5e8 3736 if (OP(scan) == SUSPEND) {
3280af22 3737 locinput = PL_reginput;
565764a8 3738 nextchr = UCHARAT(locinput);
fe44a5e8 3739 }
c277df42
IZ
3740 /* FALL THROUGH. */
3741 case LONGJMP:
3742 do_longjump:
3743 next = scan + ARG(scan);
3744 if (next == scan)
3745 next = NULL;
a0d0e21e
LW
3746 break;
3747 default:
b900a521 3748 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3749 PTR2UV(scan), OP(scan));
cea2e8a9 3750 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3751 }
02db2b7b 3752 reenter:
a0d0e21e
LW
3753 scan = next;
3754 }
a687059c 3755
a0d0e21e
LW
3756 /*
3757 * We get here only if there's trouble -- normally "case END" is
3758 * the terminating point.
3759 */
cea2e8a9 3760 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3761 /*NOTREACHED*/
4633a7c4
LW
3762 sayNO;
3763
7821416a
IZ
3764yes_loud:
3765 DEBUG_r(
3766 PerlIO_printf(Perl_debug_log,
3767 "%*s %scould match...%s\n",
3768 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3769 );
3770 goto yes;
3771yes_final:
3772 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3773 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3774yes:
3775#ifdef DEBUGGING
3280af22 3776 PL_regindent--;
4633a7c4 3777#endif
02db2b7b
IZ
3778
3779#if 0 /* Breaks $^R */
3780 if (unwind)
3781 regcpblow(firstcp);
3782#endif
4633a7c4
LW
3783 return 1;
3784
3785no:
7821416a
IZ
3786 DEBUG_r(
3787 PerlIO_printf(Perl_debug_log,
3788 "%*s %sfailed...%s\n",
3789 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3790 );
3791 goto do_no;
3792no_final:
3793do_no:
02db2b7b
IZ
3794 if (unwind) {
3795 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3796
3797 switch (uw->type) {
3798 case RE_UNWIND_BRANCH:
3799 case RE_UNWIND_BRANCHJ:
3800 {
3801 re_unwind_branch_t *uwb = &(uw->branch);
3802 I32 lastparen = uwb->lastparen;
9041c2e3 3803
02db2b7b
IZ
3804 REGCP_UNWIND(uwb->lastcp);
3805 for (n = *PL_reglastparen; n > lastparen; n--)
3806 PL_regendp[n] = -1;
3807 *PL_reglastparen = n;
3808 scan = next = uwb->next;
9041c2e3
NIS
3809 if ( !scan ||
3810 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3811 ? BRANCH : BRANCHJ) ) { /* Failure */
3812 unwind = uwb->prev;
3813#ifdef DEBUGGING
3814 PL_regindent--;
3815#endif
3816 goto do_no;
3817 }
3818 /* Have more choice yet. Reuse the same uwb. */
3819 /*SUPPRESS 560*/
3820 if ((n = (uwb->type == RE_UNWIND_BRANCH
3821 ? NEXT_OFF(next) : ARG(next))))
3822 next += n;
3823 else
3824 next = NULL; /* XXXX Needn't unwinding in this case... */
3825 uwb->next = next;
3826 next = NEXTOPER(scan);
3827 if (uwb->type == RE_UNWIND_BRANCHJ)
3828 next = NEXTOPER(next);
3829 locinput = uwb->locinput;
3830 nextchr = uwb->nextchr;
3831#ifdef DEBUGGING
3832 PL_regindent = uwb->regindent;
3833#endif
3834
3835 goto reenter;
3836 }
3837 /* NOT REACHED */
3838 default:
3839 Perl_croak(aTHX_ "regexp unwind memory corruption");
3840 }
3841 /* NOT REACHED */
3842 }
4633a7c4 3843#ifdef DEBUGGING
3280af22 3844 PL_regindent--;
4633a7c4 3845#endif
a0d0e21e 3846 return 0;
a687059c
LW
3847}
3848
3849/*
3850 - regrepeat - repeatedly match something simple, report how many
3851 */
3852/*
3853 * [This routine now assumes that it will only match on things of length 1.
3854 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3855 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3856 */
76e3520e 3857STATIC I32
cea2e8a9 3858S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3859{
a0d0e21e 3860 register char *scan;
a0d0e21e 3861 register I32 c;
3280af22 3862 register char *loceol = PL_regeol;
a0ed51b3 3863 register I32 hardcount = 0;
53c4c00c 3864 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3865
3280af22 3866 scan = PL_reginput;
c277df42 3867 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3868 loceol = scan + max;
a0d0e21e 3869 switch (OP(p)) {
22c35a8c 3870 case REG_ANY:
1aa99e6b 3871 if (do_utf8) {
ffc61ed2 3872 loceol = PL_regeol;
1aa99e6b 3873 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3874 scan += UTF8SKIP(scan);
3875 hardcount++;
3876 }
3877 } else {
3878 while (scan < loceol && *scan != '\n')
3879 scan++;
a0ed51b3
LW
3880 }
3881 break;
ffc61ed2 3882 case SANY:
3baa4c62 3883 scan = loceol;
a0ed51b3 3884 break;
f33976b4
DB
3885 case CANY:
3886 scan = loceol;
3887 break;
bbce6d69 3888 case EXACT: /* length of string is 1 */
cd439c50 3889 c = (U8)*STRING(p);
bbce6d69 3890 while (scan < loceol && UCHARAT(scan) == c)
3891 scan++;
3892 break;
3893 case EXACTF: /* length of string is 1 */
cd439c50 3894 c = (U8)*STRING(p);
bbce6d69 3895 while (scan < loceol &&
22c35a8c 3896 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3897 scan++;
3898 break;
3899 case EXACTFL: /* length of string is 1 */
3280af22 3900 PL_reg_flags |= RF_tainted;
cd439c50 3901 c = (U8)*STRING(p);
bbce6d69 3902 while (scan < loceol &&
22c35a8c 3903 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3904 scan++;
3905 break;
3906 case ANYOF:
ffc61ed2
JH
3907 if (do_utf8) {
3908 loceol = PL_regeol;
1aa99e6b
IH
3909 while (hardcount < max && scan < loceol &&
3910 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3911 scan += UTF8SKIP(scan);
3912 hardcount++;
3913 }
3914 } else {
3915 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3916 scan++;
3917 }
a0d0e21e
LW
3918 break;
3919 case ALNUM:
1aa99e6b 3920 if (do_utf8) {
ffc61ed2 3921 loceol = PL_regeol;
8269fa76 3922 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3923 while (hardcount < max && scan < loceol &&
3568d838 3924 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3925 scan += UTF8SKIP(scan);
3926 hardcount++;
3927 }
3928 } else {
3929 while (scan < loceol && isALNUM(*scan))
3930 scan++;
a0ed51b3
LW
3931 }
3932 break;
bbce6d69 3933 case ALNUML:
3280af22 3934 PL_reg_flags |= RF_tainted;
1aa99e6b 3935 if (do_utf8) {
ffc61ed2 3936 loceol = PL_regeol;
1aa99e6b
IH
3937 while (hardcount < max && scan < loceol &&
3938 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3939 scan += UTF8SKIP(scan);
3940 hardcount++;
3941 }
3942 } else {
3943 while (scan < loceol && isALNUM_LC(*scan))
3944 scan++;
a0ed51b3
LW
3945 }
3946 break;
a0d0e21e 3947 case NALNUM:
1aa99e6b 3948 if (do_utf8) {
ffc61ed2 3949 loceol = PL_regeol;
8269fa76 3950 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3951 while (hardcount < max && scan < loceol &&
3568d838 3952 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3953 scan += UTF8SKIP(scan);
3954 hardcount++;
3955 }
3956 } else {
3957 while (scan < loceol && !isALNUM(*scan))
3958 scan++;
a0ed51b3
LW
3959 }
3960 break;
bbce6d69 3961 case NALNUML:
3280af22 3962 PL_reg_flags |= RF_tainted;
1aa99e6b 3963 if (do_utf8) {
ffc61ed2 3964 loceol = PL_regeol;
1aa99e6b
IH
3965 while (hardcount < max && scan < loceol &&
3966 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3967 scan += UTF8SKIP(scan);
3968 hardcount++;
3969 }
3970 } else {
3971 while (scan < loceol && !isALNUM_LC(*scan))
3972 scan++;
a0ed51b3
LW
3973 }
3974 break;
a0d0e21e 3975 case SPACE:
1aa99e6b 3976 if (do_utf8) {
ffc61ed2 3977 loceol = PL_regeol;
8269fa76 3978 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3979 while (hardcount < max && scan < loceol &&
3568d838
JH
3980 (*scan == ' ' ||
3981 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3982 scan += UTF8SKIP(scan);
3983 hardcount++;
3984 }
3985 } else {
3986 while (scan < loceol && isSPACE(*scan))
3987 scan++;
a0ed51b3
LW
3988 }
3989 break;
bbce6d69 3990 case SPACEL:
3280af22 3991 PL_reg_flags |= RF_tainted;
1aa99e6b 3992 if (do_utf8) {
ffc61ed2 3993 loceol = PL_regeol;
1aa99e6b 3994 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3995 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3996 scan += UTF8SKIP(scan);
3997 hardcount++;
3998 }
3999 } else {
4000 while (scan < loceol && isSPACE_LC(*scan))
4001 scan++;
a0ed51b3
LW
4002 }
4003 break;
a0d0e21e 4004 case NSPACE:
1aa99e6b 4005 if (do_utf8) {
ffc61ed2 4006 loceol = PL_regeol;
8269fa76 4007 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4008 while (hardcount < max && scan < loceol &&
3568d838
JH
4009 !(*scan == ' ' ||
4010 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4011 scan += UTF8SKIP(scan);
4012 hardcount++;
4013 }
4014 } else {
4015 while (scan < loceol && !isSPACE(*scan))
4016 scan++;
4017 break;
a0ed51b3 4018 }
bbce6d69 4019 case NSPACEL:
3280af22 4020 PL_reg_flags |= RF_tainted;
1aa99e6b 4021 if (do_utf8) {
ffc61ed2 4022 loceol = PL_regeol;
1aa99e6b 4023 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4024 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4025 scan += UTF8SKIP(scan);
4026 hardcount++;
4027 }
4028 } else {
4029 while (scan < loceol && !isSPACE_LC(*scan))
4030 scan++;
a0ed51b3
LW
4031 }
4032 break;
a0d0e21e 4033 case DIGIT:
1aa99e6b 4034 if (do_utf8) {
ffc61ed2 4035 loceol = PL_regeol;
8269fa76 4036 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4037 while (hardcount < max && scan < loceol &&
3568d838 4038 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4039 scan += UTF8SKIP(scan);
4040 hardcount++;
4041 }
4042 } else {
4043 while (scan < loceol && isDIGIT(*scan))
4044 scan++;
a0ed51b3
LW
4045 }
4046 break;
a0d0e21e 4047 case NDIGIT:
1aa99e6b 4048 if (do_utf8) {
ffc61ed2 4049 loceol = PL_regeol;
8269fa76 4050 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4051 while (hardcount < max && scan < loceol &&
3568d838 4052 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4053 scan += UTF8SKIP(scan);
4054 hardcount++;
4055 }
4056 } else {
4057 while (scan < loceol && !isDIGIT(*scan))
4058 scan++;
a0ed51b3
LW
4059 }
4060 break;
a0d0e21e
LW
4061 default: /* Called on something of 0 width. */
4062 break; /* So match right here or not at all. */
4063 }
a687059c 4064
a0ed51b3
LW
4065 if (hardcount)
4066 c = hardcount;
4067 else
4068 c = scan - PL_reginput;
3280af22 4069 PL_reginput = scan;
a687059c 4070
9041c2e3 4071 DEBUG_r(
c277df42
IZ
4072 {
4073 SV *prop = sv_newmortal();
4074
4075 regprop(prop, p);
9041c2e3
NIS
4076 PerlIO_printf(Perl_debug_log,
4077 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4078 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4079 });
9041c2e3 4080
a0d0e21e 4081 return(c);
a687059c
LW
4082}
4083
4084/*
c277df42 4085 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4086 *
c277df42
IZ
4087 * The repeater is supposed to have constant length.
4088 */
4089
76e3520e 4090STATIC I32
cea2e8a9 4091S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4092{
b7953727 4093 register char *scan = Nullch;
c277df42 4094 register char *start;
3280af22 4095 register char *loceol = PL_regeol;
a0ed51b3 4096 I32 l = 0;
708e3b05 4097 I32 count = 0, res = 1;
a0ed51b3
LW
4098
4099 if (!max)
4100 return 0;
c277df42 4101
3280af22 4102 start = PL_reginput;
53c4c00c 4103 if (PL_reg_match_utf8) {
708e3b05 4104 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4105 if (!count++) {
4106 l = 0;
4107 while (start < PL_reginput) {
4108 l++;
4109 start += UTF8SKIP(start);
4110 }
4111 *lp = l;
4112 if (l == 0)
4113 return max;
4114 }
4115 if (count == max)
4116 return count;
4117 }
4118 }
4119 else {
708e3b05 4120 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4121 if (!count++) {
4122 *lp = l = PL_reginput - start;
4123 if (max != REG_INFTY && l*max < loceol - scan)
4124 loceol = scan + l*max;
4125 if (l == 0)
4126 return max;
c277df42
IZ
4127 }
4128 }
4129 }
708e3b05 4130 if (!res)
3280af22 4131 PL_reginput = scan;
9041c2e3 4132
a0ed51b3 4133 return count;
c277df42
IZ
4134}
4135
4136/*
ffc61ed2
JH
4137- regclass_swash - prepare the utf8 swash
4138*/
4139
4140SV *
4141Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
4142{
4143 SV *sw = NULL;
4144 SV *si = NULL;
4145
4146 if (PL_regdata && PL_regdata->count) {
4147 U32 n = ARG(node);
4148
4149 if (PL_regdata->what[n] == 's') {
4150 SV *rv = (SV*)PL_regdata->data[n];
4151 AV *av = (AV*)SvRV((SV*)rv);
4152 SV **a;
9041c2e3 4153
ffc61ed2
JH
4154 si = *av_fetch(av, 0, FALSE);
4155 a = av_fetch(av, 1, FALSE);
9041c2e3 4156
ffc61ed2
JH
4157 if (a)
4158 sw = *a;
4159 else if (si && doinit) {
4160 sw = swash_init("utf8", "", si, 1, 0);
4161 (void)av_store(av, 1, sw);
4162 }
4163 }
4164 }
4165
4166 if (initsvp)
4167 *initsvp = si;
4168
4169 return sw;
4170}
4171
4172/*
cb8d8820 4173 - reginclass - determine if a character falls into a character class
bbce6d69 4174 */
4175
76e3520e 4176STATIC bool
ffc61ed2 4177S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4178{
ffc61ed2 4179 char flags = ANYOF_FLAGS(n);
bbce6d69 4180 bool match = FALSE;
1aa99e6b 4181 UV c;
3568d838 4182 STRLEN len = 0;
1aa99e6b 4183
3568d838 4184 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4185
ffc61ed2
JH
4186 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4187 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4188 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4189 match = TRUE;
bbce6d69 4190 }
3568d838 4191 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4192 match = TRUE;
ffc61ed2
JH
4193 if (!match) {
4194 SV *sw = regclass_swash(n, TRUE, 0);
4195
4196 if (sw) {
3568d838 4197 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4198 match = TRUE;
4199 else if (flags & ANYOF_FOLD) {
a2a2844f 4200 STRLEN ulen;
a5961de5 4201 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
a2a2844f 4202
a5961de5
JH
4203 to_utf8_fold(p, tmpbuf, &ulen);
4204 if (swash_fetch(sw, tmpbuf, do_utf8))
4205 match = TRUE;
4206 to_utf8_upper(p, tmpbuf, &ulen);
3568d838 4207 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4208 match = TRUE;
4209 }
4210 }
bbce6d69 4211 }
4212 }
1aa99e6b 4213 if (!match && c < 256) {
ffc61ed2
JH
4214 if (ANYOF_BITMAP_TEST(n, c))
4215 match = TRUE;
4216 else if (flags & ANYOF_FOLD) {
3568d838 4217 I32 f;
a0ed51b3 4218
ffc61ed2
JH
4219 if (flags & ANYOF_LOCALE) {
4220 PL_reg_flags |= RF_tainted;
4221 f = PL_fold_locale[c];
4222 }
4223 else
4224 f = PL_fold[c];
4225 if (f != c && ANYOF_BITMAP_TEST(n, f))
4226 match = TRUE;
4227 }
4228
4229 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4230 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4231 if (
4232 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4233 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4234 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4235 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4236 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4237 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4238 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4239 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4240 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4241 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4242 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4243 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4244 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4245 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4246 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4247 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4248 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4249 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4250 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4251 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4252 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4253 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4254 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4255 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4256 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4257 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4258 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4259 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4260 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4261 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4262 ) /* How's that for a conditional? */
4263 {
4264 match = TRUE;
4265 }
a0ed51b3 4266 }
a0ed51b3
LW
4267 }
4268
a0ed51b3
LW
4269 return (flags & ANYOF_INVERT) ? !match : match;
4270}
161b471a 4271
dfe13c55 4272STATIC U8 *
cea2e8a9 4273S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4274{
1aa99e6b
IH
4275 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4276}
4277
4278STATIC U8 *
4279S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4280{
a0ed51b3 4281 if (off >= 0) {
1aa99e6b 4282 while (off-- && s < lim) {
ffc61ed2 4283 /* XXX could check well-formedness here */
a0ed51b3 4284 s += UTF8SKIP(s);
ffc61ed2 4285 }
a0ed51b3
LW
4286 }
4287 else {
4288 while (off++) {
1aa99e6b 4289 if (s > lim) {
a0ed51b3 4290 s--;
ffc61ed2 4291 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4292 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4293 s--;
ffc61ed2
JH
4294 }
4295 /* XXX could check well-formedness here */
a0ed51b3
LW
4296 }
4297 }
4298 }
4299 return s;
4300}
161b471a 4301
dfe13c55 4302STATIC U8 *
1aa99e6b 4303S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4304{
1aa99e6b
IH
4305 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4306}
4307
4308STATIC U8 *
4309S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4310{
4311 if (off >= 0) {
1aa99e6b 4312 while (off-- && s < lim) {
ffc61ed2 4313 /* XXX could check well-formedness here */
a0ed51b3 4314 s += UTF8SKIP(s);
ffc61ed2 4315 }
a0ed51b3
LW
4316 if (off >= 0)
4317 return 0;
4318 }
4319 else {
4320 while (off++) {
1aa99e6b 4321 if (s > lim) {
a0ed51b3 4322 s--;
ffc61ed2 4323 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4324 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4325 s--;
ffc61ed2
JH
4326 }
4327 /* XXX could check well-formedness here */
a0ed51b3
LW
4328 }
4329 else
4330 break;
4331 }
4332 if (off <= 0)
4333 return 0;
4334 }
4335 return s;
4336}
51371543 4337
51371543 4338static void
acfe0abc 4339restore_pos(pTHX_ void *arg)
51371543 4340{
51371543
GS
4341 if (PL_reg_eval_set) {
4342 if (PL_reg_oldsaved) {
4343 PL_reg_re->subbeg = PL_reg_oldsaved;
4344 PL_reg_re->sublen = PL_reg_oldsavedlen;
4345 RX_MATCH_COPIED_on(PL_reg_re);
4346 }
4347 PL_reg_magic->mg_len = PL_reg_oldpos;
4348 PL_reg_eval_set = 0;
4349 PL_curpm = PL_reg_oldcurpm;
4350 }
4351}