This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlhack.pod
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23
G
907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
963 e = strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
967
968 if (do_utf8) {
969 STRLEN len;
cadb39a9
JH
970 /* The ibcmp_utf8() uses to_uni_fold() which is more
971 * correct folding for Unicode than using lowercase.
972 * However, it doesn't work quite fully since the folding
973 * is a one-to-many mapping and the regex optimizer is
974 * unaware of this, so it may throw out good matches.
975 * Fortunately, not getting this right is allowed
976 * for Unicode Regular Expression Support level 1,
977 * only one-to-one matching is required. --jhi */
09091399 978 if (c1 == c2) {
1aa99e6b 979 while (s <= e) {
9041c2e3 980 if ( utf8_to_uvchr((U8*)s, &len) == c1
bc517b45 981 && (ln == len ||
332ddc25 982 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
983 m, UTF, ln))
984 && (norun || regtry(prog, s)) )
1aa99e6b
IH
985 goto got_it;
986 s += len;
987 }
09091399
JH
988 }
989 else {
1aa99e6b 990 while (s <= e) {
9041c2e3 991 UV c = utf8_to_uvchr((U8*)s, &len);
09091399
JH
992 if (c == UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
993 c == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
994 c = UNICODE_GREEK_SMALL_LETTER_SIGMA;
701a277b 995 if ( (c == c1 || c == c2)
bc517b45 996 && (ln == len ||
332ddc25 997 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
998 m, UTF, ln))
999 && (norun || regtry(prog, s)) )
1aa99e6b
IH
1000 goto got_it;
1001 s += len;
1002 }
09091399 1003 }
1aa99e6b
IH
1004 }
1005 else {
1006 if (c1 == c2)
1007 while (s <= e) {
1008 if ( *(U8*)s == c1
1009 && (ln == 1 || !(OP(c) == EXACTF
1010 ? ibcmp(s, m, ln)
1011 : ibcmp_locale(s, m, ln)))
1012 && (norun || regtry(prog, s)) )
1013 goto got_it;
1014 s++;
1015 }
1016 else
1017 while (s <= e) {
1018 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1019 && (ln == 1 || !(OP(c) == EXACTF
1020 ? ibcmp(s, m, ln)
1021 : ibcmp_locale(s, m, ln)))
1022 && (norun || regtry(prog, s)) )
1023 goto got_it;
1024 s++;
1025 }
b3c9acc1
IZ
1026 }
1027 break;
bbce6d69 1028 case BOUNDL:
3280af22 1029 PL_reg_flags |= RF_tainted;
bbce6d69 1030 /* FALL THROUGH */
a0d0e21e 1031 case BOUND:
ffc61ed2 1032 if (do_utf8) {
12d33761 1033 if (s == PL_bostr)
ffc61ed2
JH
1034 tmp = '\n';
1035 else {
1aa99e6b 1036 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1037
0064a8a9
JH
1038 if (s > (char*)r)
1039 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1040 }
1041 tmp = ((OP(c) == BOUND ?
9041c2e3 1042 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1043 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1044 while (s < strend) {
1045 if (tmp == !(OP(c) == BOUND ?
3568d838 1046 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1047 isALNUM_LC_utf8((U8*)s)))
1048 {
1049 tmp = !tmp;
1050 if ((norun || regtry(prog, s)))
1051 goto got_it;
1052 }
1053 s += UTF8SKIP(s);
a687059c 1054 }
a0d0e21e 1055 }
667bb95a 1056 else {
12d33761 1057 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1058 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1059 while (s < strend) {
1060 if (tmp ==
1061 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1062 tmp = !tmp;
1063 if ((norun || regtry(prog, s)))
1064 goto got_it;
1065 }
1066 s++;
a0ed51b3 1067 }
a0ed51b3 1068 }
6eb5f6b9 1069 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1070 goto got_it;
1071 break;
bbce6d69 1072 case NBOUNDL:
3280af22 1073 PL_reg_flags |= RF_tainted;
bbce6d69 1074 /* FALL THROUGH */
a0d0e21e 1075 case NBOUND:
ffc61ed2 1076 if (do_utf8) {
12d33761 1077 if (s == PL_bostr)
ffc61ed2
JH
1078 tmp = '\n';
1079 else {
1aa99e6b 1080 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1081
0064a8a9
JH
1082 if (s > (char*)r)
1083 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1084 }
1085 tmp = ((OP(c) == NBOUND ?
9041c2e3 1086 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1087 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1088 while (s < strend) {
1089 if (tmp == !(OP(c) == NBOUND ?
3568d838 1090 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1091 isALNUM_LC_utf8((U8*)s)))
1092 tmp = !tmp;
1093 else if ((norun || regtry(prog, s)))
1094 goto got_it;
1095 s += UTF8SKIP(s);
1096 }
a0d0e21e 1097 }
667bb95a 1098 else {
12d33761 1099 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1100 tmp = ((OP(c) == NBOUND ?
1101 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1102 while (s < strend) {
1103 if (tmp ==
1104 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1105 tmp = !tmp;
1106 else if ((norun || regtry(prog, s)))
1107 goto got_it;
1108 s++;
1109 }
a0ed51b3 1110 }
6eb5f6b9 1111 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1112 goto got_it;
1113 break;
a0d0e21e 1114 case ALNUM:
ffc61ed2 1115 if (do_utf8) {
8269fa76 1116 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1117 while (s < strend) {
3568d838 1118 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
bbce6d69 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s += UTF8SKIP(s);
bbce6d69 1127 }
bbce6d69 1128 }
ffc61ed2
JH
1129 else {
1130 while (s < strend) {
1131 if (isALNUM(*s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a0ed51b3 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s++;
a0ed51b3 1140 }
a0ed51b3
LW
1141 }
1142 break;
bbce6d69 1143 case ALNUML:
3280af22 1144 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1145 if (do_utf8) {
1146 while (s < strend) {
1147 if (isALNUM_LC_utf8((U8*)s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a687059c 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s += UTF8SKIP(s);
a0d0e21e 1156 }
a0d0e21e 1157 }
ffc61ed2
JH
1158 else {
1159 while (s < strend) {
1160 if (isALNUM_LC(*s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a0ed51b3 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s++;
a0ed51b3 1169 }
a0ed51b3
LW
1170 }
1171 break;
a0d0e21e 1172 case NALNUM:
ffc61ed2 1173 if (do_utf8) {
8269fa76 1174 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1175 while (s < strend) {
3568d838 1176 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
bbce6d69 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s += UTF8SKIP(s);
bbce6d69 1185 }
bbce6d69 1186 }
ffc61ed2
JH
1187 else {
1188 while (s < strend) {
1189 if (!isALNUM(*s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
a0ed51b3 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s++;
a0ed51b3 1198 }
a0ed51b3
LW
1199 }
1200 break;
bbce6d69 1201 case NALNUML:
3280af22 1202 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1203 if (do_utf8) {
1204 while (s < strend) {
1205 if (!isALNUM_LC_utf8((U8*)s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
a687059c 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s += UTF8SKIP(s);
a687059c 1214 }
a0d0e21e 1215 }
ffc61ed2
JH
1216 else {
1217 while (s < strend) {
1218 if (!isALNUM_LC(*s)) {
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a0ed51b3 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s++;
a0ed51b3 1227 }
a0ed51b3
LW
1228 }
1229 break;
a0d0e21e 1230 case SPACE:
ffc61ed2 1231 if (do_utf8) {
8269fa76 1232 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1233 while (s < strend) {
3568d838 1234 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0d0e21e 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s += UTF8SKIP(s);
2304df62 1243 }
a0d0e21e 1244 }
ffc61ed2
JH
1245 else {
1246 while (s < strend) {
1247 if (isSPACE(*s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a0ed51b3 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s++;
a0ed51b3 1256 }
a0ed51b3
LW
1257 }
1258 break;
bbce6d69 1259 case SPACEL:
3280af22 1260 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1261 if (do_utf8) {
1262 while (s < strend) {
1263 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
bbce6d69 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s += UTF8SKIP(s);
bbce6d69 1272 }
bbce6d69 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (isSPACE_LC(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
a0d0e21e 1288 case NSPACE:
ffc61ed2 1289 if (do_utf8) {
8269fa76 1290 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1291 while (s < strend) {
3568d838 1292 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0d0e21e 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s += UTF8SKIP(s);
a687059c 1301 }
a0d0e21e 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (!isSPACE(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a0ed51b3 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
a0ed51b3 1314 }
a0ed51b3
LW
1315 }
1316 break;
bbce6d69 1317 case NSPACEL:
3280af22 1318 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1319 if (do_utf8) {
1320 while (s < strend) {
1321 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
bbce6d69 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s += UTF8SKIP(s);
bbce6d69 1330 }
bbce6d69 1331 }
ffc61ed2
JH
1332 else {
1333 while (s < strend) {
1334 if (!isSPACE_LC(*s)) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0ed51b3 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s++;
a0ed51b3 1343 }
a0ed51b3
LW
1344 }
1345 break;
a0d0e21e 1346 case DIGIT:
ffc61ed2 1347 if (do_utf8) {
8269fa76 1348 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1349 while (s < strend) {
3568d838 1350 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a0d0e21e 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s += UTF8SKIP(s);
2b69d0c2 1359 }
a0d0e21e 1360 }
ffc61ed2
JH
1361 else {
1362 while (s < strend) {
1363 if (isDIGIT(*s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a0ed51b3 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s++;
a0ed51b3 1372 }
a0ed51b3
LW
1373 }
1374 break;
b8c5462f
JH
1375 case DIGITL:
1376 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1377 if (do_utf8) {
1378 while (s < strend) {
1379 if (isDIGIT_LC_utf8((U8*)s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
b8c5462f 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s += UTF8SKIP(s);
b8c5462f 1388 }
b8c5462f 1389 }
ffc61ed2
JH
1390 else {
1391 while (s < strend) {
1392 if (isDIGIT_LC(*s)) {
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
b8c5462f 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s++;
b8c5462f 1401 }
b8c5462f
JH
1402 }
1403 break;
a0d0e21e 1404 case NDIGIT:
ffc61ed2 1405 if (do_utf8) {
8269fa76 1406 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1407 while (s < strend) {
3568d838 1408 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1409 if (tmp && (norun || regtry(prog, s)))
1410 goto got_it;
1411 else
1412 tmp = doevery;
1413 }
a0d0e21e 1414 else
ffc61ed2
JH
1415 tmp = 1;
1416 s += UTF8SKIP(s);
a687059c 1417 }
a0d0e21e 1418 }
ffc61ed2
JH
1419 else {
1420 while (s < strend) {
1421 if (!isDIGIT(*s)) {
1422 if (tmp && (norun || regtry(prog, s)))
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
a0ed51b3 1427 else
ffc61ed2
JH
1428 tmp = 1;
1429 s++;
a0ed51b3 1430 }
a0ed51b3
LW
1431 }
1432 break;
b8c5462f
JH
1433 case NDIGITL:
1434 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1435 if (do_utf8) {
1436 while (s < strend) {
1437 if (!isDIGIT_LC_utf8((U8*)s)) {
1438 if (tmp && (norun || regtry(prog, s)))
1439 goto got_it;
1440 else
1441 tmp = doevery;
1442 }
b8c5462f 1443 else
ffc61ed2
JH
1444 tmp = 1;
1445 s += UTF8SKIP(s);
b8c5462f 1446 }
a0ed51b3 1447 }
ffc61ed2
JH
1448 else {
1449 while (s < strend) {
1450 if (!isDIGIT_LC(*s)) {
1451 if (tmp && (norun || regtry(prog, s)))
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
cf93c79d 1456 else
ffc61ed2
JH
1457 tmp = 1;
1458 s++;
b8c5462f 1459 }
b8c5462f
JH
1460 }
1461 break;
b3c9acc1 1462 default:
3c3eec57
GS
1463 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1464 break;
d6a28714 1465 }
6eb5f6b9
JH
1466 return 0;
1467 got_it:
1468 return s;
1469}
1470
1471/*
1472 - regexec_flags - match a regexp against a string
1473 */
1474I32
1475Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1476 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1477/* strend: pointer to null at end of string */
1478/* strbeg: real beginning of string */
1479/* minend: end of match must be >=minend after stringarg. */
1480/* data: May be used for some additional optimizations. */
1481/* nosave: For optimizations. */
1482{
6eb5f6b9
JH
1483 register char *s;
1484 register regnode *c;
1485 register char *startpos = stringarg;
6eb5f6b9
JH
1486 I32 minlen; /* must match at least this many chars */
1487 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1488 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1489 constant substr. */ /* CC */
1490 I32 end_shift = 0; /* Same for the end. */ /* CC */
1491 I32 scream_pos = -1; /* Internal iterator of scream. */
1492 char *scream_olds;
1493 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1494 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1495#ifdef DEBUGGING
ce333219 1496 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1497#endif
6eb5f6b9
JH
1498
1499 PL_regcc = 0;
1500
1501 cache_re(prog);
1502#ifdef DEBUGGING
aea4f609 1503 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1504#endif
1505
1506 /* Be paranoid... */
1507 if (prog == NULL || startpos == NULL) {
1508 Perl_croak(aTHX_ "NULL regexp parameter");
1509 return 0;
1510 }
1511
1512 minlen = prog->minlen;
a72c7584
JH
1513 if (strend - startpos < minlen) {
1514 DEBUG_r(PerlIO_printf(Perl_debug_log,
1515 "String too short [regexec_flags]...\n"));
1516 goto phooey;
1aa99e6b 1517 }
6eb5f6b9 1518
6eb5f6b9
JH
1519 /* Check validity of program. */
1520 if (UCHARAT(prog->program) != REG_MAGIC) {
1521 Perl_croak(aTHX_ "corrupted regexp program");
1522 }
1523
1524 PL_reg_flags = 0;
1525 PL_reg_eval_set = 0;
1526 PL_reg_maxiter = 0;
1527
1528 if (prog->reganch & ROPT_UTF8)
1529 PL_reg_flags |= RF_utf8;
1530
1531 /* Mark beginning of line for ^ and lookbehind. */
1532 PL_regbol = startpos;
1533 PL_bostr = strbeg;
1534 PL_reg_sv = sv;
1535
1536 /* Mark end of line for $ (and such) */
1537 PL_regeol = strend;
1538
1539 /* see how far we have to get to not match where we matched before */
1540 PL_regtill = startpos+minend;
1541
1542 /* We start without call_cc context. */
1543 PL_reg_call_cc = 0;
1544
1545 /* If there is a "must appear" string, look for it. */
1546 s = startpos;
1547
1548 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1549 MAGIC *mg;
1550
1551 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1552 PL_reg_ganch = startpos;
1553 else if (sv && SvTYPE(sv) >= SVt_PVMG
1554 && SvMAGIC(sv)
14befaf4
DM
1555 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1556 && mg->mg_len >= 0) {
6eb5f6b9
JH
1557 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1558 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1559 if (s > PL_reg_ganch)
6eb5f6b9
JH
1560 goto phooey;
1561 s = PL_reg_ganch;
1562 }
1563 }
1564 else /* pos() not defined */
1565 PL_reg_ganch = strbeg;
1566 }
1567
699c3c34
JH
1568 if (do_utf8 == (UTF!=0) &&
1569 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1570 re_scream_pos_data d;
1571
1572 d.scream_olds = &scream_olds;
1573 d.scream_pos = &scream_pos;
1574 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1575 if (!s) {
1576 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1577 goto phooey; /* not present */
3fa9c3d7 1578 }
6eb5f6b9
JH
1579 }
1580
2a782b5b 1581 DEBUG_r({
df1ffd02
JH
1582 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1583 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1584 if (!PL_colorset)
1585 reginitcolors();
1586 PerlIO_printf(Perl_debug_log,
1587 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1588 PL_colors[4],PL_colors[5],PL_colors[0],
1589 prog->precomp,
1590 PL_colors[1],
1591 (strlen(prog->precomp) > 60 ? "..." : ""),
1592 PL_colors[0],
1593 (int)(len > 60 ? 60 : len),
1594 s, PL_colors[1],
1595 (len > 60 ? "..." : "")
1596 );
1597 });
6eb5f6b9
JH
1598
1599 /* Simplest case: anchored match need be tried only once. */
1600 /* [unless only anchor is BOL and multiline is set] */
1601 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1602 if (s == startpos && regtry(prog, startpos))
1603 goto got_it;
1604 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1605 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1606 {
1607 char *end;
1608
1609 if (minlen)
1610 dontbother = minlen - 1;
1aa99e6b 1611 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1612 /* for multiline we only have to try after newlines */
1613 if (prog->check_substr) {
1614 if (s == startpos)
1615 goto after_try;
1616 while (1) {
1617 if (regtry(prog, s))
1618 goto got_it;
1619 after_try:
1620 if (s >= end)
1621 goto phooey;
1622 if (prog->reganch & RE_USE_INTUIT) {
1623 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1624 if (!s)
1625 goto phooey;
1626 }
1627 else
1628 s++;
1629 }
1630 } else {
1631 if (s > startpos)
1632 s--;
1633 while (s < end) {
1634 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1635 if (regtry(prog, s))
1636 goto got_it;
1637 }
1638 }
1639 }
1640 }
1641 goto phooey;
1642 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1643 if (regtry(prog, PL_reg_ganch))
1644 goto got_it;
1645 goto phooey;
1646 }
1647
1648 /* Messy cases: unanchored match. */
9041c2e3 1649 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1650 /* we have /x+whatever/ */
1651 /* it must be a one character string (XXXX Except UTF?) */
1652 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1653#ifdef DEBUGGING
1654 int did_match = 0;
1655#endif
1656
1aa99e6b 1657 if (do_utf8) {
6eb5f6b9
JH
1658 while (s < strend) {
1659 if (*s == ch) {
bf93d4cc 1660 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1661 if (regtry(prog, s)) goto got_it;
1662 s += UTF8SKIP(s);
1663 while (s < strend && *s == ch)
1664 s += UTF8SKIP(s);
1665 }
1666 s += UTF8SKIP(s);
1667 }
1668 }
1669 else {
1670 while (s < strend) {
1671 if (*s == ch) {
bf93d4cc 1672 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1673 if (regtry(prog, s)) goto got_it;
1674 s++;
1675 while (s < strend && *s == ch)
1676 s++;
1677 }
1678 s++;
1679 }
1680 }
b7953727 1681 DEBUG_r(if (!did_match)
bf93d4cc 1682 PerlIO_printf(Perl_debug_log,
b7953727
JH
1683 "Did not find anchored character...\n")
1684 );
6eb5f6b9
JH
1685 }
1686 /*SUPPRESS 560*/
1aa99e6b
IH
1687 else if (do_utf8 == (UTF!=0) &&
1688 (prog->anchored_substr != Nullsv
9041c2e3 1689 || (prog->float_substr != Nullsv
1aa99e6b 1690 && prog->float_max_offset < strend - s))) {
9041c2e3 1691 SV *must = prog->anchored_substr
6eb5f6b9 1692 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1693 I32 back_max =
6eb5f6b9 1694 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1695 I32 back_min =
6eb5f6b9 1696 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1697 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1698 -(I32)(CHR_SVLEN(must)
1aa99e6b 1699 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1700 char *last1; /* Last position checked before */
bf93d4cc
GS
1701#ifdef DEBUGGING
1702 int did_match = 0;
1703#endif
6eb5f6b9
JH
1704
1705 if (s > PL_bostr)
1706 last1 = HOPc(s, -1);
1707 else
1708 last1 = s - 1; /* bogus */
1709
1710 /* XXXX check_substr already used to find `s', can optimize if
1711 check_substr==must. */
1712 scream_pos = -1;
1713 dontbother = end_shift;
1714 strend = HOPc(strend, -dontbother);
1715 while ( (s <= last) &&
9041c2e3 1716 ((flags & REXEC_SCREAM)
1aa99e6b 1717 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1718 end_shift, &scream_pos, 0))
1aa99e6b 1719 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1720 (unsigned char*)strend, must,
6eb5f6b9 1721 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1722 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1723 if (HOPc(s, -back_max) > last1) {
1724 last1 = HOPc(s, -back_min);
1725 s = HOPc(s, -back_max);
1726 }
1727 else {
1728 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1729
1730 last1 = HOPc(s, -back_min);
1731 s = t;
1732 }
1aa99e6b 1733 if (do_utf8) {
6eb5f6b9
JH
1734 while (s <= last1) {
1735 if (regtry(prog, s))
1736 goto got_it;
1737 s += UTF8SKIP(s);
1738 }
1739 }
1740 else {
1741 while (s <= last1) {
1742 if (regtry(prog, s))
1743 goto got_it;
1744 s++;
1745 }
1746 }
1747 }
b7953727
JH
1748 DEBUG_r(if (!did_match)
1749 PerlIO_printf(Perl_debug_log,
1750 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1751 ((must == prog->anchored_substr)
1752 ? "anchored" : "floating"),
1753 PL_colors[0],
1754 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1755 SvPVX(must),
b7953727
JH
1756 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1757 );
6eb5f6b9
JH
1758 goto phooey;
1759 }
155aba94 1760 else if ((c = prog->regstclass)) {
66e933ab
GS
1761 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1762 /* don't bother with what can't match */
6eb5f6b9 1763 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1764 DEBUG_r({
1765 SV *prop = sv_newmortal();
1766 regprop(prop, c);
2a782b5b 1767 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1768 });
6eb5f6b9
JH
1769 if (find_byclass(prog, c, s, strend, startpos, 0))
1770 goto got_it;
bf93d4cc 1771 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1772 }
1773 else {
1774 dontbother = 0;
1775 if (prog->float_substr != Nullsv) { /* Trim the end. */
1776 char *last;
d6a28714
JH
1777
1778 if (flags & REXEC_SCREAM) {
1779 last = screaminstr(sv, prog->float_substr, s - strbeg,
1780 end_shift, &scream_pos, 1); /* last one */
1781 if (!last)
ffc61ed2 1782 last = scream_olds; /* Only one occurrence. */
b8c5462f 1783 }
d6a28714
JH
1784 else {
1785 STRLEN len;
1786 char *little = SvPV(prog->float_substr, len);
1787
1788 if (SvTAIL(prog->float_substr)) {
1789 if (memEQ(strend - len + 1, little, len - 1))
1790 last = strend - len + 1;
1791 else if (!PL_multiline)
9041c2e3 1792 last = memEQ(strend - len, little, len)
d6a28714 1793 ? strend - len : Nullch;
b8c5462f 1794 else
d6a28714
JH
1795 goto find_last;
1796 } else {
1797 find_last:
9041c2e3 1798 if (len)
d6a28714 1799 last = rninstr(s, strend, little, little + len);
b8c5462f 1800 else
d6a28714 1801 last = strend; /* matching `$' */
b8c5462f 1802 }
b8c5462f 1803 }
bf93d4cc
GS
1804 if (last == NULL) {
1805 DEBUG_r(PerlIO_printf(Perl_debug_log,
1806 "%sCan't trim the tail, match fails (should not happen)%s\n",
1807 PL_colors[4],PL_colors[5]));
1808 goto phooey; /* Should not happen! */
1809 }
d6a28714
JH
1810 dontbother = strend - last + prog->float_min_offset;
1811 }
1812 if (minlen && (dontbother < minlen))
1813 dontbother = minlen - 1;
1814 strend -= dontbother; /* this one's always in bytes! */
1815 /* We don't know much -- general case. */
1aa99e6b 1816 if (do_utf8) {
d6a28714
JH
1817 for (;;) {
1818 if (regtry(prog, s))
1819 goto got_it;
1820 if (s >= strend)
1821 break;
b8c5462f 1822 s += UTF8SKIP(s);
d6a28714
JH
1823 };
1824 }
1825 else {
1826 do {
1827 if (regtry(prog, s))
1828 goto got_it;
1829 } while (s++ < strend);
1830 }
1831 }
1832
1833 /* Failure. */
1834 goto phooey;
1835
1836got_it:
1837 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1838
1839 if (PL_reg_eval_set) {
1840 /* Preserve the current value of $^R */
1841 if (oreplsv != GvSV(PL_replgv))
1842 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1843 restored, the value remains
1844 the same. */
acfe0abc 1845 restore_pos(aTHX_ 0);
d6a28714
JH
1846 }
1847
1848 /* make sure $`, $&, $', and $digit will work later */
1849 if ( !(flags & REXEC_NOT_FIRST) ) {
1850 if (RX_MATCH_COPIED(prog)) {
1851 Safefree(prog->subbeg);
1852 RX_MATCH_COPIED_off(prog);
1853 }
1854 if (flags & REXEC_COPY_STR) {
1855 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1856
1857 s = savepvn(strbeg, i);
1858 prog->subbeg = s;
1859 prog->sublen = i;
1860 RX_MATCH_COPIED_on(prog);
1861 }
1862 else {
1863 prog->subbeg = strbeg;
1864 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1865 }
1866 }
9041c2e3 1867
d6a28714
JH
1868 return 1;
1869
1870phooey:
bf93d4cc
GS
1871 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1872 PL_colors[4],PL_colors[5]));
d6a28714 1873 if (PL_reg_eval_set)
acfe0abc 1874 restore_pos(aTHX_ 0);
d6a28714
JH
1875 return 0;
1876}
1877
1878/*
1879 - regtry - try match at specific point
1880 */
1881STATIC I32 /* 0 failure, 1 success */
1882S_regtry(pTHX_ regexp *prog, char *startpos)
1883{
d6a28714
JH
1884 register I32 i;
1885 register I32 *sp;
1886 register I32 *ep;
1887 CHECKPOINT lastcp;
1888
02db2b7b
IZ
1889#ifdef DEBUGGING
1890 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1891#endif
d6a28714
JH
1892 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1893 MAGIC *mg;
1894
1895 PL_reg_eval_set = RS_init;
1896 DEBUG_r(DEBUG_s(
b900a521
JH
1897 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1898 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1899 ));
e8347627 1900 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1901 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1902 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1903 SAVETMPS;
1904 /* Apparently this is not needed, judging by wantarray. */
e8347627 1905 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1906 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1907
1908 if (PL_reg_sv) {
1909 /* Make $_ available to executed code. */
1910 if (PL_reg_sv != DEFSV) {
4d1ff10f 1911 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1912 SAVESPTR(DEFSV);
1913 DEFSV = PL_reg_sv;
b8c5462f 1914 }
d6a28714 1915
9041c2e3 1916 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1917 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1918 /* prepare for quick setting of pos */
14befaf4
DM
1919 sv_magic(PL_reg_sv, (SV*)0,
1920 PERL_MAGIC_regex_global, Nullch, 0);
1921 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1922 mg->mg_len = -1;
b8c5462f 1923 }
d6a28714
JH
1924 PL_reg_magic = mg;
1925 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1926 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1927 }
09687e5a 1928 if (!PL_reg_curpm) {
0f79a09d 1929 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1930#ifdef USE_ITHREADS
1931 {
1932 SV* repointer = newSViv(0);
577e12cc 1933 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1934 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1935 av_push(PL_regex_padav,repointer);
1936 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1937 PL_regex_pad = AvARRAY(PL_regex_padav);
1938 }
1939#endif
1940 }
aaa362c4 1941 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1942 PL_reg_oldcurpm = PL_curpm;
1943 PL_curpm = PL_reg_curpm;
1944 if (RX_MATCH_COPIED(prog)) {
1945 /* Here is a serious problem: we cannot rewrite subbeg,
1946 since it may be needed if this match fails. Thus
1947 $` inside (?{}) could fail... */
1948 PL_reg_oldsaved = prog->subbeg;
1949 PL_reg_oldsavedlen = prog->sublen;
1950 RX_MATCH_COPIED_off(prog);
1951 }
1952 else
1953 PL_reg_oldsaved = Nullch;
1954 prog->subbeg = PL_bostr;
1955 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1956 }
1957 prog->startp[0] = startpos - PL_bostr;
1958 PL_reginput = startpos;
1959 PL_regstartp = prog->startp;
1960 PL_regendp = prog->endp;
1961 PL_reglastparen = &prog->lastparen;
a01268b5 1962 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1963 prog->lastparen = 0;
1964 PL_regsize = 0;
1965 DEBUG_r(PL_reg_starttry = startpos);
1966 if (PL_reg_start_tmpl <= prog->nparens) {
1967 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1968 if(PL_reg_start_tmp)
1969 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1970 else
1971 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1972 }
1973
128e8167
JH
1974#ifdef DEBUGGING
1975 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1976 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1977 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1978#endif
1979
d6a28714
JH
1980 /* XXXX What this code is doing here?!!! There should be no need
1981 to do this again and again, PL_reglastparen should take care of
3dd2943c 1982 this! --ilya*/
dafc8851
JH
1983
1984 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1985 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1986 * PL_reglastparen), is not needed at all by the test suite
1987 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1988 * enough, for building DynaLoader, or otherwise this
1989 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1990 * will happen. Meanwhile, this code *is* needed for the
1991 * above-mentioned test suite tests to succeed. The common theme
1992 * on those tests seems to be returning null fields from matches.
1993 * --jhi */
dafc8851 1994#if 1
d6a28714
JH
1995 sp = prog->startp;
1996 ep = prog->endp;
1997 if (prog->nparens) {
09e8ae3b 1998 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1999 *++sp = -1;
2000 *++ep = -1;
2001 }
2002 }
dafc8851 2003#endif
02db2b7b 2004 REGCP_SET(lastcp);
d6a28714
JH
2005 if (regmatch(prog->program + 1)) {
2006 prog->endp[0] = PL_reginput - PL_bostr;
2007 return 1;
2008 }
02db2b7b 2009 REGCP_UNWIND(lastcp);
d6a28714
JH
2010 return 0;
2011}
2012
02db2b7b
IZ
2013#define RE_UNWIND_BRANCH 1
2014#define RE_UNWIND_BRANCHJ 2
2015
2016union re_unwind_t;
2017
2018typedef struct { /* XX: makes sense to enlarge it... */
2019 I32 type;
2020 I32 prev;
2021 CHECKPOINT lastcp;
2022} re_unwind_generic_t;
2023
2024typedef struct {
2025 I32 type;
2026 I32 prev;
2027 CHECKPOINT lastcp;
2028 I32 lastparen;
2029 regnode *next;
2030 char *locinput;
2031 I32 nextchr;
2032#ifdef DEBUGGING
2033 int regindent;
2034#endif
2035} re_unwind_branch_t;
2036
2037typedef union re_unwind_t {
2038 I32 type;
2039 re_unwind_generic_t generic;
2040 re_unwind_branch_t branch;
2041} re_unwind_t;
2042
8ba1375e
MJD
2043#define sayYES goto yes
2044#define sayNO goto no
2045#define sayYES_FINAL goto yes_final
2046#define sayYES_LOUD goto yes_loud
2047#define sayNO_FINAL goto no_final
2048#define sayNO_SILENT goto do_no
2049#define saySAME(x) if (x) goto yes; else goto no
2050
2051#define REPORT_CODE_OFF 24
2052
d6a28714
JH
2053/*
2054 - regmatch - main matching routine
2055 *
2056 * Conceptually the strategy is simple: check to see whether the current
2057 * node matches, call self recursively to see whether the rest matches,
2058 * and then act accordingly. In practice we make some effort to avoid
2059 * recursion, in particular by going through "ordinary" nodes (that don't
2060 * need to know whether the rest of the match failed) by a loop instead of
2061 * by recursion.
2062 */
2063/* [lwall] I've hoisted the register declarations to the outer block in order to
2064 * maybe save a little bit of pushing and popping on the stack. It also takes
2065 * advantage of machines that use a register save mask on subroutine entry.
2066 */
2067STATIC I32 /* 0 failure, 1 success */
2068S_regmatch(pTHX_ regnode *prog)
2069{
d6a28714
JH
2070 register regnode *scan; /* Current node. */
2071 regnode *next; /* Next node. */
2072 regnode *inner; /* Next node in internal branch. */
2073 register I32 nextchr; /* renamed nextchr - nextchar colides with
2074 function of same name */
2075 register I32 n; /* no or next */
b7953727
JH
2076 register I32 ln = 0; /* len or last */
2077 register char *s = Nullch; /* operand or save */
d6a28714 2078 register char *locinput = PL_reginput;
b7953727 2079 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2080 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2081 I32 unwind = 0;
b7953727 2082#if 0
02db2b7b 2083 I32 firstcp = PL_savestack_ix;
b7953727 2084#endif
53c4c00c 2085 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2086#ifdef DEBUGGING
ce333219
JH
2087 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2088 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2089 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2090#endif
02db2b7b 2091
d6a28714
JH
2092#ifdef DEBUGGING
2093 PL_regindent++;
2094#endif
2095
2096 /* Note that nextchr is a byte even in UTF */
2097 nextchr = UCHARAT(locinput);
2098 scan = prog;
2099 while (scan != NULL) {
8ba1375e 2100
2a782b5b 2101 DEBUG_r( {
d6a28714
JH
2102 SV *prop = sv_newmortal();
2103 int docolor = *PL_colors[0];
2104 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2105 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2106 /* The part of the string before starttry has one color
2107 (pref0_len chars), between starttry and current
2108 position another one (pref_len - pref0_len chars),
2109 after the current position the third one.
2110 We assume that pref0_len <= pref_len, otherwise we
2111 decrease pref0_len. */
9041c2e3 2112 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2113 ? (5 + taill) - l : locinput - PL_bostr;
2114 int pref0_len;
d6a28714 2115
df1ffd02 2116 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2117 pref_len++;
2118 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2119 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2120 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2121 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2122 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2123 l--;
d6a28714
JH
2124 if (pref0_len < 0)
2125 pref0_len = 0;
2126 if (pref0_len > pref_len)
2127 pref0_len = pref_len;
2128 regprop(prop, scan);
2a782b5b
JH
2129 {
2130 char *s0 =
df1ffd02 2131 do_utf8 ?
2a782b5b
JH
2132 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2133 pref0_len, 60, 0) :
2134 locinput - pref_len;
df1ffd02
JH
2135 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2136 char *s1 = do_utf8 ?
2a782b5b
JH
2137 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2138 pref_len - pref0_len, 60, 0) :
2139 locinput - pref_len + pref0_len;
df1ffd02
JH
2140 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2141 char *s2 = do_utf8 ?
2a782b5b
JH
2142 pv_uni_display(dsv2, (U8*)locinput,
2143 PL_regeol - locinput, 60, 0) :
2144 locinput;
df1ffd02 2145 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2146 PerlIO_printf(Perl_debug_log,
2147 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2148 (IV)(locinput - PL_bostr),
2149 PL_colors[4],
2150 len0, s0,
2151 PL_colors[5],
2152 PL_colors[2],
2153 len1, s1,
2154 PL_colors[3],
2155 (docolor ? "" : "> <"),
2156 PL_colors[0],
2157 len2, s2,
2158 PL_colors[1],
2159 15 - l - pref_len + 1,
2160 "",
2161 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2162 SvPVX(prop));
2163 }
2164 });
d6a28714
JH
2165
2166 next = scan + NEXT_OFF(scan);
2167 if (next == scan)
2168 next = NULL;
2169
2170 switch (OP(scan)) {
2171 case BOL:
12d33761
HS
2172 if (locinput == PL_bostr || (PL_multiline &&
2173 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2174 {
2175 /* regtill = regbol; */
b8c5462f
JH
2176 break;
2177 }
d6a28714
JH
2178 sayNO;
2179 case MBOL:
12d33761
HS
2180 if (locinput == PL_bostr ||
2181 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2182 {
b8c5462f
JH
2183 break;
2184 }
d6a28714
JH
2185 sayNO;
2186 case SBOL:
c2a73568 2187 if (locinput == PL_bostr)
b8c5462f 2188 break;
d6a28714
JH
2189 sayNO;
2190 case GPOS:
2191 if (locinput == PL_reg_ganch)
2192 break;
2193 sayNO;
2194 case EOL:
2195 if (PL_multiline)
2196 goto meol;
2197 else
2198 goto seol;
2199 case MEOL:
2200 meol:
2201 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2202 sayNO;
b8c5462f 2203 break;
d6a28714
JH
2204 case SEOL:
2205 seol:
2206 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2207 sayNO;
d6a28714 2208 if (PL_regeol - locinput > 1)
b8c5462f 2209 sayNO;
b8c5462f 2210 break;
d6a28714
JH
2211 case EOS:
2212 if (PL_regeol != locinput)
b8c5462f 2213 sayNO;
d6a28714 2214 break;
ffc61ed2 2215 case SANY:
d6a28714 2216 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2217 sayNO;
f33976b4
DB
2218 if (do_utf8) {
2219 locinput += PL_utf8skip[nextchr];
2220 if (locinput > PL_regeol)
2221 sayNO;
2222 nextchr = UCHARAT(locinput);
2223 }
2224 else
2225 nextchr = UCHARAT(++locinput);
2226 break;
2227 case CANY:
2228 if (!nextchr && locinput >= PL_regeol)
2229 sayNO;
b8c5462f 2230 nextchr = UCHARAT(++locinput);
a0d0e21e 2231 break;
ffc61ed2 2232 case REG_ANY:
1aa99e6b
IH
2233 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2234 sayNO;
2235 if (do_utf8) {
b8c5462f 2236 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2237 if (locinput > PL_regeol)
2238 sayNO;
a0ed51b3 2239 nextchr = UCHARAT(locinput);
a0ed51b3 2240 }
1aa99e6b
IH
2241 else
2242 nextchr = UCHARAT(++locinput);
a0ed51b3 2243 break;
d6a28714 2244 case EXACT:
cd439c50
IZ
2245 s = STRING(scan);
2246 ln = STR_LEN(scan);
1aa99e6b 2247 if (do_utf8 != (UTF!=0)) {
bc517b45 2248 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2249 char *l = locinput;
2250 char *e = s + ln;
bc517b45 2251 STRLEN ulen;
a72c7584 2252
5ff6fc6d
JH
2253 if (do_utf8) {
2254 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2255 while (s < e) {
2256 if (l >= PL_regeol)
5ff6fc6d
JH
2257 sayNO;
2258 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2259 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2260 sayNO;
bc517b45 2261 l += ulen;
5ff6fc6d 2262 s ++;
1aa99e6b 2263 }
5ff6fc6d
JH
2264 }
2265 else {
2266 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2267 while (s < e) {
2268 if (l >= PL_regeol)
2269 sayNO;
5ff6fc6d 2270 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2271 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2272 sayNO;
bc517b45 2273 s += ulen;
a72c7584 2274 l ++;
1aa99e6b 2275 }
5ff6fc6d 2276 }
1aa99e6b
IH
2277 locinput = l;
2278 nextchr = UCHARAT(locinput);
2279 break;
2280 }
bc517b45 2281 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2282 /* Inline the first character, for speed. */
2283 if (UCHARAT(s) != nextchr)
2284 sayNO;
2285 if (PL_regeol - locinput < ln)
2286 sayNO;
2287 if (ln > 1 && memNE(s, locinput, ln))
2288 sayNO;
2289 locinput += ln;
2290 nextchr = UCHARAT(locinput);
2291 break;
2292 case EXACTFL:
b8c5462f
JH
2293 PL_reg_flags |= RF_tainted;
2294 /* FALL THROUGH */
d6a28714 2295 case EXACTF:
cd439c50
IZ
2296 s = STRING(scan);
2297 ln = STR_LEN(scan);
d6a28714 2298
bc517b45 2299 {
d6a28714 2300 char *l = locinput;
bc517b45
JH
2301 char *e = s + ln;
2302 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2303
2304 if (do_utf8 != (UTF!=0)) {
2305 /* The target and the pattern have differing utf8ness. */
2306 STRLEN ulen1, ulen2;
2307 UV cs, cl;
2308
2309 if (do_utf8) {
2310 /* The target is utf8, the pattern is not utf8. */
2311 while (s < e) {
2312 if (l >= PL_regeol)
2313 sayNO;
2314
2315 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2316 (U8*)s, &ulen1);
2317 cl = utf8_to_uvchr((U8*)l, &ulen2);
2318
2319 if (cs != cl) {
2320 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2321 if (ulen1 != ulen2 || cs != cl)
2322 sayNO;
2323 }
2324 l += ulen1;
2325 s ++;
2326 }
2327 }
2328 else {
2329 /* The target is not utf8, the pattern is utf8. */
2330 while (s < e) {
2331 if (l >= PL_regeol)
2332 sayNO;
2333
2334 cs = utf8_to_uvchr((U8*)s, &ulen1);
2335
2336 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2337 (U8*)l, &ulen2);
2338
2339 if (cs != cl) {
2340 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2341 if (ulen1 != ulen2 || cs != cl)
2342 sayNO;
2343 }
2344 l ++;
2345 s += ulen1;
2346 }
2347 }
2348 locinput = l;
2349 nextchr = UCHARAT(locinput);
2350 break;
2351 }
2352
2353 if (do_utf8 && UTF) {
2354 /* Both the target and the pattern are utf8. */
2355 STRLEN ulen;
2356
2357 while (s < e) {
2358 if (l >= PL_regeol)
2359 sayNO;
2360 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2361 memNE(s, (char*)l, UTF8SKIP(s))) {
2362 to_utf8_fold((U8*)l, tmpbuf, &ulen);
2363 if (UTF8SKIP(s) != ulen ||
2364 memNE(s, (char*)tmpbuf, ulen))
2365 sayNO;
2366 }
2367 l += UTF8SKIP(l);
2368 s += UTF8SKIP(s);
2369 }
2370 locinput = l;
2371 nextchr = UCHARAT(locinput);
2372 break;
b8c5462f 2373 }
a0ed51b3 2374 }
d6a28714 2375
bc517b45
JH
2376 /* Neither the target and the pattern are utf8. */
2377
d6a28714
JH
2378 /* Inline the first character, for speed. */
2379 if (UCHARAT(s) != nextchr &&
2380 UCHARAT(s) != ((OP(scan) == EXACTF)
2381 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2382 sayNO;
d6a28714 2383 if (PL_regeol - locinput < ln)
b8c5462f 2384 sayNO;
d6a28714
JH
2385 if (ln > 1 && (OP(scan) == EXACTF
2386 ? ibcmp(s, locinput, ln)
2387 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2388 sayNO;
d6a28714
JH
2389 locinput += ln;
2390 nextchr = UCHARAT(locinput);
a0d0e21e 2391 break;
d6a28714 2392 case ANYOF:
ffc61ed2
JH
2393 if (do_utf8) {
2394 if (!reginclass(scan, (U8*)locinput, do_utf8))
2395 sayNO;
2396 if (locinput >= PL_regeol)
2397 sayNO;
2398 locinput += PL_utf8skip[nextchr];
b8c5462f 2399 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2400 }
2401 else {
2402 if (nextchr < 0)
2403 nextchr = UCHARAT(locinput);
2404 if (!reginclass(scan, (U8*)locinput, do_utf8))
2405 sayNO;
2406 if (!nextchr && locinput >= PL_regeol)
2407 sayNO;
2408 nextchr = UCHARAT(++locinput);
2409 }
b8c5462f 2410 break;
d6a28714 2411 case ALNUML:
b8c5462f
JH
2412 PL_reg_flags |= RF_tainted;
2413 /* FALL THROUGH */
d6a28714 2414 case ALNUM:
b8c5462f 2415 if (!nextchr)
4633a7c4 2416 sayNO;
ffc61ed2 2417 if (do_utf8) {
ad24be35 2418 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2419 if (!(OP(scan) == ALNUM
3568d838 2420 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2421 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2422 {
2423 sayNO;
a0ed51b3 2424 }
b8c5462f 2425 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2426 nextchr = UCHARAT(locinput);
2427 break;
2428 }
ffc61ed2 2429 if (!(OP(scan) == ALNUM
d6a28714 2430 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2431 sayNO;
b8c5462f 2432 nextchr = UCHARAT(++locinput);
a0d0e21e 2433 break;
d6a28714 2434 case NALNUML:
b8c5462f
JH
2435 PL_reg_flags |= RF_tainted;
2436 /* FALL THROUGH */
d6a28714
JH
2437 case NALNUM:
2438 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2439 sayNO;
ffc61ed2 2440 if (do_utf8) {
8269fa76 2441 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2442 if (OP(scan) == NALNUM
3568d838 2443 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2444 : isALNUM_LC_utf8((U8*)locinput))
2445 {
b8c5462f 2446 sayNO;
d6a28714 2447 }
b8c5462f
JH
2448 locinput += PL_utf8skip[nextchr];
2449 nextchr = UCHARAT(locinput);
2450 break;
2451 }
ffc61ed2 2452 if (OP(scan) == NALNUM
d6a28714 2453 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2454 sayNO;
76e3520e 2455 nextchr = UCHARAT(++locinput);
a0d0e21e 2456 break;
d6a28714
JH
2457 case BOUNDL:
2458 case NBOUNDL:
3280af22 2459 PL_reg_flags |= RF_tainted;
bbce6d69 2460 /* FALL THROUGH */
d6a28714
JH
2461 case BOUND:
2462 case NBOUND:
2463 /* was last char in word? */
ffc61ed2 2464 if (do_utf8) {
12d33761
HS
2465 if (locinput == PL_bostr)
2466 ln = '\n';
ffc61ed2
JH
2467 else {
2468 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2469
2b9d42f0 2470 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2471 }
2472 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2473 ln = isALNUM_uni(ln);
8269fa76 2474 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2475 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2476 }
2477 else {
9041c2e3 2478 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2479 n = isALNUM_LC_utf8((U8*)locinput);
2480 }
a0ed51b3 2481 }
d6a28714 2482 else {
12d33761
HS
2483 ln = (locinput != PL_bostr) ?
2484 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2485 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2486 ln = isALNUM(ln);
2487 n = isALNUM(nextchr);
2488 }
2489 else {
2490 ln = isALNUM_LC(ln);
2491 n = isALNUM_LC(nextchr);
2492 }
d6a28714 2493 }
ffc61ed2
JH
2494 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2495 OP(scan) == BOUNDL))
2496 sayNO;
a0ed51b3 2497 break;
d6a28714 2498 case SPACEL:
3280af22 2499 PL_reg_flags |= RF_tainted;
bbce6d69 2500 /* FALL THROUGH */
d6a28714 2501 case SPACE:
9442cb0e 2502 if (!nextchr)
4633a7c4 2503 sayNO;
1aa99e6b 2504 if (do_utf8) {
fd400ab9 2505 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2506 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2507 if (!(OP(scan) == SPACE
3568d838 2508 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2509 : isSPACE_LC_utf8((U8*)locinput)))
2510 {
2511 sayNO;
2512 }
2513 locinput += PL_utf8skip[nextchr];
2514 nextchr = UCHARAT(locinput);
2515 break;
d6a28714 2516 }
ffc61ed2
JH
2517 if (!(OP(scan) == SPACE
2518 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2519 sayNO;
2520 nextchr = UCHARAT(++locinput);
2521 }
2522 else {
2523 if (!(OP(scan) == SPACE
2524 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2525 sayNO;
2526 nextchr = UCHARAT(++locinput);
a0ed51b3 2527 }
a0ed51b3 2528 break;
d6a28714 2529 case NSPACEL:
3280af22 2530 PL_reg_flags |= RF_tainted;
bbce6d69 2531 /* FALL THROUGH */
d6a28714 2532 case NSPACE:
9442cb0e 2533 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2534 sayNO;
1aa99e6b 2535 if (do_utf8) {
8269fa76 2536 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2537 if (OP(scan) == NSPACE
3568d838 2538 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2539 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2540 {
2541 sayNO;
2542 }
2543 locinput += PL_utf8skip[nextchr];
2544 nextchr = UCHARAT(locinput);
2545 break;
a0ed51b3 2546 }
ffc61ed2 2547 if (OP(scan) == NSPACE
d6a28714 2548 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2549 sayNO;
76e3520e 2550 nextchr = UCHARAT(++locinput);
a0d0e21e 2551 break;
d6a28714 2552 case DIGITL:
a0ed51b3
LW
2553 PL_reg_flags |= RF_tainted;
2554 /* FALL THROUGH */
d6a28714 2555 case DIGIT:
9442cb0e 2556 if (!nextchr)
a0ed51b3 2557 sayNO;
1aa99e6b 2558 if (do_utf8) {
8269fa76 2559 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2560 if (!(OP(scan) == DIGIT
3568d838 2561 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2562 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2563 {
a0ed51b3 2564 sayNO;
dfe13c55 2565 }
6f06b55f 2566 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2567 nextchr = UCHARAT(locinput);
2568 break;
2569 }
ffc61ed2 2570 if (!(OP(scan) == DIGIT
9442cb0e 2571 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2572 sayNO;
2573 nextchr = UCHARAT(++locinput);
2574 break;
d6a28714 2575 case NDIGITL:
b8c5462f
JH
2576 PL_reg_flags |= RF_tainted;
2577 /* FALL THROUGH */
d6a28714 2578 case NDIGIT:
9442cb0e 2579 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2580 sayNO;
1aa99e6b 2581 if (do_utf8) {
8269fa76 2582 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2583 if (OP(scan) == NDIGIT
3568d838 2584 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2585 : isDIGIT_LC_utf8((U8*)locinput))
2586 {
a0ed51b3 2587 sayNO;
9442cb0e 2588 }
6f06b55f 2589 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2590 nextchr = UCHARAT(locinput);
2591 break;
2592 }
ffc61ed2 2593 if (OP(scan) == NDIGIT
9442cb0e 2594 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2595 sayNO;
2596 nextchr = UCHARAT(++locinput);
2597 break;
2598 case CLUMP:
b7c83a7e 2599 if (locinput >= PL_regeol)
a0ed51b3 2600 sayNO;
b7c83a7e
JH
2601 if (do_utf8) {
2602 LOAD_UTF8_CHARCLASS(mark,"~");
2603 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2604 sayNO;
2605 locinput += PL_utf8skip[nextchr];
2606 while (locinput < PL_regeol &&
2607 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2608 locinput += UTF8SKIP(locinput);
2609 if (locinput > PL_regeol)
2610 sayNO;
eb08e2da
JH
2611 }
2612 else
2613 locinput++;
a0ed51b3
LW
2614 nextchr = UCHARAT(locinput);
2615 break;
c8756f30 2616 case REFFL:
3280af22 2617 PL_reg_flags |= RF_tainted;
c8756f30 2618 /* FALL THROUGH */
c277df42 2619 case REF:
c8756f30 2620 case REFF:
c277df42 2621 n = ARG(scan); /* which paren pair */
cf93c79d 2622 ln = PL_regstartp[n];
2c2d71f5 2623 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2624 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2625 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2626 if (ln == PL_regendp[n])
a0d0e21e 2627 break;
a0ed51b3 2628
cf93c79d 2629 s = PL_bostr + ln;
1aa99e6b 2630 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2631 char *l = locinput;
cf93c79d 2632 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2633 /*
2634 * Note that we can't do the "other character" lookup trick as
2635 * in the 8-bit case (no pun intended) because in Unicode we
2636 * have to map both upper and title case to lower case.
2637 */
2638 if (OP(scan) == REFF) {
a2a2844f 2639 STRLEN ulen1, ulen2;
e7ae6809
JH
2640 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2641 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2642 while (s < e) {
2643 if (l >= PL_regeol)
2644 sayNO;
a2a2844f
JH
2645 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2646 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2647 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2648 sayNO;
a2a2844f
JH
2649 s += ulen1;
2650 l += ulen2;
a0ed51b3
LW
2651 }
2652 }
2653 locinput = l;
2654 nextchr = UCHARAT(locinput);
2655 break;
2656 }
2657
a0d0e21e 2658 /* Inline the first character, for speed. */
76e3520e 2659 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2660 (OP(scan) == REF ||
2661 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2662 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2663 sayNO;
cf93c79d 2664 ln = PL_regendp[n] - ln;
3280af22 2665 if (locinput + ln > PL_regeol)
4633a7c4 2666 sayNO;
c8756f30
AK
2667 if (ln > 1 && (OP(scan) == REF
2668 ? memNE(s, locinput, ln)
2669 : (OP(scan) == REFF
2670 ? ibcmp(s, locinput, ln)
2671 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2672 sayNO;
a0d0e21e 2673 locinput += ln;
76e3520e 2674 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2675 break;
2676
2677 case NOTHING:
c277df42 2678 case TAIL:
a0d0e21e
LW
2679 break;
2680 case BACK:
2681 break;
c277df42
IZ
2682 case EVAL:
2683 {
2684 dSP;
533c011a 2685 OP_4tree *oop = PL_op;
3280af22
NIS
2686 COP *ocurcop = PL_curcop;
2687 SV **ocurpad = PL_curpad;
c277df42 2688 SV *ret;
9041c2e3 2689
c277df42 2690 n = ARG(scan);
533c011a 2691 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2692 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2693 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2694 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2695
8e5e9ebe
RGS
2696 {
2697 SV **before = SP;
2698 CALLRUNOPS(aTHX); /* Scalar context. */
2699 SPAGAIN;
2700 if (SP == before)
2701 ret = Nullsv; /* protect against empty (?{}) blocks. */
2702 else {
2703 ret = POPs;
2704 PUTBACK;
2705 }
2706 }
2707
0f5d15d6
IZ
2708 PL_op = oop;
2709 PL_curpad = ocurpad;
2710 PL_curcop = ocurcop;
c277df42 2711 if (logical) {
0f5d15d6
IZ
2712 if (logical == 2) { /* Postponed subexpression. */
2713 regexp *re;
22c35a8c 2714 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2715 re_cc_state state;
0f5d15d6
IZ
2716 CHECKPOINT cp, lastcp;
2717
2718 if(SvROK(ret) || SvRMAGICAL(ret)) {
2719 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2720
2721 if(SvMAGICAL(sv))
14befaf4 2722 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2723 }
2724 if (mg) {
2725 re = (regexp *)mg->mg_obj;
df0003d4 2726 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2727 }
2728 else {
2729 STRLEN len;
2730 char *t = SvPV(ret, len);
2731 PMOP pm;
2732 char *oprecomp = PL_regprecomp;
2733 I32 osize = PL_regsize;
2734 I32 onpar = PL_regnpar;
2735
5fcd1c1b 2736 Zero(&pm, 1, PMOP);
cea2e8a9 2737 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2738 if (!(SvFLAGS(ret)
0f5d15d6 2739 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2740 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2741 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2742 PL_regprecomp = oprecomp;
2743 PL_regsize = osize;
2744 PL_regnpar = onpar;
2745 }
2746 DEBUG_r(
9041c2e3 2747 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2748 "Entering embedded `%s%.60s%s%s'\n",
2749 PL_colors[0],
2750 re->precomp,
2751 PL_colors[1],
2752 (strlen(re->precomp) > 60 ? "..." : ""))
2753 );
2754 state.node = next;
2755 state.prev = PL_reg_call_cc;
2756 state.cc = PL_regcc;
2757 state.re = PL_reg_re;
2758
2ab05381 2759 PL_regcc = 0;
9041c2e3 2760
0f5d15d6 2761 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2762 REGCP_SET(lastcp);
0f5d15d6
IZ
2763 cache_re(re);
2764 state.ss = PL_savestack_ix;
2765 *PL_reglastparen = 0;
a01268b5 2766 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2767 PL_reg_call_cc = &state;
2768 PL_reginput = locinput;
2c2d71f5
JH
2769
2770 /* XXXX This is too dramatic a measure... */
2771 PL_reg_maxiter = 0;
2772
0f5d15d6 2773 if (regmatch(re->program + 1)) {
2c914db6
IZ
2774 /* Even though we succeeded, we need to restore
2775 global variables, since we may be wrapped inside
2776 SUSPEND, thus the match may be not finished yet. */
2777
2778 /* XXXX Do this only if SUSPENDed? */
2779 PL_reg_call_cc = state.prev;
2780 PL_regcc = state.cc;
2781 PL_reg_re = state.re;
2782 cache_re(PL_reg_re);
2783
2784 /* XXXX This is too dramatic a measure... */
2785 PL_reg_maxiter = 0;
2786
2787 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2788 ReREFCNT_dec(re);
2789 regcpblow(cp);
2790 sayYES;
2791 }
0f5d15d6 2792 ReREFCNT_dec(re);
02db2b7b 2793 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2794 regcppop();
2795 PL_reg_call_cc = state.prev;
2796 PL_regcc = state.cc;
2797 PL_reg_re = state.re;
d3790889 2798 cache_re(PL_reg_re);
2c2d71f5
JH
2799
2800 /* XXXX This is too dramatic a measure... */
2801 PL_reg_maxiter = 0;
2802
8e514ae6 2803 logical = 0;
0f5d15d6
IZ
2804 sayNO;
2805 }
c277df42 2806 sw = SvTRUE(ret);
0f5d15d6 2807 logical = 0;
a0ed51b3
LW
2808 }
2809 else
3280af22 2810 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2811 break;
2812 }
a0d0e21e 2813 case OPEN:
c277df42 2814 n = ARG(scan); /* which paren pair */
3280af22
NIS
2815 PL_reg_start_tmp[n] = locinput;
2816 if (n > PL_regsize)
2817 PL_regsize = n;
a0d0e21e
LW
2818 break;
2819 case CLOSE:
c277df42 2820 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2821 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2822 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2823 if (n > *PL_reglastparen)
2824 *PL_reglastparen = n;
a01268b5 2825 *PL_reglastcloseparen = n;
a0d0e21e 2826 break;
c277df42
IZ
2827 case GROUPP:
2828 n = ARG(scan); /* which paren pair */
cf93c79d 2829 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2830 break;
2831 case IFTHEN:
2c2d71f5 2832 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2833 if (sw)
2834 next = NEXTOPER(NEXTOPER(scan));
2835 else {
2836 next = scan + ARG(scan);
2837 if (OP(next) == IFTHEN) /* Fake one. */
2838 next = NEXTOPER(NEXTOPER(next));
2839 }
2840 break;
2841 case LOGICAL:
0f5d15d6 2842 logical = scan->flags;
c277df42 2843 break;
2ab05381
IZ
2844/*******************************************************************
2845 PL_regcc contains infoblock about the innermost (...)* loop, and
2846 a pointer to the next outer infoblock.
2847
2848 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2849
2850 1) After matching X, regnode for CURLYX is processed;
2851
9041c2e3 2852 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2853 regmatch() recursively with the starting point at WHILEM node;
2854
2855 3) Each hit of WHILEM node tries to match A and Z (in the order
2856 depending on the current iteration, min/max of {min,max} and
2857 greediness). The information about where are nodes for "A"
2858 and "Z" is read from the infoblock, as is info on how many times "A"
2859 was already matched, and greediness.
2860
2861 4) After A matches, the same WHILEM node is hit again.
2862
2863 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2864 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2865 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2866 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2867 of the external loop.
2868
2869 Currently present infoblocks form a tree with a stem formed by PL_curcc
2870 and whatever it mentions via ->next, and additional attached trees
2871 corresponding to temporarily unset infoblocks as in "5" above.
2872
9041c2e3 2873 In the following picture infoblocks for outer loop of
2ab05381
IZ
2874 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2875 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2876 infoblocks are drawn below the "reset" infoblock.
2877
2878 In fact in the picture below we do not show failed matches for Z and T
2879 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2880 more obvious *why* one needs to *temporary* unset infoblocks.]
2881
2882 Matched REx position InfoBlocks Comment
2883 (Y(A)*?Z)*?T x
2884 Y(A)*?Z)*?T x <- O
2885 Y (A)*?Z)*?T x <- O
2886 Y A)*?Z)*?T x <- O <- I
2887 YA )*?Z)*?T x <- O <- I
2888 YA A)*?Z)*?T x <- O <- I
2889 YAA )*?Z)*?T x <- O <- I
2890 YAA Z)*?T x <- O # Temporary unset I
2891 I
2892
2893 YAAZ Y(A)*?Z)*?T x <- O
2894 I
2895
2896 YAAZY (A)*?Z)*?T x <- O
2897 I
2898
2899 YAAZY A)*?Z)*?T x <- O <- I
2900 I
2901
2902 YAAZYA )*?Z)*?T x <- O <- I
2903 I
2904
2905 YAAZYA Z)*?T x <- O # Temporary unset I
2906 I,I
2907
2908 YAAZYAZ )*?T x <- O
2909 I,I
2910
2911 YAAZYAZ T x # Temporary unset O
2912 O
2913 I,I
2914
2915 YAAZYAZT x
2916 O
2917 I,I
2918 *******************************************************************/
a0d0e21e
LW
2919 case CURLYX: {
2920 CURCUR cc;
3280af22 2921 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2922 /* No need to save/restore up to this paren */
2923 I32 parenfloor = scan->flags;
c277df42
IZ
2924
2925 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2926 next += ARG(next);
3280af22
NIS
2927 cc.oldcc = PL_regcc;
2928 PL_regcc = &cc;
cb434fcc
IZ
2929 /* XXXX Probably it is better to teach regpush to support
2930 parenfloor > PL_regsize... */
2931 if (parenfloor > *PL_reglastparen)
2932 parenfloor = *PL_reglastparen; /* Pessimization... */
2933 cc.parenfloor = parenfloor;
a0d0e21e
LW
2934 cc.cur = -1;
2935 cc.min = ARG1(scan);
2936 cc.max = ARG2(scan);
c277df42 2937 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2938 cc.next = next;
2939 cc.minmod = minmod;
2940 cc.lastloc = 0;
3280af22 2941 PL_reginput = locinput;
a0d0e21e
LW
2942 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2943 regcpblow(cp);
3280af22 2944 PL_regcc = cc.oldcc;
4633a7c4 2945 saySAME(n);
a0d0e21e
LW
2946 }
2947 /* NOT REACHED */
2948 case WHILEM: {
2949 /*
2950 * This is really hard to understand, because after we match
2951 * what we're trying to match, we must make sure the rest of
2c2d71f5 2952 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2953 * to go back UP the parse tree by recursing ever deeper. And
2954 * if it fails, we have to reset our parent's current state
2955 * that we can try again after backing off.
2956 */
2957
c277df42 2958 CHECKPOINT cp, lastcp;
3280af22 2959 CURCUR* cc = PL_regcc;
c277df42
IZ
2960 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2961
4633a7c4 2962 n = cc->cur + 1; /* how many we know we matched */
3280af22 2963 PL_reginput = locinput;
a0d0e21e 2964
c277df42 2965 DEBUG_r(
9041c2e3
NIS
2966 PerlIO_printf(Perl_debug_log,
2967 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2968 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2969 (long)n, (long)cc->min,
c277df42
IZ
2970 (long)cc->max, (long)cc)
2971 );
4633a7c4 2972
a0d0e21e
LW
2973 /* If degenerate scan matches "", assume scan done. */
2974
579cf2c3 2975 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2976 PL_regcc = cc->oldcc;
2ab05381
IZ
2977 if (PL_regcc)
2978 ln = PL_regcc->cur;
c277df42 2979 DEBUG_r(
c3464db5
DD
2980 PerlIO_printf(Perl_debug_log,
2981 "%*s empty match detected, try continuation...\n",
3280af22 2982 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2983 );
a0d0e21e 2984 if (regmatch(cc->next))
4633a7c4 2985 sayYES;
2ab05381
IZ
2986 if (PL_regcc)
2987 PL_regcc->cur = ln;
3280af22 2988 PL_regcc = cc;
4633a7c4 2989 sayNO;
a0d0e21e
LW
2990 }
2991
2992 /* First just match a string of min scans. */
2993
2994 if (n < cc->min) {
2995 cc->cur = n;
2996 cc->lastloc = locinput;
4633a7c4
LW
2997 if (regmatch(cc->scan))
2998 sayYES;
2999 cc->cur = n - 1;
c277df42 3000 cc->lastloc = lastloc;
4633a7c4 3001 sayNO;
a0d0e21e
LW
3002 }
3003
2c2d71f5
JH
3004 if (scan->flags) {
3005 /* Check whether we already were at this position.
3006 Postpone detection until we know the match is not
3007 *that* much linear. */
3008 if (!PL_reg_maxiter) {
3009 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3010 PL_reg_leftiter = PL_reg_maxiter;
3011 }
3012 if (PL_reg_leftiter-- == 0) {
3013 I32 size = (PL_reg_maxiter + 7)/8;
3014 if (PL_reg_poscache) {
3015 if (PL_reg_poscache_size < size) {
3016 Renew(PL_reg_poscache, size, char);
3017 PL_reg_poscache_size = size;
3018 }
3019 Zero(PL_reg_poscache, size, char);
3020 }
3021 else {
3022 PL_reg_poscache_size = size;
3023 Newz(29, PL_reg_poscache, size, char);
3024 }
3025 DEBUG_r(
3026 PerlIO_printf(Perl_debug_log,
3027 "%sDetected a super-linear match, switching on caching%s...\n",
3028 PL_colors[4], PL_colors[5])
3029 );
3030 }
3031 if (PL_reg_leftiter < 0) {
3032 I32 o = locinput - PL_bostr, b;
3033
3034 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3035 b = o % 8;
3036 o /= 8;
3037 if (PL_reg_poscache[o] & (1<<b)) {
3038 DEBUG_r(
3039 PerlIO_printf(Perl_debug_log,
3040 "%*s already tried at this position...\n",
3041 REPORT_CODE_OFF+PL_regindent*2, "")
3042 );
7821416a 3043 sayNO_SILENT;
2c2d71f5
JH
3044 }
3045 PL_reg_poscache[o] |= (1<<b);
3046 }
3047 }
3048
a0d0e21e
LW
3049 /* Prefer next over scan for minimal matching. */
3050
3051 if (cc->minmod) {
3280af22 3052 PL_regcc = cc->oldcc;
2ab05381
IZ
3053 if (PL_regcc)
3054 ln = PL_regcc->cur;
5f05dabc 3055 cp = regcppush(cc->parenfloor);
02db2b7b 3056 REGCP_SET(lastcp);
5f05dabc 3057 if (regmatch(cc->next)) {
c277df42 3058 regcpblow(cp);
4633a7c4 3059 sayYES; /* All done. */
5f05dabc 3060 }
02db2b7b 3061 REGCP_UNWIND(lastcp);
5f05dabc 3062 regcppop();
2ab05381
IZ
3063 if (PL_regcc)
3064 PL_regcc->cur = ln;
3280af22 3065 PL_regcc = cc;
a0d0e21e 3066
c277df42 3067 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3068 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3069 && !(PL_reg_flags & RF_warned)) {
3070 PL_reg_flags |= RF_warned;
e476b1b5 3071 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3072 "Complex regular subexpression recursion",
3073 REG_INFTY - 1);
c277df42 3074 }
4633a7c4 3075 sayNO;
c277df42 3076 }
a687059c 3077
c277df42 3078 DEBUG_r(
c3464db5
DD
3079 PerlIO_printf(Perl_debug_log,
3080 "%*s trying longer...\n",
3280af22 3081 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3082 );
a0d0e21e 3083 /* Try scanning more and see if it helps. */
3280af22 3084 PL_reginput = locinput;
a0d0e21e
LW
3085 cc->cur = n;
3086 cc->lastloc = locinput;
5f05dabc 3087 cp = regcppush(cc->parenfloor);
02db2b7b 3088 REGCP_SET(lastcp);
5f05dabc 3089 if (regmatch(cc->scan)) {
c277df42 3090 regcpblow(cp);
4633a7c4 3091 sayYES;
5f05dabc 3092 }
02db2b7b 3093 REGCP_UNWIND(lastcp);
5f05dabc 3094 regcppop();
4633a7c4 3095 cc->cur = n - 1;
c277df42 3096 cc->lastloc = lastloc;
4633a7c4 3097 sayNO;
a0d0e21e
LW
3098 }
3099
3100 /* Prefer scan over next for maximal matching. */
3101
3102 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3103 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3104 cc->cur = n;
3105 cc->lastloc = locinput;
02db2b7b 3106 REGCP_SET(lastcp);
5f05dabc 3107 if (regmatch(cc->scan)) {
c277df42 3108 regcpblow(cp);
4633a7c4 3109 sayYES;
5f05dabc 3110 }
02db2b7b 3111 REGCP_UNWIND(lastcp);
a0d0e21e 3112 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3113 PL_reginput = locinput;
c277df42 3114 DEBUG_r(
c3464db5
DD
3115 PerlIO_printf(Perl_debug_log,
3116 "%*s failed, try continuation...\n",
3280af22 3117 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3118 );
3119 }
9041c2e3 3120 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3121 && !(PL_reg_flags & RF_warned)) {
3280af22 3122 PL_reg_flags |= RF_warned;
e476b1b5 3123 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3124 "Complex regular subexpression recursion",
3125 REG_INFTY - 1);
a0d0e21e
LW
3126 }
3127
3128 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3129 PL_regcc = cc->oldcc;
2ab05381
IZ
3130 if (PL_regcc)
3131 ln = PL_regcc->cur;
a0d0e21e 3132 if (regmatch(cc->next))
4633a7c4 3133 sayYES;
2ab05381
IZ
3134 if (PL_regcc)
3135 PL_regcc->cur = ln;
3280af22 3136 PL_regcc = cc;
4633a7c4 3137 cc->cur = n - 1;
c277df42 3138 cc->lastloc = lastloc;
4633a7c4 3139 sayNO;
a0d0e21e
LW
3140 }
3141 /* NOT REACHED */
9041c2e3 3142 case BRANCHJ:
c277df42
IZ
3143 next = scan + ARG(scan);
3144 if (next == scan)
3145 next = NULL;
3146 inner = NEXTOPER(NEXTOPER(scan));
3147 goto do_branch;
9041c2e3 3148 case BRANCH:
c277df42
IZ
3149 inner = NEXTOPER(scan);
3150 do_branch:
3151 {
c277df42
IZ
3152 c1 = OP(scan);
3153 if (OP(next) != c1) /* No choice. */
3154 next = inner; /* Avoid recursion. */
a0d0e21e 3155 else {
02db2b7b
IZ
3156 I32 lastparen = *PL_reglastparen;
3157 I32 unwind1;
3158 re_unwind_branch_t *uw;
3159
3160 /* Put unwinding data on stack */
3161 unwind1 = SSNEWt(1,re_unwind_branch_t);
3162 uw = SSPTRt(unwind1,re_unwind_branch_t);
3163 uw->prev = unwind;
3164 unwind = unwind1;
3165 uw->type = ((c1 == BRANCH)
3166 ? RE_UNWIND_BRANCH
3167 : RE_UNWIND_BRANCHJ);
3168 uw->lastparen = lastparen;
3169 uw->next = next;
3170 uw->locinput = locinput;
3171 uw->nextchr = nextchr;
3172#ifdef DEBUGGING
3173 uw->regindent = ++PL_regindent;
3174#endif
c277df42 3175
02db2b7b
IZ
3176 REGCP_SET(uw->lastcp);
3177
3178 /* Now go into the first branch */
3179 next = inner;
a687059c 3180 }
a0d0e21e
LW
3181 }
3182 break;
3183 case MINMOD:
3184 minmod = 1;
3185 break;
c277df42
IZ
3186 case CURLYM:
3187 {
00db4c45 3188 I32 l = 0;
c277df42 3189 CHECKPOINT lastcp;
9041c2e3 3190
c277df42
IZ
3191 /* We suppose that the next guy does not need
3192 backtracking: in particular, it is of constant length,
3193 and has no parenths to influence future backrefs. */
3194 ln = ARG1(scan); /* min to match */
3195 n = ARG2(scan); /* max to match */
c277df42
IZ
3196 paren = scan->flags;
3197 if (paren) {
3280af22
NIS
3198 if (paren > PL_regsize)
3199 PL_regsize = paren;
3200 if (paren > *PL_reglastparen)
3201 *PL_reglastparen = paren;
c277df42 3202 }
dc45a647 3203 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3204 if (paren)
3205 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3206 PL_reginput = locinput;
c277df42
IZ
3207 if (minmod) {
3208 minmod = 0;
3209 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3210 sayNO;
f31a99c8
HS
3211 /* if we matched something zero-length we don't need to
3212 backtrack - capturing parens are already defined, so
3213 the caveat in the maximal case doesn't apply
3214
3215 XXXX if ln == 0, we can redo this check first time
3216 through the following loop
3217 */
3218 if (ln && l == 0)
3219 n = ln; /* don't backtrack */
3280af22 3220 locinput = PL_reginput;
cca55fe3 3221 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3222 regnode *text_node = next;
3223
cca55fe3 3224 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3225
cca55fe3 3226 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3227 else {
cca55fe3
JP
3228 if (PL_regkind[(U8)OP(text_node)] == REF) {
3229 I32 n, ln;
3230 n = ARG(text_node); /* which paren pair */
3231 ln = PL_regstartp[n];
3232 /* assume yes if we haven't seen CLOSEn */
3233 if (
3234 *PL_reglastparen < n ||
3235 ln == -1 ||
3236 ln == PL_regendp[n]
3237 ) {
3238 c1 = c2 = -1000;
3239 goto assume_ok_MM;
3240 }
3241 c1 = *(PL_bostr + ln);
3242 }
3243 else { c1 = (U8)*STRING(text_node); }
af5decee 3244 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3245 c2 = PL_fold[c1];
af5decee 3246 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3247 c2 = PL_fold_locale[c1];
3248 else
3249 c2 = c1;
3250 }
a0ed51b3
LW
3251 }
3252 else
c277df42 3253 c1 = c2 = -1000;
cca55fe3 3254 assume_ok_MM:
02db2b7b 3255 REGCP_SET(lastcp);
5f4b28b2 3256 /* This may be improved if l == 0. */
c277df42
IZ
3257 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3258 /* If it could work, try it. */
3259 if (c1 == -1000 ||
3280af22
NIS
3260 UCHARAT(PL_reginput) == c1 ||
3261 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3262 {
3263 if (paren) {
f31a99c8 3264 if (ln) {
cf93c79d
IZ
3265 PL_regstartp[paren] =
3266 HOPc(PL_reginput, -l) - PL_bostr;
3267 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3268 }
3269 else
cf93c79d 3270 PL_regendp[paren] = -1;
c277df42
IZ
3271 }
3272 if (regmatch(next))
3273 sayYES;
02db2b7b 3274 REGCP_UNWIND(lastcp);
c277df42
IZ
3275 }
3276 /* Couldn't or didn't -- move forward. */
3280af22 3277 PL_reginput = locinput;
c277df42
IZ
3278 if (regrepeat_hard(scan, 1, &l)) {
3279 ln++;
3280af22 3280 locinput = PL_reginput;
c277df42
IZ
3281 }
3282 else
3283 sayNO;
3284 }
a0ed51b3
LW
3285 }
3286 else {
c277df42 3287 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3288 /* if we matched something zero-length we don't need to
3289 backtrack, unless the minimum count is zero and we
3290 are capturing the result - in that case the capture
3291 being defined or not may affect later execution
3292 */
3293 if (n != 0 && l == 0 && !(paren && ln == 0))
3294 ln = n; /* don't backtrack */
3280af22 3295 locinput = PL_reginput;
c277df42 3296 DEBUG_r(
5c0ca799 3297 PerlIO_printf(Perl_debug_log,
faccc32b 3298 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3299 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3300 (IV) n, (IV)l)
c277df42
IZ
3301 );
3302 if (n >= ln) {
cca55fe3 3303 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3304 regnode *text_node = next;
3305
cca55fe3 3306 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3307
cca55fe3 3308 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3309 else {
cca55fe3
JP
3310 if (PL_regkind[(U8)OP(text_node)] == REF) {
3311 I32 n, ln;
3312 n = ARG(text_node); /* which paren pair */
3313 ln = PL_regstartp[n];
3314 /* assume yes if we haven't seen CLOSEn */
3315 if (
3316 *PL_reglastparen < n ||
3317 ln == -1 ||
3318 ln == PL_regendp[n]
3319 ) {
3320 c1 = c2 = -1000;
3321 goto assume_ok_REG;
3322 }
3323 c1 = *(PL_bostr + ln);
3324 }
3325 else { c1 = (U8)*STRING(text_node); }
3326
af5decee 3327 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3328 c2 = PL_fold[c1];
af5decee 3329 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3330 c2 = PL_fold_locale[c1];
3331 else
3332 c2 = c1;
3333 }
a0ed51b3
LW
3334 }
3335 else
c277df42
IZ
3336 c1 = c2 = -1000;
3337 }
cca55fe3 3338 assume_ok_REG:
02db2b7b 3339 REGCP_SET(lastcp);
c277df42
IZ
3340 while (n >= ln) {
3341 /* If it could work, try it. */
3342 if (c1 == -1000 ||
3280af22
NIS
3343 UCHARAT(PL_reginput) == c1 ||
3344 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3345 {
3346 DEBUG_r(
c3464db5 3347 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3348 "%*s trying tail with n=%"IVdf"...\n",
3349 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3350 );
3351 if (paren) {
3352 if (n) {
cf93c79d
IZ
3353 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3354 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3355 }
a0ed51b3 3356 else
cf93c79d 3357 PL_regendp[paren] = -1;
c277df42 3358 }
a0ed51b3
LW
3359 if (regmatch(next))
3360 sayYES;
02db2b7b 3361 REGCP_UNWIND(lastcp);
a0ed51b3 3362 }
c277df42
IZ
3363 /* Couldn't or didn't -- back up. */
3364 n--;
dfe13c55 3365 locinput = HOPc(locinput, -l);
3280af22 3366 PL_reginput = locinput;
c277df42
IZ
3367 }
3368 }
3369 sayNO;
3370 break;
3371 }
3372 case CURLYN:
3373 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3374 if (paren > PL_regsize)
3375 PL_regsize = paren;
3376 if (paren > *PL_reglastparen)
3377 *PL_reglastparen = paren;
c277df42
IZ
3378 ln = ARG1(scan); /* min to match */
3379 n = ARG2(scan); /* max to match */
dc45a647 3380 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3381 goto repeat;
a0d0e21e 3382 case CURLY:
c277df42 3383 paren = 0;
a0d0e21e
LW
3384 ln = ARG1(scan); /* min to match */
3385 n = ARG2(scan); /* max to match */
dc45a647 3386 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3387 goto repeat;
3388 case STAR:
3389 ln = 0;
c277df42 3390 n = REG_INFTY;
a0d0e21e 3391 scan = NEXTOPER(scan);
c277df42 3392 paren = 0;
a0d0e21e
LW
3393 goto repeat;
3394 case PLUS:
c277df42
IZ
3395 ln = 1;
3396 n = REG_INFTY;
3397 scan = NEXTOPER(scan);
3398 paren = 0;
3399 repeat:
a0d0e21e
LW
3400 /*
3401 * Lookahead to avoid useless match attempts
3402 * when we know what character comes next.
3403 */
5f80c4cf
JP
3404
3405 /*
3406 * Used to only do .*x and .*?x, but now it allows
3407 * for )'s, ('s and (?{ ... })'s to be in the way
3408 * of the quantifier and the EXACT-like node. -- japhy
3409 */
3410
cca55fe3 3411 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3412 U8 *s;
3413 regnode *text_node = next;
3414
cca55fe3 3415 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3416
cca55fe3 3417 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3418 else {
cca55fe3
JP
3419 if (PL_regkind[(U8)OP(text_node)] == REF) {
3420 I32 n, ln;
3421 n = ARG(text_node); /* which paren pair */
3422 ln = PL_regstartp[n];
3423 /* assume yes if we haven't seen CLOSEn */
3424 if (
3425 *PL_reglastparen < n ||
3426 ln == -1 ||
3427 ln == PL_regendp[n]
3428 ) {
3429 c1 = c2 = -1000;
3430 goto assume_ok_easy;
3431 }
9246c65e 3432 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3433 }
3434 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3435
3436 if (!UTF) {
3437 c2 = c1 = *s;
f65d3ee7 3438 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3439 c2 = PL_fold[c1];
f65d3ee7 3440 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3441 c2 = PL_fold_locale[c1];
1aa99e6b 3442 }
5f80c4cf 3443 else { /* UTF */
f65d3ee7 3444 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3445 STRLEN ulen1, ulen2;
e7ae6809
JH
3446 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3447 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3448
3449 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3450 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3451
3452 c1 = utf8_to_uvuni(tmpbuf1, 0);
3453 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3454 }
3455 else {
3456 c2 = c1 = utf8_to_uvchr(s, NULL);
3457 }
1aa99e6b
IH
3458 }
3459 }
bbce6d69 3460 }
a0d0e21e 3461 else
bbce6d69 3462 c1 = c2 = -1000;
cca55fe3 3463 assume_ok_easy:
3280af22 3464 PL_reginput = locinput;
a0d0e21e 3465 if (minmod) {
c277df42 3466 CHECKPOINT lastcp;
a0d0e21e
LW
3467 minmod = 0;
3468 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3469 sayNO;
a0ed51b3 3470 locinput = PL_reginput;
02db2b7b 3471 REGCP_SET(lastcp);
0fe9bf95 3472 if (c1 != -1000) {
1aa99e6b 3473 char *e; /* Should not check after this */
0fe9bf95
IZ
3474 char *old = locinput;
3475
1aa99e6b 3476 if (n == REG_INFTY) {
0fe9bf95 3477 e = PL_regeol - 1;
1aa99e6b
IH
3478 if (do_utf8)
3479 while (UTF8_IS_CONTINUATION(*(U8*)e))
3480 e--;
3481 }
3482 else if (do_utf8) {
3483 int m = n - ln;
3484 for (e = locinput;
3485 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3486 e += UTF8SKIP(e);
3487 }
3488 else {
3489 e = locinput + n - ln;
3490 if (e >= PL_regeol)
3491 e = PL_regeol - 1;
3492 }
0fe9bf95 3493 while (1) {
1aa99e6b 3494 int count;
0fe9bf95 3495 /* Find place 'next' could work */
1aa99e6b
IH
3496 if (!do_utf8) {
3497 if (c1 == c2) {
a8e8ab15
JH
3498 while (locinput <= e &&
3499 UCHARAT(locinput) != c1)
1aa99e6b
IH
3500 locinput++;
3501 } else {
9041c2e3 3502 while (locinput <= e
a8e8ab15
JH
3503 && UCHARAT(locinput) != c1
3504 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3505 locinput++;
3506 }
3507 count = locinput - old;
3508 }
3509 else {
3510 STRLEN len;
3511 if (c1 == c2) {
3512 for (count = 0;
3513 locinput <= e &&
9041c2e3 3514 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3515 count++)
3516 locinput += len;
3517
3518 } else {
3519 for (count = 0; locinput <= e; count++) {
9041c2e3 3520 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3521 if (c == c1 || c == c2)
3522 break;
9041c2e3 3523 locinput += len;
1aa99e6b
IH
3524 }
3525 }
0fe9bf95 3526 }
9041c2e3 3527 if (locinput > e)
0fe9bf95
IZ
3528 sayNO;
3529 /* PL_reginput == old now */
3530 if (locinput != old) {
3531 ln = 1; /* Did some */
1aa99e6b 3532 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3533 sayNO;
3534 }
3535 /* PL_reginput == locinput now */
29d1e993 3536 TRYPAREN(paren, ln, locinput);
0fe9bf95 3537 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3538 REGCP_UNWIND(lastcp);
0fe9bf95 3539 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3540 old = locinput;
3541 if (do_utf8)
3542 locinput += UTF8SKIP(locinput);
3543 else
3544 locinput++;
0fe9bf95
IZ
3545 }
3546 }
3547 else
c277df42 3548 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3549 UV c;
3550 if (c1 != -1000) {
3551 if (do_utf8)
9041c2e3 3552 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3553 else
9041c2e3 3554 c = UCHARAT(PL_reginput);
2390ecbc
PP
3555 /* If it could work, try it. */
3556 if (c == c1 || c == c2)
3557 {
3558 TRYPAREN(paren, n, PL_reginput);
3559 REGCP_UNWIND(lastcp);
3560 }
1aa99e6b 3561 }
a0d0e21e 3562 /* If it could work, try it. */
2390ecbc 3563 else if (c1 == -1000)
bbce6d69 3564 {
29d1e993 3565 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3566 REGCP_UNWIND(lastcp);
bbce6d69 3567 }
c277df42 3568 /* Couldn't or didn't -- move forward. */
a0ed51b3 3569 PL_reginput = locinput;
a0d0e21e
LW
3570 if (regrepeat(scan, 1)) {
3571 ln++;
a0ed51b3
LW
3572 locinput = PL_reginput;
3573 }
3574 else
4633a7c4 3575 sayNO;
a0d0e21e
LW
3576 }
3577 }
3578 else {
c277df42 3579 CHECKPOINT lastcp;
a0d0e21e 3580 n = regrepeat(scan, n);
a0ed51b3 3581 locinput = PL_reginput;
22c35a8c 3582 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3583 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3584 ln = n; /* why back off? */
1aeab75a
GS
3585 /* ...because $ and \Z can match before *and* after
3586 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3587 We should back off by one in this case. */
3588 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3589 ln--;
3590 }
02db2b7b 3591 REGCP_SET(lastcp);
c277df42 3592 if (paren) {
8fa7f367 3593 UV c = 0;
c277df42 3594 while (n >= ln) {
1aa99e6b
IH
3595 if (c1 != -1000) {
3596 if (do_utf8)
9041c2e3 3597 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3598 else
9041c2e3 3599 c = UCHARAT(PL_reginput);
1aa99e6b 3600 }
c277df42 3601 /* If it could work, try it. */
1aa99e6b 3602 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3603 {
29d1e993 3604 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3605 REGCP_UNWIND(lastcp);
c277df42
IZ
3606 }
3607 /* Couldn't or didn't -- back up. */
3608 n--;
dfe13c55 3609 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3610 }
a0ed51b3
LW
3611 }
3612 else {
8fa7f367 3613 UV c = 0;
c277df42 3614 while (n >= ln) {
1aa99e6b
IH
3615 if (c1 != -1000) {
3616 if (do_utf8)
9041c2e3 3617 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3618 else
9041c2e3 3619 c = UCHARAT(PL_reginput);
1aa99e6b 3620 }
c277df42 3621 /* If it could work, try it. */
1aa99e6b 3622 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3623 {
29d1e993 3624 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3625 REGCP_UNWIND(lastcp);
c277df42
IZ
3626 }
3627 /* Couldn't or didn't -- back up. */
3628 n--;
dfe13c55 3629 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3630 }
a0d0e21e
LW
3631 }
3632 }
4633a7c4 3633 sayNO;
c277df42 3634 break;
a0d0e21e 3635 case END:
0f5d15d6
IZ
3636 if (PL_reg_call_cc) {
3637 re_cc_state *cur_call_cc = PL_reg_call_cc;
3638 CURCUR *cctmp = PL_regcc;
3639 regexp *re = PL_reg_re;
3640 CHECKPOINT cp, lastcp;
3641
3642 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3643 REGCP_SET(lastcp);
0f5d15d6
IZ
3644 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3645 the caller. */
3646 PL_reginput = locinput; /* Make position available to
3647 the callcc. */
3648 cache_re(PL_reg_call_cc->re);
3649 PL_regcc = PL_reg_call_cc->cc;
3650 PL_reg_call_cc = PL_reg_call_cc->prev;
3651 if (regmatch(cur_call_cc->node)) {
3652 PL_reg_call_cc = cur_call_cc;
3653 regcpblow(cp);
3654 sayYES;
3655 }
02db2b7b 3656 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3657 regcppop();
3658 PL_reg_call_cc = cur_call_cc;
3659 PL_regcc = cctmp;
3660 PL_reg_re = re;
3661 cache_re(re);
3662
3663 DEBUG_r(
3664 PerlIO_printf(Perl_debug_log,
3665 "%*s continuation failed...\n",
3666 REPORT_CODE_OFF+PL_regindent*2, "")
3667 );
7821416a 3668 sayNO_SILENT;
0f5d15d6 3669 }
7821416a
IZ
3670 if (locinput < PL_regtill) {
3671 DEBUG_r(PerlIO_printf(Perl_debug_log,
3672 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3673 PL_colors[4],
3674 (long)(locinput - PL_reg_starttry),
3675 (long)(PL_regtill - PL_reg_starttry),
3676 PL_colors[5]));
3677 sayNO_FINAL; /* Cannot match: too short. */
3678 }
3679 PL_reginput = locinput; /* put where regtry can find it */
3680 sayYES_FINAL; /* Success! */
7e5428c5 3681 case SUCCEED:
3280af22 3682 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3683 sayYES_LOUD; /* Success! */
c277df42
IZ
3684 case SUSPEND:
3685 n = 1;
9fe1d20c 3686 PL_reginput = locinput;
9041c2e3 3687 goto do_ifmatch;
a0d0e21e 3688 case UNLESSM:
c277df42 3689 n = 0;
a0ed51b3 3690 if (scan->flags) {
efb30f32
HS
3691 s = HOPBACKc(locinput, scan->flags);
3692 if (!s)
3693 goto say_yes;
3694 PL_reginput = s;
a0ed51b3
LW
3695 }
3696 else
3697 PL_reginput = locinput;
c277df42
IZ
3698 goto do_ifmatch;
3699 case IFMATCH:
3700 n = 1;
a0ed51b3 3701 if (scan->flags) {
efb30f32
HS
3702 s = HOPBACKc(locinput, scan->flags);
3703 if (!s)
3704 goto say_no;
3705 PL_reginput = s;
a0ed51b3
LW
3706 }
3707 else
3708 PL_reginput = locinput;
3709
c277df42 3710 do_ifmatch:
c277df42
IZ
3711 inner = NEXTOPER(NEXTOPER(scan));
3712 if (regmatch(inner) != n) {
3713 say_no:
3714 if (logical) {
3715 logical = 0;
3716 sw = 0;
3717 goto do_longjump;
a0ed51b3
LW
3718 }
3719 else
c277df42
IZ
3720 sayNO;
3721 }
3722 say_yes:
3723 if (logical) {
3724 logical = 0;
3725 sw = 1;
3726 }
fe44a5e8 3727 if (OP(scan) == SUSPEND) {
3280af22 3728 locinput = PL_reginput;
565764a8 3729 nextchr = UCHARAT(locinput);
fe44a5e8 3730 }
c277df42
IZ
3731 /* FALL THROUGH. */
3732 case LONGJMP:
3733 do_longjump:
3734 next = scan + ARG(scan);
3735 if (next == scan)
3736 next = NULL;
a0d0e21e
LW
3737 break;
3738 default:
b900a521 3739 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3740 PTR2UV(scan), OP(scan));
cea2e8a9 3741 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3742 }
02db2b7b 3743 reenter:
a0d0e21e
LW
3744 scan = next;
3745 }
a687059c 3746
a0d0e21e
LW
3747 /*
3748 * We get here only if there's trouble -- normally "case END" is
3749 * the terminating point.
3750 */
cea2e8a9 3751 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3752 /*NOTREACHED*/
4633a7c4
LW
3753 sayNO;
3754
7821416a
IZ
3755yes_loud:
3756 DEBUG_r(
3757 PerlIO_printf(Perl_debug_log,
3758 "%*s %scould match...%s\n",
3759 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3760 );
3761 goto yes;
3762yes_final:
3763 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3764 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3765yes:
3766#ifdef DEBUGGING
3280af22 3767 PL_regindent--;
4633a7c4 3768#endif
02db2b7b
IZ
3769
3770#if 0 /* Breaks $^R */
3771 if (unwind)
3772 regcpblow(firstcp);
3773#endif
4633a7c4
LW
3774 return 1;
3775
3776no:
7821416a
IZ
3777 DEBUG_r(
3778 PerlIO_printf(Perl_debug_log,
3779 "%*s %sfailed...%s\n",
3780 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3781 );
3782 goto do_no;
3783no_final:
3784do_no:
02db2b7b
IZ
3785 if (unwind) {
3786 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3787
3788 switch (uw->type) {
3789 case RE_UNWIND_BRANCH:
3790 case RE_UNWIND_BRANCHJ:
3791 {
3792 re_unwind_branch_t *uwb = &(uw->branch);
3793 I32 lastparen = uwb->lastparen;
9041c2e3 3794
02db2b7b
IZ
3795 REGCP_UNWIND(uwb->lastcp);
3796 for (n = *PL_reglastparen; n > lastparen; n--)
3797 PL_regendp[n] = -1;
3798 *PL_reglastparen = n;
3799 scan = next = uwb->next;
9041c2e3
NIS
3800 if ( !scan ||
3801 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3802 ? BRANCH : BRANCHJ) ) { /* Failure */
3803 unwind = uwb->prev;
3804#ifdef DEBUGGING
3805 PL_regindent--;
3806#endif
3807 goto do_no;
3808 }
3809 /* Have more choice yet. Reuse the same uwb. */
3810 /*SUPPRESS 560*/
3811 if ((n = (uwb->type == RE_UNWIND_BRANCH
3812 ? NEXT_OFF(next) : ARG(next))))
3813 next += n;
3814 else
3815 next = NULL; /* XXXX Needn't unwinding in this case... */
3816 uwb->next = next;
3817 next = NEXTOPER(scan);
3818 if (uwb->type == RE_UNWIND_BRANCHJ)
3819 next = NEXTOPER(next);
3820 locinput = uwb->locinput;
3821 nextchr = uwb->nextchr;
3822#ifdef DEBUGGING
3823 PL_regindent = uwb->regindent;
3824#endif
3825
3826 goto reenter;
3827 }
3828 /* NOT REACHED */
3829 default:
3830 Perl_croak(aTHX_ "regexp unwind memory corruption");
3831 }
3832 /* NOT REACHED */
3833 }
4633a7c4 3834#ifdef DEBUGGING
3280af22 3835 PL_regindent--;
4633a7c4 3836#endif
a0d0e21e 3837 return 0;
a687059c
LW
3838}
3839
3840/*
3841 - regrepeat - repeatedly match something simple, report how many
3842 */
3843/*
3844 * [This routine now assumes that it will only match on things of length 1.
3845 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3846 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3847 */
76e3520e 3848STATIC I32
cea2e8a9 3849S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3850{
a0d0e21e 3851 register char *scan;
a0d0e21e 3852 register I32 c;
3280af22 3853 register char *loceol = PL_regeol;
a0ed51b3 3854 register I32 hardcount = 0;
53c4c00c 3855 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3856
3280af22 3857 scan = PL_reginput;
c277df42 3858 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3859 loceol = scan + max;
a0d0e21e 3860 switch (OP(p)) {
22c35a8c 3861 case REG_ANY:
1aa99e6b 3862 if (do_utf8) {
ffc61ed2 3863 loceol = PL_regeol;
1aa99e6b 3864 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3865 scan += UTF8SKIP(scan);
3866 hardcount++;
3867 }
3868 } else {
3869 while (scan < loceol && *scan != '\n')
3870 scan++;
a0ed51b3
LW
3871 }
3872 break;
ffc61ed2 3873 case SANY:
3baa4c62 3874 scan = loceol;
a0ed51b3 3875 break;
f33976b4
DB
3876 case CANY:
3877 scan = loceol;
3878 break;
bbce6d69 3879 case EXACT: /* length of string is 1 */
cd439c50 3880 c = (U8)*STRING(p);
bbce6d69 3881 while (scan < loceol && UCHARAT(scan) == c)
3882 scan++;
3883 break;
3884 case EXACTF: /* length of string is 1 */
cd439c50 3885 c = (U8)*STRING(p);
bbce6d69 3886 while (scan < loceol &&
22c35a8c 3887 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3888 scan++;
3889 break;
3890 case EXACTFL: /* length of string is 1 */
3280af22 3891 PL_reg_flags |= RF_tainted;
cd439c50 3892 c = (U8)*STRING(p);
bbce6d69 3893 while (scan < loceol &&
22c35a8c 3894 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3895 scan++;
3896 break;
3897 case ANYOF:
ffc61ed2
JH
3898 if (do_utf8) {
3899 loceol = PL_regeol;
1aa99e6b
IH
3900 while (hardcount < max && scan < loceol &&
3901 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3902 scan += UTF8SKIP(scan);
3903 hardcount++;
3904 }
3905 } else {
3906 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3907 scan++;
3908 }
a0d0e21e
LW
3909 break;
3910 case ALNUM:
1aa99e6b 3911 if (do_utf8) {
ffc61ed2 3912 loceol = PL_regeol;
8269fa76 3913 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3914 while (hardcount < max && scan < loceol &&
3568d838 3915 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3916 scan += UTF8SKIP(scan);
3917 hardcount++;
3918 }
3919 } else {
3920 while (scan < loceol && isALNUM(*scan))
3921 scan++;
a0ed51b3
LW
3922 }
3923 break;
bbce6d69 3924 case ALNUML:
3280af22 3925 PL_reg_flags |= RF_tainted;
1aa99e6b 3926 if (do_utf8) {
ffc61ed2 3927 loceol = PL_regeol;
1aa99e6b
IH
3928 while (hardcount < max && scan < loceol &&
3929 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3930 scan += UTF8SKIP(scan);
3931 hardcount++;
3932 }
3933 } else {
3934 while (scan < loceol && isALNUM_LC(*scan))
3935 scan++;
a0ed51b3
LW
3936 }
3937 break;
a0d0e21e 3938 case NALNUM:
1aa99e6b 3939 if (do_utf8) {
ffc61ed2 3940 loceol = PL_regeol;
8269fa76 3941 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3942 while (hardcount < max && scan < loceol &&
3568d838 3943 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3944 scan += UTF8SKIP(scan);
3945 hardcount++;
3946 }
3947 } else {
3948 while (scan < loceol && !isALNUM(*scan))
3949 scan++;
a0ed51b3
LW
3950 }
3951 break;
bbce6d69 3952 case NALNUML:
3280af22 3953 PL_reg_flags |= RF_tainted;
1aa99e6b 3954 if (do_utf8) {
ffc61ed2 3955 loceol = PL_regeol;
1aa99e6b
IH
3956 while (hardcount < max && scan < loceol &&
3957 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3958 scan += UTF8SKIP(scan);
3959 hardcount++;
3960 }
3961 } else {
3962 while (scan < loceol && !isALNUM_LC(*scan))
3963 scan++;
a0ed51b3
LW
3964 }
3965 break;
a0d0e21e 3966 case SPACE:
1aa99e6b 3967 if (do_utf8) {
ffc61ed2 3968 loceol = PL_regeol;
8269fa76 3969 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3970 while (hardcount < max && scan < loceol &&
3568d838
JH
3971 (*scan == ' ' ||
3972 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3973 scan += UTF8SKIP(scan);
3974 hardcount++;
3975 }
3976 } else {
3977 while (scan < loceol && isSPACE(*scan))
3978 scan++;
a0ed51b3
LW
3979 }
3980 break;
bbce6d69 3981 case SPACEL:
3280af22 3982 PL_reg_flags |= RF_tainted;
1aa99e6b 3983 if (do_utf8) {
ffc61ed2 3984 loceol = PL_regeol;
1aa99e6b 3985 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3986 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3987 scan += UTF8SKIP(scan);
3988 hardcount++;
3989 }
3990 } else {
3991 while (scan < loceol && isSPACE_LC(*scan))
3992 scan++;
a0ed51b3
LW
3993 }
3994 break;
a0d0e21e 3995 case NSPACE:
1aa99e6b 3996 if (do_utf8) {
ffc61ed2 3997 loceol = PL_regeol;
8269fa76 3998 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3999 while (hardcount < max && scan < loceol &&
3568d838
JH
4000 !(*scan == ' ' ||
4001 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4002 scan += UTF8SKIP(scan);
4003 hardcount++;
4004 }
4005 } else {
4006 while (scan < loceol && !isSPACE(*scan))
4007 scan++;
4008 break;
a0ed51b3 4009 }
bbce6d69 4010 case NSPACEL:
3280af22 4011 PL_reg_flags |= RF_tainted;
1aa99e6b 4012 if (do_utf8) {
ffc61ed2 4013 loceol = PL_regeol;
1aa99e6b 4014 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4015 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4016 scan += UTF8SKIP(scan);
4017 hardcount++;
4018 }
4019 } else {
4020 while (scan < loceol && !isSPACE_LC(*scan))
4021 scan++;
a0ed51b3
LW
4022 }
4023 break;
a0d0e21e 4024 case DIGIT:
1aa99e6b 4025 if (do_utf8) {
ffc61ed2 4026 loceol = PL_regeol;
8269fa76 4027 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4028 while (hardcount < max && scan < loceol &&
3568d838 4029 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4030 scan += UTF8SKIP(scan);
4031 hardcount++;
4032 }
4033 } else {
4034 while (scan < loceol && isDIGIT(*scan))
4035 scan++;
a0ed51b3
LW
4036 }
4037 break;
a0d0e21e 4038 case NDIGIT:
1aa99e6b 4039 if (do_utf8) {
ffc61ed2 4040 loceol = PL_regeol;
8269fa76 4041 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4042 while (hardcount < max && scan < loceol &&
3568d838 4043 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4044 scan += UTF8SKIP(scan);
4045 hardcount++;
4046 }
4047 } else {
4048 while (scan < loceol && !isDIGIT(*scan))
4049 scan++;
a0ed51b3
LW
4050 }
4051 break;
a0d0e21e
LW
4052 default: /* Called on something of 0 width. */
4053 break; /* So match right here or not at all. */
4054 }
a687059c 4055
a0ed51b3
LW
4056 if (hardcount)
4057 c = hardcount;
4058 else
4059 c = scan - PL_reginput;
3280af22 4060 PL_reginput = scan;
a687059c 4061
9041c2e3 4062 DEBUG_r(
c277df42
IZ
4063 {
4064 SV *prop = sv_newmortal();
4065
4066 regprop(prop, p);
9041c2e3
NIS
4067 PerlIO_printf(Perl_debug_log,
4068 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4069 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4070 });
9041c2e3 4071
a0d0e21e 4072 return(c);
a687059c
LW
4073}
4074
4075/*
c277df42 4076 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4077 *
c277df42
IZ
4078 * The repeater is supposed to have constant length.
4079 */
4080
76e3520e 4081STATIC I32
cea2e8a9 4082S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4083{
b7953727 4084 register char *scan = Nullch;
c277df42 4085 register char *start;
3280af22 4086 register char *loceol = PL_regeol;
a0ed51b3 4087 I32 l = 0;
708e3b05 4088 I32 count = 0, res = 1;
a0ed51b3
LW
4089
4090 if (!max)
4091 return 0;
c277df42 4092
3280af22 4093 start = PL_reginput;
53c4c00c 4094 if (PL_reg_match_utf8) {
708e3b05 4095 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4096 if (!count++) {
4097 l = 0;
4098 while (start < PL_reginput) {
4099 l++;
4100 start += UTF8SKIP(start);
4101 }
4102 *lp = l;
4103 if (l == 0)
4104 return max;
4105 }
4106 if (count == max)
4107 return count;
4108 }
4109 }
4110 else {
708e3b05 4111 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4112 if (!count++) {
4113 *lp = l = PL_reginput - start;
4114 if (max != REG_INFTY && l*max < loceol - scan)
4115 loceol = scan + l*max;
4116 if (l == 0)
4117 return max;
c277df42
IZ
4118 }
4119 }
4120 }
708e3b05 4121 if (!res)
3280af22 4122 PL_reginput = scan;
9041c2e3 4123
a0ed51b3 4124 return count;
c277df42
IZ
4125}
4126
4127/*
ffc61ed2
JH
4128- regclass_swash - prepare the utf8 swash
4129*/
4130
4131SV *
4132Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
4133{
4134 SV *sw = NULL;
4135 SV *si = NULL;
4136
4137 if (PL_regdata && PL_regdata->count) {
4138 U32 n = ARG(node);
4139
4140 if (PL_regdata->what[n] == 's') {
4141 SV *rv = (SV*)PL_regdata->data[n];
4142 AV *av = (AV*)SvRV((SV*)rv);
4143 SV **a;
9041c2e3 4144
ffc61ed2
JH
4145 si = *av_fetch(av, 0, FALSE);
4146 a = av_fetch(av, 1, FALSE);
9041c2e3 4147
ffc61ed2
JH
4148 if (a)
4149 sw = *a;
4150 else if (si && doinit) {
4151 sw = swash_init("utf8", "", si, 1, 0);
4152 (void)av_store(av, 1, sw);
4153 }
4154 }
4155 }
4156
4157 if (initsvp)
4158 *initsvp = si;
4159
4160 return sw;
4161}
4162
4163/*
cb8d8820 4164 - reginclass - determine if a character falls into a character class
bbce6d69 4165 */
4166
76e3520e 4167STATIC bool
ffc61ed2 4168S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 4169{
ffc61ed2 4170 char flags = ANYOF_FLAGS(n);
bbce6d69 4171 bool match = FALSE;
1aa99e6b 4172 UV c;
3568d838 4173 STRLEN len = 0;
1aa99e6b 4174
3568d838 4175 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4176
ffc61ed2
JH
4177 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4178 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4179 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4180 match = TRUE;
bbce6d69 4181 }
3568d838 4182 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4183 match = TRUE;
ffc61ed2
JH
4184 if (!match) {
4185 SV *sw = regclass_swash(n, TRUE, 0);
4186
4187 if (sw) {
3568d838 4188 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4189 match = TRUE;
4190 else if (flags & ANYOF_FOLD) {
a2a2844f 4191 STRLEN ulen;
a5961de5 4192 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
a2a2844f 4193
a5961de5
JH
4194 to_utf8_fold(p, tmpbuf, &ulen);
4195 if (swash_fetch(sw, tmpbuf, do_utf8))
4196 match = TRUE;
4197 to_utf8_upper(p, tmpbuf, &ulen);
3568d838 4198 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
4199 match = TRUE;
4200 }
4201 }
bbce6d69 4202 }
4203 }
1aa99e6b 4204 if (!match && c < 256) {
ffc61ed2
JH
4205 if (ANYOF_BITMAP_TEST(n, c))
4206 match = TRUE;
4207 else if (flags & ANYOF_FOLD) {
3568d838 4208 I32 f;
a0ed51b3 4209
ffc61ed2
JH
4210 if (flags & ANYOF_LOCALE) {
4211 PL_reg_flags |= RF_tainted;
4212 f = PL_fold_locale[c];
4213 }
4214 else
4215 f = PL_fold[c];
4216 if (f != c && ANYOF_BITMAP_TEST(n, f))
4217 match = TRUE;
4218 }
4219
4220 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4221 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4222 if (
4223 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4224 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4225 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4226 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4227 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4228 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4229 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4230 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4231 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4232 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4233 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4234 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4235 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4236 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4237 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4238 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4239 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4240 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4241 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4242 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4243 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4244 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4245 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4246 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4247 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4248 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4249 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4250 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4251 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4252 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4253 ) /* How's that for a conditional? */
4254 {
4255 match = TRUE;
4256 }
a0ed51b3 4257 }
a0ed51b3
LW
4258 }
4259
a0ed51b3
LW
4260 return (flags & ANYOF_INVERT) ? !match : match;
4261}
161b471a 4262
dfe13c55 4263STATIC U8 *
cea2e8a9 4264S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4265{
1aa99e6b
IH
4266 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4267}
4268
4269STATIC U8 *
4270S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4271{
a0ed51b3 4272 if (off >= 0) {
1aa99e6b 4273 while (off-- && s < lim) {
ffc61ed2 4274 /* XXX could check well-formedness here */
a0ed51b3 4275 s += UTF8SKIP(s);
ffc61ed2 4276 }
a0ed51b3
LW
4277 }
4278 else {
4279 while (off++) {
1aa99e6b 4280 if (s > lim) {
a0ed51b3 4281 s--;
ffc61ed2 4282 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4283 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4284 s--;
ffc61ed2
JH
4285 }
4286 /* XXX could check well-formedness here */
a0ed51b3
LW
4287 }
4288 }
4289 }
4290 return s;
4291}
161b471a 4292
dfe13c55 4293STATIC U8 *
1aa99e6b 4294S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4295{
1aa99e6b
IH
4296 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4297}
4298
4299STATIC U8 *
4300S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4301{
4302 if (off >= 0) {
1aa99e6b 4303 while (off-- && s < lim) {
ffc61ed2 4304 /* XXX could check well-formedness here */
a0ed51b3 4305 s += UTF8SKIP(s);
ffc61ed2 4306 }
a0ed51b3
LW
4307 if (off >= 0)
4308 return 0;
4309 }
4310 else {
4311 while (off++) {
1aa99e6b 4312 if (s > lim) {
a0ed51b3 4313 s--;
ffc61ed2 4314 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4315 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4316 s--;
ffc61ed2
JH
4317 }
4318 /* XXX could check well-formedness here */
a0ed51b3
LW
4319 }
4320 else
4321 break;
4322 }
4323 if (off <= 0)
4324 return 0;
4325 }
4326 return s;
4327}
51371543 4328
51371543 4329static void
acfe0abc 4330restore_pos(pTHX_ void *arg)
51371543 4331{
51371543
GS
4332 if (PL_reg_eval_set) {
4333 if (PL_reg_oldsaved) {
4334 PL_reg_re->subbeg = PL_reg_oldsaved;
4335 PL_reg_re->sublen = PL_reg_oldsavedlen;
4336 RX_MATCH_COPIED_on(PL_reg_re);
4337 }
4338 PL_reg_magic->mg_len = PL_reg_oldpos;
4339 PL_reg_eval_set = 0;
4340 PL_curpm = PL_reg_oldcurpm;
4341 }
4342}