This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate changes #7971(perlio),8982,9061,9062,9068,9069,
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
3c42a273
JH
109#ifdef DEBUGGING
110# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p))
111#else
112# define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
113#endif
a0ed51b3
LW
114
115#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
116#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
117
dfe13c55
GS
118#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
119#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
120#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
121#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
122#define HOPc(pos,off) ((char*)HOP(pos,off))
123#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 124
51371543
GS
125static void restore_pos(pTHXo_ void *arg);
126
127
76e3520e 128STATIC CHECKPOINT
cea2e8a9 129S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 130{
3280af22 131 int retval = PL_savestack_ix;
3c42a273
JH
132#define REGCP_PAREN_ELEMS 4
133 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
134 int p;
135
3c42a273
JH
136#define REGCP_OTHER_ELEMS 5
137 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 138 for (p = PL_regsize; p > parenfloor; p--) {
3c42a273 139/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
140 SSPUSHINT(PL_regendp[p]);
141 SSPUSHINT(PL_regstartp[p]);
3280af22 142 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
143 SSPUSHINT(p);
144 }
3c42a273 145/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
146 SSPUSHINT(PL_regsize);
147 SSPUSHINT(*PL_reglastparen);
148 SSPUSHPTR(PL_reginput);
3d4730f2
JH
149#define REGCP_FRAME_ELEMS 2
150/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
151 * are needed for the regexp context stack bookkeeping. */
152 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
3c42a273 153 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
3d4730f2 154
a0d0e21e
LW
155 return retval;
156}
157
c277df42 158/* These are needed since we do not localize EVAL nodes: */
3c42a273 159# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 160 " Setting an EVAL scope, savestack=%"IVdf"\n", \
3c42a273 161 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 162
3c42a273 163# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 164 PerlIO_printf(Perl_debug_log, \
faccc32b 165 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
3c42a273 166 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 167
76e3520e 168STATIC char *
cea2e8a9 169S_regcppop(pTHX)
a0d0e21e 170{
3c42a273 171 I32 i;
a0d0e21e
LW
172 U32 paren = 0;
173 char *input;
cf93c79d 174 I32 tmps;
3c42a273
JH
175
176 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 177 i = SSPOPINT;
3c42a273
JH
178 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
179 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 180 input = (char *) SSPOPPTR;
3280af22
NIS
181 *PL_reglastparen = SSPOPINT;
182 PL_regsize = SSPOPINT;
3c42a273
JH
183
184 /* Now restore the parentheses context. */
3d4730f2
JH
185 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
186 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 187 paren = (U32)SSPOPINT;
3280af22 188 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
189 PL_regstartp[paren] = SSPOPINT;
190 tmps = SSPOPINT;
3280af22
NIS
191 if (paren <= *PL_reglastparen)
192 PL_regendp[paren] = tmps;
c277df42 193 DEBUG_r(
c3464db5 194 PerlIO_printf(Perl_debug_log,
b900a521
JH
195 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
196 (UV)paren, (IV)PL_regstartp[paren],
197 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
198 (IV)PL_regendp[paren],
3280af22 199 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 200 );
a0d0e21e 201 }
c277df42 202 DEBUG_r(
3280af22 203 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 204 PerlIO_printf(Perl_debug_log,
faccc32b
JH
205 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
206 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
207 }
208 );
3c42a273
JH
209#if 1
210 /* It would seem that the similar code in regtry()
211 * already takes care of this, and in fact it is in
212 * a better location to since this code can #if 0-ed out
213 * but the code in regtry() is needed or otherwise tests
214 * requiring null fields (pat.t#187 and split.t#{13,14}
215 * (as of patchlevel 7877) will fail. Then again,
216 * this code seems to be necessary or otherwise
217 * building DynaLoader will fail:
218 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
219 * --jhi */
3280af22
NIS
220 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
221 if (paren > PL_regsize)
cf93c79d
IZ
222 PL_regstartp[paren] = -1;
223 PL_regendp[paren] = -1;
a0d0e21e 224 }
3c42a273 225#endif
a0d0e21e
LW
226 return input;
227}
228
0f5d15d6 229STATIC char *
cea2e8a9 230S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
231{
232 I32 tmp = PL_savestack_ix;
233
234 PL_savestack_ix = ss;
235 regcppop();
236 PL_savestack_ix = tmp;
942e002e 237 return Nullch;
0f5d15d6
IZ
238}
239
240typedef struct re_cc_state
241{
242 I32 ss;
243 regnode *node;
244 struct re_cc_state *prev;
245 CURCUR *cc;
246 regexp *re;
247} re_cc_state;
248
3c42a273 249#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 250
67368f91
HS
251#define TRYPAREN(paren, n, input) { \
252 if (paren) { \
253 if (n) { \
254 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
255 PL_regendp[paren] = input - PL_bostr; \
256 } \
257 else \
258 PL_regendp[paren] = -1; \
259 } \
260 if (regmatch(next)) \
261 sayYES; \
262 if (paren && n) \
263 PL_regendp[paren] = -1; \
264}
265
266
a687059c 267/*
e50aee73 268 * pregexec and friends
a687059c
LW
269 */
270
271/*
c277df42 272 - pregexec - match a regexp against a string
a687059c 273 */
c277df42 274I32
864dbfa3 275Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 276 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
277/* strend: pointer to null at end of string */
278/* strbeg: real beginning of string */
279/* minend: end of match must be >=minend after stringarg. */
280/* nosave: For optimizations. */
281{
282 return
283 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
284 nosave ? 0 : REXEC_COPY_STR);
285}
0f5d15d6
IZ
286
287STATIC void
cea2e8a9 288S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
289{
290 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
291#ifdef DEBUGGING
292 PL_regprogram = prog->program;
293#endif
294 PL_regnpar = prog->nparens;
295 PL_regdata = prog->data;
296 PL_reg_re = prog;
297}
22e551b9 298
cad2e5aa
JH
299/*
300 * Need to implement the following flags for reg_anch:
301 *
302 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
303 * USE_INTUIT_ML
304 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
305 * INTUIT_AUTORITATIVE_ML
306 * INTUIT_ONCE_NOML - Intuit can match in one location only.
307 * INTUIT_ONCE_ML
308 *
309 * Another flag for this function: SECOND_TIME (so that float substrs
310 * with giant delta may be not rechecked).
311 */
312
313/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
314
2c2d71f5 315/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
316 Otherwise, only SvCUR(sv) is used to get strbeg. */
317
318/* XXXX We assume that strpos is strbeg unless sv. */
319
6eb5f6b9
JH
320/* XXXX Some places assume that there is a fixed substring.
321 An update may be needed if optimizer marks as "INTUITable"
322 RExen without fixed substrings. Similarly, it is assumed that
323 lengths of all the strings are no more than minlen, thus they
324 cannot come from lookahead.
325 (Or minlen should take into account lookahead.) */
326
2c2d71f5
JH
327/* A failure to find a constant substring means that there is no need to make
328 an expensive call to REx engine, thus we celebrate a failure. Similarly,
329 finding a substring too deep into the string means that less calls to
30944b6d
IZ
330 regtry() should be needed.
331
332 REx compiler's optimizer found 4 possible hints:
333 a) Anchored substring;
334 b) Fixed substring;
335 c) Whether we are anchored (beginning-of-line or \G);
336 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 337 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
338 string which does not contradict any of them.
339 */
2c2d71f5 340
6eb5f6b9
JH
341/* Most of decisions we do here should have been done at compile time.
342 The nodes of the REx which we used for the search should have been
343 deleted from the finite automaton. */
344
cad2e5aa
JH
345char *
346Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
347 char *strend, U32 flags, re_scream_pos_data *data)
348{
2c2d71f5 349 register I32 start_shift;
cad2e5aa 350 /* Should be nonnegative! */
2c2d71f5
JH
351 register I32 end_shift;
352 register char *s;
353 register SV *check;
e2439105 354 char *strbeg;
cad2e5aa
JH
355 char *t;
356 I32 ml_anch;
2c2d71f5 357 char *tmp;
6eb5f6b9
JH
358 register char *other_last = Nullch; /* other substr checked before this */
359 char *check_at; /* check substr found at this pos */
30944b6d
IZ
360#ifdef DEBUGGING
361 char *i_strpos = strpos;
362#endif
cad2e5aa
JH
363
364 DEBUG_r( if (!PL_colorset) reginitcolors() );
365 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 366 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
367 PL_colors[4],PL_colors[5],PL_colors[0],
368 prog->precomp,
369 PL_colors[1],
370 (strlen(prog->precomp) > 60 ? "..." : ""),
371 PL_colors[0],
b900a521 372 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
373 strpos, PL_colors[1],
374 (strend - strpos > 60 ? "..." : ""))
375 );
376
2c2d71f5
JH
377 if (prog->minlen > strend - strpos) {
378 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 379 goto fail;
2c2d71f5 380 }
e2439105 381 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 382 check = prog->check_substr;
2c2d71f5 383 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
384 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
385 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 386 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 387
72d299db
GS
388 if (!ml_anch) {
389 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
390 /* SvCUR is not set on references: SvRV and SvPVX overlap */
391 && sv && !SvROK(sv)
e2439105 392 && (strpos != strbeg)) {
72d299db
GS
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
394 goto fail;
395 }
396 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 397 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
398 I32 slen;
399
adac82c7 400 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 401 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
402 if (SvTAIL(check)) {
403 slen = SvCUR(check); /* >= 1 */
cad2e5aa 404
2c2d71f5
JH
405 if ( strend - s > slen || strend - s < slen - 1
406 || (strend - s == slen && strend[-1] != '\n')) {
407 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
408 goto fail_finish;
cad2e5aa
JH
409 }
410 /* Now should match s[0..slen-2] */
411 slen--;
653099ff 412 if (slen && (*SvPVX(check) != *s
cad2e5aa 413 || (slen > 1
653099ff 414 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
415 report_neq:
416 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
417 goto fail_finish;
418 }
cad2e5aa 419 }
653099ff
GS
420 else if (*SvPVX(check) != *s
421 || ((slen = SvCUR(check)) > 1
422 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
423 goto report_neq;
424 goto success_at_start;
72d299db 425 }
cad2e5aa 426 }
2c2d71f5 427 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 428 s = strpos;
2c2d71f5 429 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 430 end_shift = prog->minlen - start_shift -
653099ff 431 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 432 if (!ml_anch) {
653099ff
GS
433 I32 end = prog->check_offset_max + CHR_SVLEN(check)
434 - (SvTAIL(check) != 0);
2c2d71f5
JH
435 I32 eshift = strend - s - end;
436
437 if (end_shift < eshift)
438 end_shift = eshift;
439 }
cad2e5aa 440 }
2c2d71f5 441 else { /* Can match at random position */
cad2e5aa
JH
442 ml_anch = 0;
443 s = strpos;
2c2d71f5
JH
444 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
445 /* Should be nonnegative! */
446 end_shift = prog->minlen - start_shift -
653099ff 447 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
448 }
449
2c2d71f5 450#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 451 if (end_shift < 0)
6bbae5e6 452 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
453#endif
454
2c2d71f5 455 restart:
a8ae6a0b
JH
456 other_last = Nullch;
457
2c2d71f5
JH
458 /* Find a possible match in the region s..strend by looking for
459 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 460 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
461 I32 p = -1; /* Internal iterator of scream. */
462 I32 *pp = data ? data->scream_pos : &p;
463
2c2d71f5
JH
464 if (PL_screamfirst[BmRARE(check)] >= 0
465 || ( BmRARE(check) == '\n'
466 && (BmPREVIOUS(check) == SvCUR(check) - 1)
467 && SvTAIL(check) ))
468 s = screaminstr(sv, check,
469 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 470 else
2c2d71f5 471 goto fail_finish;
cad2e5aa
JH
472 if (data)
473 *data->scream_olds = s;
474 }
475 else
476 s = fbm_instr((unsigned char*)s + start_shift,
477 (unsigned char*)strend - end_shift,
2c2d71f5 478 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
479
480 /* Update the count-of-usability, remove useless subpatterns,
481 unshift s. */
2c2d71f5
JH
482
483 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
484 (s ? "Found" : "Did not find"),
485 ((check == prog->anchored_substr) ? "anchored" : "floating"),
486 PL_colors[0],
7b0972df
JH
487 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
488 SvPVX(check),
2c2d71f5
JH
489 PL_colors[1], (SvTAIL(check) ? "$" : ""),
490 (s ? " at offset " : "...\n") ) );
491
492 if (!s)
493 goto fail_finish;
494
6eb5f6b9
JH
495 check_at = s;
496
2c2d71f5 497 /* Finish the diagnostic message */
30944b6d 498 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
499
500 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
501 Start with the other substr.
502 XXXX no SCREAM optimization yet - and a very coarse implementation
503 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
504 *always* match. Probably should be marked during compile...
505 Probably it is right to do no SCREAM here...
506 */
507
508 if (prog->float_substr && prog->anchored_substr) {
30944b6d 509 /* Take into account the "other" substring. */
2c2d71f5
JH
510 /* XXXX May be hopelessly wrong for UTF... */
511 if (!other_last)
6eb5f6b9 512 other_last = strpos;
2c2d71f5 513 if (check == prog->float_substr) {
30944b6d
IZ
514 do_other_anchored:
515 {
2c2d71f5
JH
516 char *last = s - start_shift, *last1, *last2;
517 char *s1 = s;
518
519 tmp = PL_bostr;
520 t = s - prog->check_offset_max;
521 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
522 && (!(prog->reganch & ROPT_UTF8)
523 || (PL_bostr = strpos, /* Used in regcopmaybe() */
524 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
525 && t > strpos)))
30944b6d 526 /* EMPTY */;
2c2d71f5
JH
527 else
528 t = strpos;
529 t += prog->anchored_offset;
6eb5f6b9
JH
530 if (t < other_last) /* These positions already checked */
531 t = other_last;
2c2d71f5
JH
532 PL_bostr = tmp;
533 last2 = last1 = strend - prog->minlen;
534 if (last < last1)
535 last1 = last;
536 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
537 /* On end-of-str: see comment below. */
538 s = fbm_instr((unsigned char*)t,
539 (unsigned char*)last1 + prog->anchored_offset
540 + SvCUR(prog->anchored_substr)
541 - (SvTAIL(prog->anchored_substr)!=0),
542 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
543 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
544 (s ? "Found" : "Contradicts"),
545 PL_colors[0],
7b0972df
JH
546 (int)(SvCUR(prog->anchored_substr)
547 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
548 SvPVX(prog->anchored_substr),
549 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
550 if (!s) {
551 if (last1 >= last2) {
552 DEBUG_r(PerlIO_printf(Perl_debug_log,
553 ", giving up...\n"));
554 goto fail_finish;
555 }
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
557 ", trying floating at offset %ld...\n",
30944b6d 558 (long)(s1 + 1 - i_strpos)));
2c2d71f5 559 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 560 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
561 s = HOPc(last, 1);
562 goto restart;
563 }
564 else {
565 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 566 (long)(s - i_strpos)));
2c2d71f5 567 t = s - prog->anchored_offset;
6eb5f6b9 568 other_last = s + 1;
30944b6d 569 s = s1;
2c2d71f5
JH
570 if (t == strpos)
571 goto try_at_start;
2c2d71f5
JH
572 goto try_at_offset;
573 }
30944b6d 574 }
2c2d71f5
JH
575 }
576 else { /* Take into account the floating substring. */
577 char *last, *last1;
578 char *s1 = s;
579
580 t = s - start_shift;
581 last1 = last = strend - prog->minlen + prog->float_min_offset;
582 if (last - t > prog->float_max_offset)
583 last = t + prog->float_max_offset;
584 s = t + prog->float_min_offset;
6eb5f6b9
JH
585 if (s < other_last)
586 s = other_last;
2c2d71f5
JH
587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
588 /* fbm_instr() takes into account exact value of end-of-str
589 if the check is SvTAIL(ed). Since false positives are OK,
590 and end-of-str is not later than strend we are OK. */
591 s = fbm_instr((unsigned char*)s,
592 (unsigned char*)last + SvCUR(prog->float_substr)
593 - (SvTAIL(prog->float_substr)!=0),
594 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->float_substr)
599 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->float_substr),
601 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 == last) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying anchored starting at offset %ld...\n",
30944b6d 610 (long)(s1 + 1 - i_strpos)));
3c42a273 611 other_last = last;
2c2d71f5
JH
612 PL_regeol = strend; /* Used in HOP() */
613 s = HOPc(t, 1);
614 goto restart;
615 }
616 else {
617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 618 (long)(s - i_strpos)));
3c42a273 619 other_last = s; /* Fix this later. --Hugo */
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
625 }
cad2e5aa 626 }
2c2d71f5
JH
627
628 t = s - prog->check_offset_max;
629 tmp = PL_bostr;
630 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
631 && (!(prog->reganch & ROPT_UTF8)
632 || (PL_bostr = strpos, /* Used in regcopmaybe() */
633 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
634 && t > strpos)))) {
635 PL_bostr = tmp;
636 /* Fixed substring is found far enough so that the match
637 cannot start at strpos. */
638 try_at_offset:
cad2e5aa 639 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
640 /* Eventually fbm_*() should handle this, but often
641 anchored_offset is not 0, so this check will not be wasted. */
642 /* XXXX In the code below we prefer to look for "^" even in
643 presence of anchored substrings. And we search even
644 beyond the found float position. These pessimizations
645 are historical artefacts only. */
646 find_anchor:
2c2d71f5 647 while (t < strend - prog->minlen) {
cad2e5aa 648 if (*t == '\n') {
4ee3650e 649 if (t < check_at - prog->check_offset_min) {
30944b6d 650 if (prog->anchored_substr) {
4ee3650e
GS
651 /* Since we moved from the found position,
652 we definitely contradict the found anchored
30944b6d
IZ
653 substr. Due to the above check we do not
654 contradict "check" substr.
655 Thus we can arrive here only if check substr
656 is float. Redo checking for "other"=="fixed".
657 */
658 strpos = t + 1;
659 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
660 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
661 goto do_other_anchored;
662 }
4ee3650e
GS
663 /* We don't contradict the found floating substring. */
664 /* XXXX Why not check for STCLASS? */
cad2e5aa 665 s = t + 1;
2c2d71f5 666 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 667 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
668 goto set_useful;
669 }
4ee3650e
GS
670 /* Position contradicts check-string */
671 /* XXXX probably better to look for check-string
672 than for "\n", so one should lower the limit for t? */
673 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 674 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
f87d2592 675 other_last = strpos = s = t + 1;
cad2e5aa
JH
676 goto restart;
677 }
678 t++;
679 }
2c2d71f5
JH
680 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
681 PL_colors[0],PL_colors[1]));
682 goto fail_finish;
cad2e5aa 683 }
f5952150
GS
684 else {
685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
686 PL_colors[0],PL_colors[1]));
687 }
cad2e5aa
JH
688 s = t;
689 set_useful:
2c2d71f5 690 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
691 }
692 else {
2c2d71f5 693 PL_bostr = tmp;
f5952150 694 /* The found string does not prohibit matching at strpos,
2c2d71f5 695 - no optimization of calling REx engine can be performed,
f5952150
GS
696 unless it was an MBOL and we are not after MBOL,
697 or a future STCLASS check will fail this. */
2c2d71f5
JH
698 try_at_start:
699 /* Even in this situation we may use MBOL flag if strpos is offset
700 wrt the start of the string. */
05b4157f 701 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
e2439105 702 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
703 /* May be due to an implicit anchor of m{.*foo} */
704 && !(prog->reganch & ROPT_IMPLICIT))
705 {
cad2e5aa
JH
706 t = strpos;
707 goto find_anchor;
708 }
30944b6d 709 DEBUG_r( if (ml_anch)
f5952150
GS
710 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
711 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 712 );
2c2d71f5 713 success_at_start:
30944b6d 714 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 715 && prog->check_substr /* Could be deleted already */
cad2e5aa 716 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
717 && prog->check_substr == prog->float_substr)
718 {
cad2e5aa 719 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
721 SvREFCNT_dec(prog->check_substr);
722 prog->check_substr = Nullsv; /* disable */
723 prog->float_substr = Nullsv; /* clear */
0139fc7d 724 check = Nullsv; /* abort */
cad2e5aa 725 s = strpos;
3cf5c195
IZ
726 /* XXXX This is a remnant of the old implementation. It
727 looks wasteful, since now INTUIT can use many
6eb5f6b9 728 other heuristics. */
cad2e5aa
JH
729 prog->reganch &= ~RE_USE_INTUIT;
730 }
731 else
732 s = strpos;
733 }
734
6eb5f6b9
JH
735 /* Last resort... */
736 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
737 if (prog->regstclass) {
738 /* minlen == 0 is possible if regstclass is \b or \B,
739 and the fixed substr is ''$.
740 Since minlen is already taken into account, s+1 is before strend;
741 accidentally, minlen >= 1 guaranties no false positives at s + 1
742 even for \b or \B. But (minlen? 1 : 0) below assumes that
743 regstclass does not come from lookahead... */
744 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
745 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
746 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
747 ? STR_LEN(prog->regstclass)
748 : 1);
6eb5f6b9 749 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
750 ? s + (prog->minlen? cl_l : 0)
751 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 752 : strend) ;
e2439105 753 char *startpos = strbeg;
6eb5f6b9
JH
754
755 t = s;
76384e4a
GS
756 if (prog->reganch & ROPT_UTF8) {
757 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
758 PL_bostr = startpos;
759 }
6eb5f6b9
JH
760 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
761 if (!s) {
762#ifdef DEBUGGING
763 char *what;
764#endif
765 if (endpos == strend) {
766 DEBUG_r( PerlIO_printf(Perl_debug_log,
767 "Could not match STCLASS...\n") );
768 goto fail;
769 }
66e933ab
GS
770 DEBUG_r( PerlIO_printf(Perl_debug_log,
771 "This position contradicts STCLASS...\n") );
653099ff
GS
772 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
773 goto fail;
6eb5f6b9
JH
774 /* Contradict one of substrings */
775 if (prog->anchored_substr) {
6eb5f6b9
JH
776 if (prog->anchored_substr == check) {
777 DEBUG_r( what = "anchored" );
778 hop_and_restart:
779 PL_regeol = strend; /* Used in HOP() */
780 s = HOPc(t, 1);
66e933ab
GS
781 if (s + start_shift + end_shift > strend) {
782 /* XXXX Should be taken into account earlier? */
783 DEBUG_r( PerlIO_printf(Perl_debug_log,
784 "Could not match STCLASS...\n") );
785 goto fail;
786 }
0139fc7d
GS
787 if (!check)
788 goto giveup;
6eb5f6b9 789 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 790 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
791 what, (long)(s + start_shift - i_strpos)) );
792 goto restart;
793 }
66e933ab 794 /* Have both, check_string is floating */
6eb5f6b9
JH
795 if (t + start_shift >= check_at) /* Contradicts floating=check */
796 goto retry_floating_check;
797 /* Recheck anchored substring, but not floating... */
798 s = check_at;
0139fc7d
GS
799 if (!check)
800 goto giveup;
6eb5f6b9 801 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 802 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
803 (long)(other_last - i_strpos)) );
804 goto do_other_anchored;
805 }
60e71179
GS
806 /* Another way we could have checked stclass at the
807 current position only: */
808 if (ml_anch) {
809 s = t = t + 1;
0139fc7d
GS
810 if (!check)
811 goto giveup;
60e71179 812 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
813 "Looking for /%s^%s/m starting at offset %ld...\n",
814 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 815 goto try_at_offset;
66e933ab 816 }
60e71179
GS
817 if (!prog->float_substr) /* Could have been deleted */
818 goto fail;
6eb5f6b9
JH
819 /* Check is floating subtring. */
820 retry_floating_check:
821 t = check_at - start_shift;
822 DEBUG_r( what = "floating" );
823 goto hop_and_restart;
824 }
825 DEBUG_r( if (t != s)
826 PerlIO_printf(Perl_debug_log,
827 "By STCLASS: moving %ld --> %ld\n",
828 (long)(t - i_strpos), (long)(s - i_strpos));
829 else
830 PerlIO_printf(Perl_debug_log,
831 "Does not contradict STCLASS...\n") );
832 }
0139fc7d
GS
833 giveup:
834 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
835 PL_colors[4], (check ? "Guessed" : "Giving up"),
836 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 837 return s;
2c2d71f5
JH
838
839 fail_finish: /* Substring not found */
66e933ab
GS
840 if (prog->check_substr) /* could be removed already */
841 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 842 fail:
2c2d71f5 843 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
844 PL_colors[4],PL_colors[5]));
845 return Nullch;
846}
9661b544 847
6eb5f6b9 848/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
849STATIC char *
850S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 851{
6eb5f6b9
JH
852 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
853 char *m;
d8093b23
G
854 STRLEN ln;
855 unsigned int c1;
856 unsigned int c2;
6eb5f6b9
JH
857 char *e;
858 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 859
6eb5f6b9
JH
860 /* We know what class it must start with. */
861 switch (OP(c)) {
862 case ANYOFUTF8:
863 while (s < strend) {
864 if (REGINCLASSUTF8(c, (U8*)s)) {
865 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 866 goto got_it;
3cf5c195 867 else
6eb5f6b9 868 tmp = doevery;
a0ed51b3 869 }
6eb5f6b9
JH
870 else
871 tmp = 1;
a0ed51b3
LW
872 s += UTF8SKIP(s);
873 }
6eb5f6b9
JH
874 break;
875 case ANYOF:
a0ed51b3 876 while (s < strend) {
d8093b23 877 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
878 if (tmp && (norun || regtry(prog, s)))
879 goto got_it;
880 else
881 tmp = doevery;
a0ed51b3 882 }
6eb5f6b9
JH
883 else
884 tmp = 1;
a0ed51b3 885 s++;
a0d0e21e 886 }
6eb5f6b9
JH
887 break;
888 case EXACTF:
889 m = STRING(c);
890 ln = STR_LEN(c);
d8093b23 891 c1 = *(U8*)m;
6eb5f6b9
JH
892 c2 = PL_fold[c1];
893 goto do_exactf;
894 case EXACTFL:
895 m = STRING(c);
896 ln = STR_LEN(c);
d8093b23 897 c1 = *(U8*)m;
6eb5f6b9
JH
898 c2 = PL_fold_locale[c1];
899 do_exactf:
900 e = strend - ln;
b3c9acc1 901
6eb5f6b9
JH
902 if (norun && e < s)
903 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
904 /* Here it is NOT UTF! */
905 if (c1 == c2) {
906 while (s <= e) {
d8093b23 907 if ( *(U8*)s == c1
66e933ab
GS
908 && (ln == 1 || !(OP(c) == EXACTF
909 ? ibcmp(s, m, ln)
910 : ibcmp_locale(s, m, ln)))
6eb5f6b9 911 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
912 goto got_it;
913 s++;
914 }
915 } else {
916 while (s <= e) {
d8093b23 917 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
918 && (ln == 1 || !(OP(c) == EXACTF
919 ? ibcmp(s, m, ln)
920 : ibcmp_locale(s, m, ln)))
6eb5f6b9 921 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
922 goto got_it;
923 s++;
924 }
925 }
926 break;
bbce6d69 927 case BOUNDL:
3280af22 928 PL_reg_flags |= RF_tainted;
bbce6d69 929 /* FALL THROUGH */
a0d0e21e 930 case BOUND:
6eb5f6b9 931 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 932 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 933 while (s < strend) {
95bac841 934 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 935 tmp = !tmp;
6eb5f6b9 936 if ((norun || regtry(prog, s)))
a0d0e21e 937 goto got_it;
a687059c 938 }
a0d0e21e
LW
939 s++;
940 }
6eb5f6b9 941 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
942 goto got_it;
943 break;
a0ed51b3
LW
944 case BOUNDLUTF8:
945 PL_reg_flags |= RF_tainted;
946 /* FALL THROUGH */
947 case BOUNDUTF8:
3c42a273
JH
948 if (s == startpos)
949 tmp = '\n';
950 else {
951 U8 *r = reghop((U8*)s, -1);
952
953 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
954 }
955 tmp = ((OP(c) == BOUNDUTF8 ?
956 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 957 while (s < strend) {
76384e4a 958 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
959 swash_fetch(PL_utf8_alnum, (U8*)s) :
960 isALNUM_LC_utf8((U8*)s)))
961 {
a0ed51b3 962 tmp = !tmp;
6eb5f6b9 963 if ((norun || regtry(prog, s)))
a0ed51b3
LW
964 goto got_it;
965 }
966 s += UTF8SKIP(s);
967 }
6eb5f6b9 968 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
969 goto got_it;
970 break;
bbce6d69 971 case NBOUNDL:
3280af22 972 PL_reg_flags |= RF_tainted;
bbce6d69 973 /* FALL THROUGH */
a0d0e21e 974 case NBOUND:
6eb5f6b9 975 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 976 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 977 while (s < strend) {
95bac841 978 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 979 tmp = !tmp;
6eb5f6b9 980 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
981 goto got_it;
982 s++;
983 }
6eb5f6b9 984 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
985 goto got_it;
986 break;
a0ed51b3
LW
987 case NBOUNDLUTF8:
988 PL_reg_flags |= RF_tainted;
989 /* FALL THROUGH */
990 case NBOUNDUTF8:
3c42a273
JH
991 if (s == startpos)
992 tmp = '\n';
993 else {
994 U8 *r = reghop((U8*)s, -1);
995
996 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
997 }
998 tmp = ((OP(c) == NBOUNDUTF8 ?
999 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 1000 while (s < strend) {
76384e4a 1001 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
1002 swash_fetch(PL_utf8_alnum, (U8*)s) :
1003 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 1004 tmp = !tmp;
6eb5f6b9 1005 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
1006 goto got_it;
1007 s += UTF8SKIP(s);
1008 }
6eb5f6b9 1009 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1010 goto got_it;
1011 break;
a0d0e21e
LW
1012 case ALNUM:
1013 while (s < strend) {
bbce6d69 1014 if (isALNUM(*s)) {
6eb5f6b9 1015 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1016 goto got_it;
1017 else
1018 tmp = doevery;
1019 }
1020 else
1021 tmp = 1;
1022 s++;
1023 }
1024 break;
a0ed51b3
LW
1025 case ALNUMUTF8:
1026 while (s < strend) {
dfe13c55 1027 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
bbce6d69 1038 case ALNUML:
3280af22 1039 PL_reg_flags |= RF_tainted;
bbce6d69 1040 while (s < strend) {
1041 if (isALNUM_LC(*s)) {
6eb5f6b9 1042 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1043 goto got_it;
a687059c 1044 else
a0d0e21e
LW
1045 tmp = doevery;
1046 }
1047 else
1048 tmp = 1;
1049 s++;
1050 }
1051 break;
a0ed51b3
LW
1052 case ALNUMLUTF8:
1053 PL_reg_flags |= RF_tainted;
1054 while (s < strend) {
dfe13c55 1055 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1056 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1057 goto got_it;
1058 else
1059 tmp = doevery;
1060 }
1061 else
1062 tmp = 1;
1063 s += UTF8SKIP(s);
1064 }
1065 break;
a0d0e21e
LW
1066 case NALNUM:
1067 while (s < strend) {
bbce6d69 1068 if (!isALNUM(*s)) {
6eb5f6b9 1069 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1070 goto got_it;
1071 else
1072 tmp = doevery;
1073 }
1074 else
1075 tmp = 1;
1076 s++;
1077 }
1078 break;
a0ed51b3
LW
1079 case NALNUMUTF8:
1080 while (s < strend) {
dfe13c55 1081 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
bbce6d69 1092 case NALNUML:
3280af22 1093 PL_reg_flags |= RF_tainted;
bbce6d69 1094 while (s < strend) {
1095 if (!isALNUM_LC(*s)) {
6eb5f6b9 1096 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1097 goto got_it;
a687059c 1098 else
a0d0e21e 1099 tmp = doevery;
a687059c 1100 }
a0d0e21e
LW
1101 else
1102 tmp = 1;
1103 s++;
1104 }
1105 break;
a0ed51b3
LW
1106 case NALNUMLUTF8:
1107 PL_reg_flags |= RF_tainted;
1108 while (s < strend) {
dfe13c55 1109 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1110 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
1115 else
1116 tmp = 1;
1117 s += UTF8SKIP(s);
1118 }
1119 break;
a0d0e21e
LW
1120 case SPACE:
1121 while (s < strend) {
1122 if (isSPACE(*s)) {
6eb5f6b9 1123 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1124 goto got_it;
1125 else
1126 tmp = doevery;
2304df62 1127 }
a0d0e21e
LW
1128 else
1129 tmp = 1;
1130 s++;
1131 }
1132 break;
a0ed51b3
LW
1133 case SPACEUTF8:
1134 while (s < strend) {
dfe13c55 1135 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
bbce6d69 1146 case SPACEL:
3280af22 1147 PL_reg_flags |= RF_tainted;
bbce6d69 1148 while (s < strend) {
1149 if (isSPACE_LC(*s)) {
6eb5f6b9 1150 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s++;
1158 }
1159 break;
a0ed51b3
LW
1160 case SPACELUTF8:
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
dfe13c55 1163 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1164 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
1169 else
1170 tmp = 1;
1171 s += UTF8SKIP(s);
1172 }
1173 break;
a0d0e21e
LW
1174 case NSPACE:
1175 while (s < strend) {
1176 if (!isSPACE(*s)) {
6eb5f6b9 1177 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1178 goto got_it;
1179 else
1180 tmp = doevery;
a687059c 1181 }
a0d0e21e
LW
1182 else
1183 tmp = 1;
1184 s++;
1185 }
1186 break;
a0ed51b3
LW
1187 case NSPACEUTF8:
1188 while (s < strend) {
dfe13c55 1189 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
bbce6d69 1200 case NSPACEL:
3280af22 1201 PL_reg_flags |= RF_tainted;
bbce6d69 1202 while (s < strend) {
1203 if (!isSPACE_LC(*s)) {
6eb5f6b9 1204 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s++;
1212 }
1213 break;
a0ed51b3
LW
1214 case NSPACELUTF8:
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
dfe13c55 1217 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1218 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
1223 else
1224 tmp = 1;
1225 s += UTF8SKIP(s);
1226 }
1227 break;
a0d0e21e
LW
1228 case DIGIT:
1229 while (s < strend) {
1230 if (isDIGIT(*s)) {
6eb5f6b9 1231 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1232 goto got_it;
1233 else
1234 tmp = doevery;
2b69d0c2 1235 }
a0d0e21e
LW
1236 else
1237 tmp = 1;
1238 s++;
1239 }
1240 break;
a0ed51b3
LW
1241 case DIGITUTF8:
1242 while (s < strend) {
dfe13c55 1243 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
b8c5462f
JH
1254 case DIGITL:
1255 PL_reg_flags |= RF_tainted;
1256 while (s < strend) {
1257 if (isDIGIT_LC(*s)) {
6eb5f6b9 1258 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s++;
1266 }
1267 break;
1268 case DIGITLUTF8:
1269 PL_reg_flags |= RF_tainted;
1270 while (s < strend) {
1271 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1272 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1273 goto got_it;
1274 else
1275 tmp = doevery;
1276 }
1277 else
1278 tmp = 1;
1279 s += UTF8SKIP(s);
1280 }
1281 break;
a0d0e21e
LW
1282 case NDIGIT:
1283 while (s < strend) {
1284 if (!isDIGIT(*s)) {
6eb5f6b9 1285 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1286 goto got_it;
1287 else
1288 tmp = doevery;
a687059c 1289 }
a0d0e21e
LW
1290 else
1291 tmp = 1;
1292 s++;
1293 }
1294 break;
a0ed51b3
LW
1295 case NDIGITUTF8:
1296 while (s < strend) {
dfe13c55 1297 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1298 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s += UTF8SKIP(s);
1306 }
1307 break;
b8c5462f
JH
1308 case NDIGITL:
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1312 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1313 goto got_it;
1314 else
1315 tmp = doevery;
1316 }
1317 else
1318 tmp = 1;
1319 s++;
a0ed51b3 1320 }
b8c5462f
JH
1321 break;
1322 case NDIGITLUTF8:
1323 PL_reg_flags |= RF_tainted;
1324 while (s < strend) {
1325 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1326 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1327 goto got_it;
cf93c79d 1328 else
b8c5462f
JH
1329 tmp = doevery;
1330 }
1331 else
1332 tmp = 1;
1333 s += UTF8SKIP(s);
1334 }
1335 break;
b3c9acc1 1336 default:
3c3eec57
GS
1337 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1338 break;
d6a28714 1339 }
6eb5f6b9
JH
1340 return 0;
1341 got_it:
1342 return s;
1343}
1344
1345/*
1346 - regexec_flags - match a regexp against a string
1347 */
1348I32
1349Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1350 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1351/* strend: pointer to null at end of string */
1352/* strbeg: real beginning of string */
1353/* minend: end of match must be >=minend after stringarg. */
1354/* data: May be used for some additional optimizations. */
1355/* nosave: For optimizations. */
1356{
6eb5f6b9
JH
1357 register char *s;
1358 register regnode *c;
1359 register char *startpos = stringarg;
6eb5f6b9
JH
1360 I32 minlen; /* must match at least this many chars */
1361 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1362 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1363 constant substr. */ /* CC */
1364 I32 end_shift = 0; /* Same for the end. */ /* CC */
1365 I32 scream_pos = -1; /* Internal iterator of scream. */
1366 char *scream_olds;
1367 SV* oreplsv = GvSV(PL_replgv);
1368
1369 PL_regcc = 0;
1370
1371 cache_re(prog);
1372#ifdef DEBUGGING
1373 PL_regnarrate = PL_debug & 512;
1374#endif
1375
1376 /* Be paranoid... */
1377 if (prog == NULL || startpos == NULL) {
1378 Perl_croak(aTHX_ "NULL regexp parameter");
1379 return 0;
1380 }
1381
1382 minlen = prog->minlen;
1383 if (strend - startpos < minlen) goto phooey;
1384
1385 if (startpos == strbeg) /* is ^ valid at stringarg? */
1386 PL_regprev = '\n';
1387 else {
1388 PL_regprev = (U32)stringarg[-1];
1389 if (!PL_multiline && PL_regprev == '\n')
1390 PL_regprev = '\0'; /* force ^ to NOT match */
1391 }
1392
1393 /* Check validity of program. */
1394 if (UCHARAT(prog->program) != REG_MAGIC) {
1395 Perl_croak(aTHX_ "corrupted regexp program");
1396 }
1397
1398 PL_reg_flags = 0;
1399 PL_reg_eval_set = 0;
1400 PL_reg_maxiter = 0;
1401
1402 if (prog->reganch & ROPT_UTF8)
1403 PL_reg_flags |= RF_utf8;
1404
1405 /* Mark beginning of line for ^ and lookbehind. */
1406 PL_regbol = startpos;
1407 PL_bostr = strbeg;
1408 PL_reg_sv = sv;
1409
1410 /* Mark end of line for $ (and such) */
1411 PL_regeol = strend;
1412
1413 /* see how far we have to get to not match where we matched before */
1414 PL_regtill = startpos+minend;
1415
1416 /* We start without call_cc context. */
1417 PL_reg_call_cc = 0;
1418
1419 /* If there is a "must appear" string, look for it. */
1420 s = startpos;
1421
1422 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1423 MAGIC *mg;
1424
1425 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1426 PL_reg_ganch = startpos;
1427 else if (sv && SvTYPE(sv) >= SVt_PVMG
1428 && SvMAGIC(sv)
1429 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1430 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1431 if (prog->reganch & ROPT_ANCH_GPOS) {
1432 if (s > PL_reg_ganch)
1433 goto phooey;
1434 s = PL_reg_ganch;
1435 }
1436 }
1437 else /* pos() not defined */
1438 PL_reg_ganch = strbeg;
1439 }
1440
1441 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1442 re_scream_pos_data d;
1443
1444 d.scream_olds = &scream_olds;
1445 d.scream_pos = &scream_pos;
1446 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1447 if (!s)
1448 goto phooey; /* not present */
1449 }
1450
1451 DEBUG_r( if (!PL_colorset) reginitcolors() );
1452 DEBUG_r(PerlIO_printf(Perl_debug_log,
1453 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1454 PL_colors[4],PL_colors[5],PL_colors[0],
1455 prog->precomp,
1456 PL_colors[1],
1457 (strlen(prog->precomp) > 60 ? "..." : ""),
1458 PL_colors[0],
1459 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1460 startpos, PL_colors[1],
1461 (strend - startpos > 60 ? "..." : ""))
1462 );
1463
1464 /* Simplest case: anchored match need be tried only once. */
1465 /* [unless only anchor is BOL and multiline is set] */
1466 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1467 if (s == startpos && regtry(prog, startpos))
1468 goto got_it;
1469 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1470 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1471 {
1472 char *end;
1473
1474 if (minlen)
1475 dontbother = minlen - 1;
1476 end = HOPc(strend, -dontbother) - 1;
1477 /* for multiline we only have to try after newlines */
1478 if (prog->check_substr) {
1479 if (s == startpos)
1480 goto after_try;
1481 while (1) {
1482 if (regtry(prog, s))
1483 goto got_it;
1484 after_try:
1485 if (s >= end)
1486 goto phooey;
1487 if (prog->reganch & RE_USE_INTUIT) {
1488 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1489 if (!s)
1490 goto phooey;
1491 }
1492 else
1493 s++;
1494 }
1495 } else {
1496 if (s > startpos)
1497 s--;
1498 while (s < end) {
1499 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1500 if (regtry(prog, s))
1501 goto got_it;
1502 }
1503 }
1504 }
1505 }
1506 goto phooey;
1507 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1508 if (regtry(prog, PL_reg_ganch))
1509 goto got_it;
1510 goto phooey;
1511 }
1512
1513 /* Messy cases: unanchored match. */
1514 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1515 /* we have /x+whatever/ */
1516 /* it must be a one character string (XXXX Except UTF?) */
1517 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1518#ifdef DEBUGGING
1519 int did_match = 0;
1520#endif
1521
6eb5f6b9
JH
1522 if (UTF) {
1523 while (s < strend) {
1524 if (*s == ch) {
bf93d4cc 1525 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1526 if (regtry(prog, s)) goto got_it;
1527 s += UTF8SKIP(s);
1528 while (s < strend && *s == ch)
1529 s += UTF8SKIP(s);
1530 }
1531 s += UTF8SKIP(s);
1532 }
1533 }
1534 else {
1535 while (s < strend) {
1536 if (*s == ch) {
bf93d4cc 1537 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1538 if (regtry(prog, s)) goto got_it;
1539 s++;
1540 while (s < strend && *s == ch)
1541 s++;
1542 }
1543 s++;
1544 }
1545 }
bf93d4cc
GS
1546 DEBUG_r(did_match ||
1547 PerlIO_printf(Perl_debug_log,
1548 "Did not find anchored character...\n"));
6eb5f6b9
JH
1549 }
1550 /*SUPPRESS 560*/
1551 else if (prog->anchored_substr != Nullsv
1552 || (prog->float_substr != Nullsv
1553 && prog->float_max_offset < strend - s)) {
1554 SV *must = prog->anchored_substr
1555 ? prog->anchored_substr : prog->float_substr;
1556 I32 back_max =
1557 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1558 I32 back_min =
1559 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1560 char *last = HOPc(strend, /* Cannot start after this */
1561 -(I32)(CHR_SVLEN(must)
1562 - (SvTAIL(must) != 0) + back_min));
1563 char *last1; /* Last position checked before */
bf93d4cc
GS
1564#ifdef DEBUGGING
1565 int did_match = 0;
1566#endif
6eb5f6b9
JH
1567
1568 if (s > PL_bostr)
1569 last1 = HOPc(s, -1);
1570 else
1571 last1 = s - 1; /* bogus */
1572
1573 /* XXXX check_substr already used to find `s', can optimize if
1574 check_substr==must. */
1575 scream_pos = -1;
1576 dontbother = end_shift;
1577 strend = HOPc(strend, -dontbother);
1578 while ( (s <= last) &&
1579 ((flags & REXEC_SCREAM)
1580 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1581 end_shift, &scream_pos, 0))
1582 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1583 (unsigned char*)strend, must,
1584 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1585 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1586 if (HOPc(s, -back_max) > last1) {
1587 last1 = HOPc(s, -back_min);
1588 s = HOPc(s, -back_max);
1589 }
1590 else {
1591 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1592
1593 last1 = HOPc(s, -back_min);
1594 s = t;
1595 }
1596 if (UTF) {
1597 while (s <= last1) {
1598 if (regtry(prog, s))
1599 goto got_it;
1600 s += UTF8SKIP(s);
1601 }
1602 }
1603 else {
1604 while (s <= last1) {
1605 if (regtry(prog, s))
1606 goto got_it;
1607 s++;
1608 }
1609 }
1610 }
bf93d4cc
GS
1611 DEBUG_r(did_match ||
1612 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1613 ((must == prog->anchored_substr)
1614 ? "anchored" : "floating"),
1615 PL_colors[0],
1616 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1617 SvPVX(must),
1618 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1619 goto phooey;
1620 }
155aba94 1621 else if ((c = prog->regstclass)) {
66e933ab
GS
1622 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1623 /* don't bother with what can't match */
6eb5f6b9
JH
1624 strend = HOPc(strend, -(minlen - 1));
1625 if (find_byclass(prog, c, s, strend, startpos, 0))
1626 goto got_it;
bf93d4cc 1627 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1628 }
1629 else {
1630 dontbother = 0;
1631 if (prog->float_substr != Nullsv) { /* Trim the end. */
1632 char *last;
d6a28714
JH
1633
1634 if (flags & REXEC_SCREAM) {
1635 last = screaminstr(sv, prog->float_substr, s - strbeg,
1636 end_shift, &scream_pos, 1); /* last one */
1637 if (!last)
1638 last = scream_olds; /* Only one occurence. */
b8c5462f 1639 }
d6a28714
JH
1640 else {
1641 STRLEN len;
1642 char *little = SvPV(prog->float_substr, len);
1643
1644 if (SvTAIL(prog->float_substr)) {
1645 if (memEQ(strend - len + 1, little, len - 1))
1646 last = strend - len + 1;
1647 else if (!PL_multiline)
1648 last = memEQ(strend - len, little, len)
1649 ? strend - len : Nullch;
b8c5462f 1650 else
d6a28714
JH
1651 goto find_last;
1652 } else {
1653 find_last:
1654 if (len)
1655 last = rninstr(s, strend, little, little + len);
b8c5462f 1656 else
d6a28714 1657 last = strend; /* matching `$' */
b8c5462f 1658 }
b8c5462f 1659 }
bf93d4cc
GS
1660 if (last == NULL) {
1661 DEBUG_r(PerlIO_printf(Perl_debug_log,
1662 "%sCan't trim the tail, match fails (should not happen)%s\n",
1663 PL_colors[4],PL_colors[5]));
1664 goto phooey; /* Should not happen! */
1665 }
d6a28714
JH
1666 dontbother = strend - last + prog->float_min_offset;
1667 }
1668 if (minlen && (dontbother < minlen))
1669 dontbother = minlen - 1;
1670 strend -= dontbother; /* this one's always in bytes! */
1671 /* We don't know much -- general case. */
1672 if (UTF) {
1673 for (;;) {
1674 if (regtry(prog, s))
1675 goto got_it;
1676 if (s >= strend)
1677 break;
b8c5462f 1678 s += UTF8SKIP(s);
d6a28714
JH
1679 };
1680 }
1681 else {
1682 do {
1683 if (regtry(prog, s))
1684 goto got_it;
1685 } while (s++ < strend);
1686 }
1687 }
1688
1689 /* Failure. */
1690 goto phooey;
1691
1692got_it:
1693 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1694
1695 if (PL_reg_eval_set) {
1696 /* Preserve the current value of $^R */
1697 if (oreplsv != GvSV(PL_replgv))
1698 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1699 restored, the value remains
1700 the same. */
1701 restore_pos(aTHXo_ 0);
1702 }
1703
1704 /* make sure $`, $&, $', and $digit will work later */
1705 if ( !(flags & REXEC_NOT_FIRST) ) {
1706 if (RX_MATCH_COPIED(prog)) {
1707 Safefree(prog->subbeg);
1708 RX_MATCH_COPIED_off(prog);
1709 }
1710 if (flags & REXEC_COPY_STR) {
1711 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1712
1713 s = savepvn(strbeg, i);
1714 prog->subbeg = s;
1715 prog->sublen = i;
1716 RX_MATCH_COPIED_on(prog);
1717 }
1718 else {
1719 prog->subbeg = strbeg;
1720 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1721 }
1722 }
1723
1724 return 1;
1725
1726phooey:
bf93d4cc
GS
1727 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1728 PL_colors[4],PL_colors[5]));
d6a28714
JH
1729 if (PL_reg_eval_set)
1730 restore_pos(aTHXo_ 0);
1731 return 0;
1732}
1733
1734/*
1735 - regtry - try match at specific point
1736 */
1737STATIC I32 /* 0 failure, 1 success */
1738S_regtry(pTHX_ regexp *prog, char *startpos)
1739{
d6a28714
JH
1740 register I32 i;
1741 register I32 *sp;
1742 register I32 *ep;
1743 CHECKPOINT lastcp;
1744
3c42a273
JH
1745#ifdef DEBUGGING
1746 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1747#endif
d6a28714
JH
1748 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1749 MAGIC *mg;
1750
1751 PL_reg_eval_set = RS_init;
1752 DEBUG_r(DEBUG_s(
b900a521
JH
1753 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1754 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1755 ));
e8347627 1756 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1757 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1758 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1759 SAVETMPS;
1760 /* Apparently this is not needed, judging by wantarray. */
e8347627 1761 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1762 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1763
1764 if (PL_reg_sv) {
1765 /* Make $_ available to executed code. */
1766 if (PL_reg_sv != DEFSV) {
1767 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1768 SAVESPTR(DEFSV);
1769 DEFSV = PL_reg_sv;
b8c5462f 1770 }
d6a28714
JH
1771
1772 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1773 && (mg = mg_find(PL_reg_sv, 'g')))) {
1774 /* prepare for quick setting of pos */
1775 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1776 mg = mg_find(PL_reg_sv, 'g');
1777 mg->mg_len = -1;
b8c5462f 1778 }
d6a28714
JH
1779 PL_reg_magic = mg;
1780 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1781 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1782 }
1783 if (!PL_reg_curpm)
0f79a09d 1784 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1785 PL_reg_curpm->op_pmregexp = prog;
1786 PL_reg_oldcurpm = PL_curpm;
1787 PL_curpm = PL_reg_curpm;
1788 if (RX_MATCH_COPIED(prog)) {
1789 /* Here is a serious problem: we cannot rewrite subbeg,
1790 since it may be needed if this match fails. Thus
1791 $` inside (?{}) could fail... */
1792 PL_reg_oldsaved = prog->subbeg;
1793 PL_reg_oldsavedlen = prog->sublen;
1794 RX_MATCH_COPIED_off(prog);
1795 }
1796 else
1797 PL_reg_oldsaved = Nullch;
1798 prog->subbeg = PL_bostr;
1799 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1800 }
1801 prog->startp[0] = startpos - PL_bostr;
1802 PL_reginput = startpos;
1803 PL_regstartp = prog->startp;
1804 PL_regendp = prog->endp;
1805 PL_reglastparen = &prog->lastparen;
1806 prog->lastparen = 0;
1807 PL_regsize = 0;
1808 DEBUG_r(PL_reg_starttry = startpos);
1809 if (PL_reg_start_tmpl <= prog->nparens) {
1810 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1811 if(PL_reg_start_tmp)
1812 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1813 else
1814 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1815 }
1816
1817 /* XXXX What this code is doing here?!!! There should be no need
1818 to do this again and again, PL_reglastparen should take care of
3c42a273
JH
1819 this! --ilya*/
1820
1821 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1822 * Actually, the code in regcppop() (which Ilya may be meaning by
1823 * PL_reglastparen), is not needed at all by the test suite
1824 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1825 * enough, for building DynaLoader, or otherwise this
1826 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1827 * will happen. Meanwhile, this code *is* needed for the
1828 * above-mentioned test suite tests to succeed. The common theme
1829 * on those tests seems to be returning null fields from matches.
1830 * --jhi */
1831#if 1
d6a28714
JH
1832 sp = prog->startp;
1833 ep = prog->endp;
1834 if (prog->nparens) {
3c42a273 1835 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1836 *++sp = -1;
1837 *++ep = -1;
1838 }
1839 }
3c42a273
JH
1840#endif
1841 REGCP_SET(lastcp);
d6a28714
JH
1842 if (regmatch(prog->program + 1)) {
1843 prog->endp[0] = PL_reginput - PL_bostr;
1844 return 1;
1845 }
3c42a273 1846 REGCP_UNWIND(lastcp);
d6a28714
JH
1847 return 0;
1848}
1849
3c42a273
JH
1850#define RE_UNWIND_BRANCH 1
1851#define RE_UNWIND_BRANCHJ 2
1852
1853union re_unwind_t;
1854
1855typedef struct { /* XX: makes sense to enlarge it... */
1856 I32 type;
1857 I32 prev;
1858 CHECKPOINT lastcp;
1859} re_unwind_generic_t;
1860
1861typedef struct {
1862 I32 type;
1863 I32 prev;
1864 CHECKPOINT lastcp;
1865 I32 lastparen;
1866 regnode *next;
1867 char *locinput;
1868 I32 nextchr;
1869#ifdef DEBUGGING
1870 int regindent;
1871#endif
1872} re_unwind_branch_t;
1873
1874typedef union re_unwind_t {
1875 I32 type;
1876 re_unwind_generic_t generic;
1877 re_unwind_branch_t branch;
1878} re_unwind_t;
1879
d6a28714
JH
1880/*
1881 - regmatch - main matching routine
1882 *
1883 * Conceptually the strategy is simple: check to see whether the current
1884 * node matches, call self recursively to see whether the rest matches,
1885 * and then act accordingly. In practice we make some effort to avoid
1886 * recursion, in particular by going through "ordinary" nodes (that don't
1887 * need to know whether the rest of the match failed) by a loop instead of
1888 * by recursion.
1889 */
1890/* [lwall] I've hoisted the register declarations to the outer block in order to
1891 * maybe save a little bit of pushing and popping on the stack. It also takes
1892 * advantage of machines that use a register save mask on subroutine entry.
1893 */
1894STATIC I32 /* 0 failure, 1 success */
1895S_regmatch(pTHX_ regnode *prog)
1896{
d6a28714
JH
1897 register regnode *scan; /* Current node. */
1898 regnode *next; /* Next node. */
1899 regnode *inner; /* Next node in internal branch. */
1900 register I32 nextchr; /* renamed nextchr - nextchar colides with
1901 function of same name */
1902 register I32 n; /* no or next */
1903 register I32 ln; /* len or last */
1904 register char *s; /* operand or save */
1905 register char *locinput = PL_reginput;
1906 register I32 c1, c2, paren; /* case fold search, parenth */
1907 int minmod = 0, sw = 0, logical = 0;
3c42a273
JH
1908 I32 unwind = 0;
1909 I32 firstcp = PL_savestack_ix;
1910
d6a28714
JH
1911#ifdef DEBUGGING
1912 PL_regindent++;
1913#endif
1914
1915 /* Note that nextchr is a byte even in UTF */
1916 nextchr = UCHARAT(locinput);
1917 scan = prog;
1918 while (scan != NULL) {
1919#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
3c42a273 1920#if 1
d6a28714
JH
1921# define sayYES goto yes
1922# define sayNO goto no
7821416a
IZ
1923# define sayYES_FINAL goto yes_final
1924# define sayYES_LOUD goto yes_loud
1925# define sayNO_FINAL goto no_final
1926# define sayNO_SILENT goto do_no
d6a28714
JH
1927# define saySAME(x) if (x) goto yes; else goto no
1928# define REPORT_CODE_OFF 24
1929#else
1930# define sayYES return 1
1931# define sayNO return 0
7821416a
IZ
1932# define sayYES_FINAL return 1
1933# define sayYES_LOUD return 1
1934# define sayNO_FINAL return 0
1935# define sayNO_SILENT return 0
d6a28714
JH
1936# define saySAME(x) return x
1937#endif
1938 DEBUG_r( {
1939 SV *prop = sv_newmortal();
1940 int docolor = *PL_colors[0];
1941 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1942 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1943 /* The part of the string before starttry has one color
1944 (pref0_len chars), between starttry and current
1945 position another one (pref_len - pref0_len chars),
1946 after the current position the third one.
1947 We assume that pref0_len <= pref_len, otherwise we
1948 decrease pref0_len. */
1949 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1950 ? (5 + taill) - l : locinput - PL_bostr);
1951 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1952
1953 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1954 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1955 ? (5 + taill) - pref_len : PL_regeol - locinput);
1956 if (pref0_len < 0)
1957 pref0_len = 0;
1958 if (pref0_len > pref_len)
1959 pref0_len = pref_len;
1960 regprop(prop, scan);
1961 PerlIO_printf(Perl_debug_log,
b900a521
JH
1962 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1963 (IV)(locinput - PL_bostr),
d6a28714
JH
1964 PL_colors[4], pref0_len,
1965 locinput - pref_len, PL_colors[5],
1966 PL_colors[2], pref_len - pref0_len,
1967 locinput - pref_len + pref0_len, PL_colors[3],
1968 (docolor ? "" : "> <"),
1969 PL_colors[0], l, locinput, PL_colors[1],
1970 15 - l - pref_len + 1,
1971 "",
b900a521 1972 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1973 SvPVX(prop));
1974 } );
1975
1976 next = scan + NEXT_OFF(scan);
1977 if (next == scan)
1978 next = NULL;
1979
1980 switch (OP(scan)) {
1981 case BOL:
1982 if (locinput == PL_bostr
1983 ? PL_regprev == '\n'
1984 : (PL_multiline &&
1985 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1986 {
1987 /* regtill = regbol; */
b8c5462f
JH
1988 break;
1989 }
d6a28714
JH
1990 sayNO;
1991 case MBOL:
1992 if (locinput == PL_bostr
1993 ? PL_regprev == '\n'
1994 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1995 {
b8c5462f
JH
1996 break;
1997 }
d6a28714
JH
1998 sayNO;
1999 case SBOL:
c2a73568 2000 if (locinput == PL_bostr)
b8c5462f 2001 break;
d6a28714
JH
2002 sayNO;
2003 case GPOS:
2004 if (locinput == PL_reg_ganch)
2005 break;
2006 sayNO;
2007 case EOL:
2008 if (PL_multiline)
2009 goto meol;
2010 else
2011 goto seol;
2012 case MEOL:
2013 meol:
2014 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2015 sayNO;
b8c5462f 2016 break;
d6a28714
JH
2017 case SEOL:
2018 seol:
2019 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2020 sayNO;
d6a28714 2021 if (PL_regeol - locinput > 1)
b8c5462f 2022 sayNO;
b8c5462f 2023 break;
d6a28714
JH
2024 case EOS:
2025 if (PL_regeol != locinput)
b8c5462f 2026 sayNO;
d6a28714
JH
2027 break;
2028 case SANYUTF8:
b8c5462f 2029 if (nextchr & 0x80) {
b8c5462f 2030 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2031 if (locinput > PL_regeol)
2032 sayNO;
b8c5462f
JH
2033 nextchr = UCHARAT(locinput);
2034 break;
2035 }
d6a28714 2036 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2037 sayNO;
b8c5462f 2038 nextchr = UCHARAT(++locinput);
a0d0e21e 2039 break;
d6a28714
JH
2040 case SANY:
2041 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2042 sayNO;
2043 nextchr = UCHARAT(++locinput);
b85d18e9 2044 break;
d6a28714 2045 case ANYUTF8:
a0ed51b3 2046 if (nextchr & 0x80) {
b8c5462f 2047 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2048 if (locinput > PL_regeol)
2049 sayNO;
a0ed51b3
LW
2050 nextchr = UCHARAT(locinput);
2051 break;
2052 }
155aba94 2053 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
2054 sayNO;
2055 nextchr = UCHARAT(++locinput);
2056 break;
d6a28714 2057 case REG_ANY:
155aba94 2058 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 2059 sayNO;
76e3520e 2060 nextchr = UCHARAT(++locinput);
a0d0e21e 2061 break;
d6a28714 2062 case EXACT:
cd439c50
IZ
2063 s = STRING(scan);
2064 ln = STR_LEN(scan);
d6a28714
JH
2065 /* Inline the first character, for speed. */
2066 if (UCHARAT(s) != nextchr)
2067 sayNO;
2068 if (PL_regeol - locinput < ln)
2069 sayNO;
2070 if (ln > 1 && memNE(s, locinput, ln))
2071 sayNO;
2072 locinput += ln;
2073 nextchr = UCHARAT(locinput);
2074 break;
2075 case EXACTFL:
b8c5462f
JH
2076 PL_reg_flags |= RF_tainted;
2077 /* FALL THROUGH */
d6a28714 2078 case EXACTF:
cd439c50
IZ
2079 s = STRING(scan);
2080 ln = STR_LEN(scan);
d6a28714
JH
2081
2082 if (UTF) {
2083 char *l = locinput;
2084 char *e = s + ln;
2085 c1 = OP(scan) == EXACTF;
2086 while (s < e) {
2087 if (l >= PL_regeol)
2088 sayNO;
3c42a273
JH
2089 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2090 (c1 ?
2091 toLOWER_utf8((U8*)l) :
2092 toLOWER_LC_utf8((U8*)l)))
d6a28714
JH
2093 {
2094 sayNO;
2095 }
2096 s += UTF8SKIP(s);
2097 l += UTF8SKIP(l);
b8c5462f 2098 }
d6a28714 2099 locinput = l;
a0ed51b3
LW
2100 nextchr = UCHARAT(locinput);
2101 break;
2102 }
d6a28714
JH
2103
2104 /* Inline the first character, for speed. */
2105 if (UCHARAT(s) != nextchr &&
2106 UCHARAT(s) != ((OP(scan) == EXACTF)
2107 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2108 sayNO;
d6a28714 2109 if (PL_regeol - locinput < ln)
b8c5462f 2110 sayNO;
d6a28714
JH
2111 if (ln > 1 && (OP(scan) == EXACTF
2112 ? ibcmp(s, locinput, ln)
2113 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2114 sayNO;
d6a28714
JH
2115 locinput += ln;
2116 nextchr = UCHARAT(locinput);
a0d0e21e 2117 break;
d6a28714 2118 case ANYOFUTF8:
d6a28714 2119 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2120 sayNO;
d6a28714
JH
2121 if (locinput >= PL_regeol)
2122 sayNO;
2123 locinput += PL_utf8skip[nextchr];
2124 nextchr = UCHARAT(locinput);
2125 break;
2126 case ANYOF:
d6a28714 2127 if (nextchr < 0)
b8c5462f 2128 nextchr = UCHARAT(locinput);
936ed897 2129 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2130 sayNO;
2131 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2132 sayNO;
b8c5462f
JH
2133 nextchr = UCHARAT(++locinput);
2134 break;
d6a28714 2135 case ALNUML:
b8c5462f
JH
2136 PL_reg_flags |= RF_tainted;
2137 /* FALL THROUGH */
d6a28714 2138 case ALNUM:
b8c5462f 2139 if (!nextchr)
4633a7c4 2140 sayNO;
d6a28714
JH
2141 if (!(OP(scan) == ALNUM
2142 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2143 sayNO;
2144 nextchr = UCHARAT(++locinput);
bbce6d69 2145 break;
d6a28714 2146 case ALNUMLUTF8:
3280af22 2147 PL_reg_flags |= RF_tainted;
bbce6d69 2148 /* FALL THROUGH */
d6a28714 2149 case ALNUMUTF8:
b8c5462f
JH
2150 if (!nextchr)
2151 sayNO;
2152 if (nextchr & 0x80) {
d6a28714
JH
2153 if (!(OP(scan) == ALNUMUTF8
2154 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2155 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2156 {
2157 sayNO;
a0ed51b3 2158 }
b8c5462f 2159 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2160 nextchr = UCHARAT(locinput);
2161 break;
2162 }
d6a28714
JH
2163 if (!(OP(scan) == ALNUMUTF8
2164 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2165 sayNO;
b8c5462f 2166 nextchr = UCHARAT(++locinput);
a0d0e21e 2167 break;
d6a28714 2168 case NALNUML:
b8c5462f
JH
2169 PL_reg_flags |= RF_tainted;
2170 /* FALL THROUGH */
d6a28714
JH
2171 case NALNUM:
2172 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2173 sayNO;
d6a28714
JH
2174 if (OP(scan) == NALNUM
2175 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2176 sayNO;
b8c5462f 2177 nextchr = UCHARAT(++locinput);
a0ed51b3 2178 break;
d6a28714 2179 case NALNUMLUTF8:
b8c5462f
JH
2180 PL_reg_flags |= RF_tainted;
2181 /* FALL THROUGH */
d6a28714 2182 case NALNUMUTF8:
3280af22 2183 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2184 sayNO;
b8c5462f 2185 if (nextchr & 0x80) {
d6a28714
JH
2186 if (OP(scan) == NALNUMUTF8
2187 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2188 : isALNUM_LC_utf8((U8*)locinput))
2189 {
b8c5462f 2190 sayNO;
d6a28714 2191 }
b8c5462f
JH
2192 locinput += PL_utf8skip[nextchr];
2193 nextchr = UCHARAT(locinput);
2194 break;
2195 }
d6a28714
JH
2196 if (OP(scan) == NALNUMUTF8
2197 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2198 sayNO;
76e3520e 2199 nextchr = UCHARAT(++locinput);
a0d0e21e 2200 break;
d6a28714
JH
2201 case BOUNDL:
2202 case NBOUNDL:
3280af22 2203 PL_reg_flags |= RF_tainted;
bbce6d69 2204 /* FALL THROUGH */
d6a28714
JH
2205 case BOUND:
2206 case NBOUND:
2207 /* was last char in word? */
2208 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2209 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2210 ln = isALNUM(ln);
2211 n = isALNUM(nextchr);
2212 }
2213 else {
2214 ln = isALNUM_LC(ln);
2215 n = isALNUM_LC(nextchr);
2216 }
2217 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2218 sayNO;
a0d0e21e 2219 break;
d6a28714
JH
2220 case BOUNDLUTF8:
2221 case NBOUNDLUTF8:
a0ed51b3
LW
2222 PL_reg_flags |= RF_tainted;
2223 /* FALL THROUGH */
d6a28714
JH
2224 case BOUNDUTF8:
2225 case NBOUNDUTF8:
2226 /* was last char in word? */
3c42a273
JH
2227 if (locinput == PL_regbol)
2228 ln = PL_regprev;
2229 else {
2230 U8 *r = reghop((U8*)locinput, -1);
2231
2232 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2233 }
d6a28714
JH
2234 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2235 ln = isALNUM_uni(ln);
2236 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2237 }
d6a28714
JH
2238 else {
2239 ln = isALNUM_LC_uni(ln);
2240 n = isALNUM_LC_utf8((U8*)locinput);
2241 }
2242 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2243 sayNO;
a0ed51b3 2244 break;
d6a28714 2245 case SPACEL:
3280af22 2246 PL_reg_flags |= RF_tainted;
bbce6d69 2247 /* FALL THROUGH */
d6a28714 2248 case SPACE:
9442cb0e 2249 if (!nextchr)
4633a7c4 2250 sayNO;
d6a28714
JH
2251 if (!(OP(scan) == SPACE
2252 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2253 sayNO;
76e3520e 2254 nextchr = UCHARAT(++locinput);
a0d0e21e 2255 break;
d6a28714 2256 case SPACELUTF8:
a0ed51b3
LW
2257 PL_reg_flags |= RF_tainted;
2258 /* FALL THROUGH */
d6a28714 2259 case SPACEUTF8:
9442cb0e 2260 if (!nextchr)
a0ed51b3
LW
2261 sayNO;
2262 if (nextchr & 0x80) {
d6a28714 2263 if (!(OP(scan) == SPACEUTF8
9442cb0e 2264 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2265 : isSPACE_LC_utf8((U8*)locinput)))
2266 {
a0ed51b3 2267 sayNO;
d6a28714 2268 }
6f06b55f 2269 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2270 nextchr = UCHARAT(locinput);
2271 break;
2272 }
d6a28714
JH
2273 if (!(OP(scan) == SPACEUTF8
2274 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2275 sayNO;
2276 nextchr = UCHARAT(++locinput);
2277 break;
d6a28714 2278 case NSPACEL:
3280af22 2279 PL_reg_flags |= RF_tainted;
bbce6d69 2280 /* FALL THROUGH */
d6a28714 2281 case NSPACE:
9442cb0e 2282 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2283 sayNO;
9442cb0e 2284 if (OP(scan) == NSPACE
d6a28714 2285 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2286 sayNO;
b8c5462f 2287 nextchr = UCHARAT(++locinput);
a0d0e21e 2288 break;
d6a28714 2289 case NSPACELUTF8:
a0ed51b3
LW
2290 PL_reg_flags |= RF_tainted;
2291 /* FALL THROUGH */
d6a28714 2292 case NSPACEUTF8:
9442cb0e 2293 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2294 sayNO;
2295 if (nextchr & 0x80) {
d6a28714 2296 if (OP(scan) == NSPACEUTF8
9442cb0e 2297 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2298 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2299 {
2300 sayNO;
2301 }
2302 locinput += PL_utf8skip[nextchr];
2303 nextchr = UCHARAT(locinput);
2304 break;
a0ed51b3 2305 }
d6a28714
JH
2306 if (OP(scan) == NSPACEUTF8
2307 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2308 sayNO;
76e3520e 2309 nextchr = UCHARAT(++locinput);
a0d0e21e 2310 break;
d6a28714 2311 case DIGITL:
a0ed51b3
LW
2312 PL_reg_flags |= RF_tainted;
2313 /* FALL THROUGH */
d6a28714 2314 case DIGIT:
9442cb0e 2315 if (!nextchr)
a0ed51b3 2316 sayNO;
d6a28714
JH
2317 if (!(OP(scan) == DIGIT
2318 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2319 sayNO;
76e3520e 2320 nextchr = UCHARAT(++locinput);
a0d0e21e 2321 break;
d6a28714 2322 case DIGITLUTF8:
a0ed51b3
LW
2323 PL_reg_flags |= RF_tainted;
2324 /* FALL THROUGH */
d6a28714 2325 case DIGITUTF8:
a0ed51b3
LW
2326 if (!nextchr)
2327 sayNO;
2328 if (nextchr & 0x80) {
9442cb0e
GS
2329 if (!(OP(scan) == DIGITUTF8
2330 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2331 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2332 {
a0ed51b3 2333 sayNO;
dfe13c55 2334 }
6f06b55f 2335 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2336 nextchr = UCHARAT(locinput);
2337 break;
2338 }
9442cb0e
GS
2339 if (!(OP(scan) == DIGITUTF8
2340 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2341 sayNO;
2342 nextchr = UCHARAT(++locinput);
2343 break;
d6a28714 2344 case NDIGITL:
b8c5462f
JH
2345 PL_reg_flags |= RF_tainted;
2346 /* FALL THROUGH */
d6a28714 2347 case NDIGIT:
9442cb0e 2348 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2349 sayNO;
9442cb0e 2350 if (OP(scan) == NDIGIT
d6a28714 2351 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2352 sayNO;
76e3520e 2353 nextchr = UCHARAT(++locinput);
a0d0e21e 2354 break;
d6a28714 2355 case NDIGITLUTF8:
b8c5462f
JH
2356 PL_reg_flags |= RF_tainted;
2357 /* FALL THROUGH */
d6a28714 2358 case NDIGITUTF8:
b8c5462f
JH
2359 if (!nextchr && locinput >= PL_regeol)
2360 sayNO;
a0ed51b3 2361 if (nextchr & 0x80) {
9442cb0e
GS
2362 if (OP(scan) == NDIGITUTF8
2363 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2364 : isDIGIT_LC_utf8((U8*)locinput))
2365 {
a0ed51b3 2366 sayNO;
9442cb0e 2367 }
6f06b55f 2368 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2369 nextchr = UCHARAT(locinput);
2370 break;
2371 }
9442cb0e
GS
2372 if (OP(scan) == NDIGITUTF8
2373 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2374 sayNO;
2375 nextchr = UCHARAT(++locinput);
2376 break;
2377 case CLUMP:
dfe13c55 2378 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2379 sayNO;
6f06b55f 2380 locinput += PL_utf8skip[nextchr];
dfe13c55 2381 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2382 locinput += UTF8SKIP(locinput);
2383 if (locinput > PL_regeol)
2384 sayNO;
2385 nextchr = UCHARAT(locinput);
2386 break;
c8756f30 2387 case REFFL:
3280af22 2388 PL_reg_flags |= RF_tainted;
c8756f30 2389 /* FALL THROUGH */
c277df42 2390 case REF:
c8756f30 2391 case REFF:
c277df42 2392 n = ARG(scan); /* which paren pair */
cf93c79d 2393 ln = PL_regstartp[n];
2c2d71f5 2394 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2395 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2396 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2397 if (ln == PL_regendp[n])
a0d0e21e 2398 break;
a0ed51b3 2399
cf93c79d 2400 s = PL_bostr + ln;
a0ed51b3
LW
2401 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2402 char *l = locinput;
cf93c79d 2403 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2404 /*
2405 * Note that we can't do the "other character" lookup trick as
2406 * in the 8-bit case (no pun intended) because in Unicode we
2407 * have to map both upper and title case to lower case.
2408 */
2409 if (OP(scan) == REFF) {
2410 while (s < e) {
2411 if (l >= PL_regeol)
2412 sayNO;
dfe13c55 2413 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2414 sayNO;
2415 s += UTF8SKIP(s);
2416 l += UTF8SKIP(l);
2417 }
2418 }
2419 else {
2420 while (s < e) {
2421 if (l >= PL_regeol)
2422 sayNO;
dfe13c55 2423 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2424 sayNO;
2425 s += UTF8SKIP(s);
2426 l += UTF8SKIP(l);
2427 }
2428 }
2429 locinput = l;
2430 nextchr = UCHARAT(locinput);
2431 break;
2432 }
2433
a0d0e21e 2434 /* Inline the first character, for speed. */
76e3520e 2435 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2436 (OP(scan) == REF ||
2437 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2438 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2439 sayNO;
cf93c79d 2440 ln = PL_regendp[n] - ln;
3280af22 2441 if (locinput + ln > PL_regeol)
4633a7c4 2442 sayNO;
c8756f30
AK
2443 if (ln > 1 && (OP(scan) == REF
2444 ? memNE(s, locinput, ln)
2445 : (OP(scan) == REFF
2446 ? ibcmp(s, locinput, ln)
2447 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2448 sayNO;
a0d0e21e 2449 locinput += ln;
76e3520e 2450 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2451 break;
2452
2453 case NOTHING:
c277df42 2454 case TAIL:
a0d0e21e
LW
2455 break;
2456 case BACK:
2457 break;
c277df42
IZ
2458 case EVAL:
2459 {
2460 dSP;
533c011a 2461 OP_4tree *oop = PL_op;
3280af22
NIS
2462 COP *ocurcop = PL_curcop;
2463 SV **ocurpad = PL_curpad;
c277df42
IZ
2464 SV *ret;
2465
2466 n = ARG(scan);
533c011a 2467 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2468 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2469 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2470 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2471
cea2e8a9 2472 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2473 SPAGAIN;
2474 ret = POPs;
2475 PUTBACK;
2476
0f5d15d6
IZ
2477 PL_op = oop;
2478 PL_curpad = ocurpad;
2479 PL_curcop = ocurcop;
c277df42 2480 if (logical) {
0f5d15d6
IZ
2481 if (logical == 2) { /* Postponed subexpression. */
2482 regexp *re;
22c35a8c 2483 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2484 re_cc_state state;
0f5d15d6
IZ
2485 CHECKPOINT cp, lastcp;
2486
2487 if(SvROK(ret) || SvRMAGICAL(ret)) {
2488 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2489
2490 if(SvMAGICAL(sv))
2491 mg = mg_find(sv, 'r');
2492 }
2493 if (mg) {
2494 re = (regexp *)mg->mg_obj;
df0003d4 2495 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2496 }
2497 else {
2498 STRLEN len;
2499 char *t = SvPV(ret, len);
2500 PMOP pm;
2501 char *oprecomp = PL_regprecomp;
2502 I32 osize = PL_regsize;
2503 I32 onpar = PL_regnpar;
2504
2505 pm.op_pmflags = 0;
393fec97 2506 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2507 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2508 if (!(SvFLAGS(ret)
2509 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2510 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2511 PL_regprecomp = oprecomp;
2512 PL_regsize = osize;
2513 PL_regnpar = onpar;
2514 }
2515 DEBUG_r(
2516 PerlIO_printf(Perl_debug_log,
2517 "Entering embedded `%s%.60s%s%s'\n",
2518 PL_colors[0],
2519 re->precomp,
2520 PL_colors[1],
2521 (strlen(re->precomp) > 60 ? "..." : ""))
2522 );
2523 state.node = next;
2524 state.prev = PL_reg_call_cc;
2525 state.cc = PL_regcc;
2526 state.re = PL_reg_re;
2527
2ab05381 2528 PL_regcc = 0;
0f5d15d6
IZ
2529
2530 cp = regcppush(0); /* Save *all* the positions. */
3c42a273 2531 REGCP_SET(lastcp);
0f5d15d6
IZ
2532 cache_re(re);
2533 state.ss = PL_savestack_ix;
2534 *PL_reglastparen = 0;
2535 PL_reg_call_cc = &state;
2536 PL_reginput = locinput;
2c2d71f5
JH
2537
2538 /* XXXX This is too dramatic a measure... */
2539 PL_reg_maxiter = 0;
2540
0f5d15d6 2541 if (regmatch(re->program + 1)) {
2c914db6
IZ
2542 /* Even though we succeeded, we need to restore
2543 global variables, since we may be wrapped inside
2544 SUSPEND, thus the match may be not finished yet. */
2545
2546 /* XXXX Do this only if SUSPENDed? */
2547 PL_reg_call_cc = state.prev;
2548 PL_regcc = state.cc;
2549 PL_reg_re = state.re;
2550 cache_re(PL_reg_re);
2551
2552 /* XXXX This is too dramatic a measure... */
2553 PL_reg_maxiter = 0;
2554
2555 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2556 ReREFCNT_dec(re);
2557 regcpblow(cp);
2558 sayYES;
2559 }
0f5d15d6 2560 ReREFCNT_dec(re);
3c42a273 2561 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2562 regcppop();
2563 PL_reg_call_cc = state.prev;
2564 PL_regcc = state.cc;
2565 PL_reg_re = state.re;
d3790889 2566 cache_re(PL_reg_re);
2c2d71f5
JH
2567
2568 /* XXXX This is too dramatic a measure... */
2569 PL_reg_maxiter = 0;
2570
0f5d15d6
IZ
2571 sayNO;
2572 }
c277df42 2573 sw = SvTRUE(ret);
0f5d15d6 2574 logical = 0;
a0ed51b3
LW
2575 }
2576 else
3280af22 2577 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2578 break;
2579 }
a0d0e21e 2580 case OPEN:
c277df42 2581 n = ARG(scan); /* which paren pair */
3280af22
NIS
2582 PL_reg_start_tmp[n] = locinput;
2583 if (n > PL_regsize)
2584 PL_regsize = n;
a0d0e21e
LW
2585 break;
2586 case CLOSE:
c277df42 2587 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2588 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2589 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2590 if (n > *PL_reglastparen)
2591 *PL_reglastparen = n;
a0d0e21e 2592 break;
c277df42
IZ
2593 case GROUPP:
2594 n = ARG(scan); /* which paren pair */
cf93c79d 2595 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2596 break;
2597 case IFTHEN:
2c2d71f5 2598 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2599 if (sw)
2600 next = NEXTOPER(NEXTOPER(scan));
2601 else {
2602 next = scan + ARG(scan);
2603 if (OP(next) == IFTHEN) /* Fake one. */
2604 next = NEXTOPER(NEXTOPER(next));
2605 }
2606 break;
2607 case LOGICAL:
0f5d15d6 2608 logical = scan->flags;
c277df42 2609 break;
2ab05381
IZ
2610/*******************************************************************
2611 PL_regcc contains infoblock about the innermost (...)* loop, and
2612 a pointer to the next outer infoblock.
2613
2614 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2615
2616 1) After matching X, regnode for CURLYX is processed;
2617
2618 2) This regnode creates infoblock on the stack, and calls
2619 regmatch() recursively with the starting point at WHILEM node;
2620
2621 3) Each hit of WHILEM node tries to match A and Z (in the order
2622 depending on the current iteration, min/max of {min,max} and
2623 greediness). The information about where are nodes for "A"
2624 and "Z" is read from the infoblock, as is info on how many times "A"
2625 was already matched, and greediness.
2626
2627 4) After A matches, the same WHILEM node is hit again.
2628
2629 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2630 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2631 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2632 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2633 of the external loop.
2634
2635 Currently present infoblocks form a tree with a stem formed by PL_curcc
2636 and whatever it mentions via ->next, and additional attached trees
2637 corresponding to temporarily unset infoblocks as in "5" above.
2638
2639 In the following picture infoblocks for outer loop of
2640 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2641 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2642 infoblocks are drawn below the "reset" infoblock.
2643
2644 In fact in the picture below we do not show failed matches for Z and T
2645 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2646 more obvious *why* one needs to *temporary* unset infoblocks.]
2647
2648 Matched REx position InfoBlocks Comment
2649 (Y(A)*?Z)*?T x
2650 Y(A)*?Z)*?T x <- O
2651 Y (A)*?Z)*?T x <- O
2652 Y A)*?Z)*?T x <- O <- I
2653 YA )*?Z)*?T x <- O <- I
2654 YA A)*?Z)*?T x <- O <- I
2655 YAA )*?Z)*?T x <- O <- I
2656 YAA Z)*?T x <- O # Temporary unset I
2657 I
2658
2659 YAAZ Y(A)*?Z)*?T x <- O
2660 I
2661
2662 YAAZY (A)*?Z)*?T x <- O
2663 I
2664
2665 YAAZY A)*?Z)*?T x <- O <- I
2666 I
2667
2668 YAAZYA )*?Z)*?T x <- O <- I
2669 I
2670
2671 YAAZYA Z)*?T x <- O # Temporary unset I
2672 I,I
2673
2674 YAAZYAZ )*?T x <- O
2675 I,I
2676
2677 YAAZYAZ T x # Temporary unset O
2678 O
2679 I,I
2680
2681 YAAZYAZT x
2682 O
2683 I,I
2684 *******************************************************************/
a0d0e21e
LW
2685 case CURLYX: {
2686 CURCUR cc;
3280af22 2687 CHECKPOINT cp = PL_savestack_ix;
64313985
GS
2688 /* No need to save/restore up to this paren */
2689 I32 parenfloor = scan->flags;
c277df42
IZ
2690
2691 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2692 next += ARG(next);
3280af22
NIS
2693 cc.oldcc = PL_regcc;
2694 PL_regcc = &cc;
64313985
GS
2695 /* XXXX Probably it is better to teach regpush to support
2696 parenfloor > PL_regsize... */
2697 if (parenfloor > *PL_reglastparen)
2698 parenfloor = *PL_reglastparen; /* Pessimization... */
2699 cc.parenfloor = parenfloor;
a0d0e21e
LW
2700 cc.cur = -1;
2701 cc.min = ARG1(scan);
2702 cc.max = ARG2(scan);
c277df42 2703 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2704 cc.next = next;
2705 cc.minmod = minmod;
2706 cc.lastloc = 0;
3280af22 2707 PL_reginput = locinput;
a0d0e21e
LW
2708 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2709 regcpblow(cp);
3280af22 2710 PL_regcc = cc.oldcc;
4633a7c4 2711 saySAME(n);
a0d0e21e
LW
2712 }
2713 /* NOT REACHED */
2714 case WHILEM: {
2715 /*
2716 * This is really hard to understand, because after we match
2717 * what we're trying to match, we must make sure the rest of
2c2d71f5 2718 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2719 * to go back UP the parse tree by recursing ever deeper. And
2720 * if it fails, we have to reset our parent's current state
2721 * that we can try again after backing off.
2722 */
2723
c277df42 2724 CHECKPOINT cp, lastcp;
3280af22 2725 CURCUR* cc = PL_regcc;
c277df42
IZ
2726 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2727
4633a7c4 2728 n = cc->cur + 1; /* how many we know we matched */
3280af22 2729 PL_reginput = locinput;
a0d0e21e 2730
c277df42
IZ
2731 DEBUG_r(
2732 PerlIO_printf(Perl_debug_log,
2733 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2734 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2735 (long)n, (long)cc->min,
2736 (long)cc->max, (long)cc)
2737 );
4633a7c4 2738
a0d0e21e
LW
2739 /* If degenerate scan matches "", assume scan done. */
2740
579cf2c3 2741 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2742 PL_regcc = cc->oldcc;
2ab05381
IZ
2743 if (PL_regcc)
2744 ln = PL_regcc->cur;
c277df42 2745 DEBUG_r(
c3464db5
DD
2746 PerlIO_printf(Perl_debug_log,
2747 "%*s empty match detected, try continuation...\n",
3280af22 2748 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2749 );
a0d0e21e 2750 if (regmatch(cc->next))
4633a7c4 2751 sayYES;
2ab05381
IZ
2752 if (PL_regcc)
2753 PL_regcc->cur = ln;
3280af22 2754 PL_regcc = cc;
4633a7c4 2755 sayNO;
a0d0e21e
LW
2756 }
2757
2758 /* First just match a string of min scans. */
2759
2760 if (n < cc->min) {
2761 cc->cur = n;
2762 cc->lastloc = locinput;
4633a7c4
LW
2763 if (regmatch(cc->scan))
2764 sayYES;
2765 cc->cur = n - 1;
c277df42 2766 cc->lastloc = lastloc;
4633a7c4 2767 sayNO;
a0d0e21e
LW
2768 }
2769
2c2d71f5
JH
2770 if (scan->flags) {
2771 /* Check whether we already were at this position.
2772 Postpone detection until we know the match is not
2773 *that* much linear. */
2774 if (!PL_reg_maxiter) {
2775 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2776 PL_reg_leftiter = PL_reg_maxiter;
2777 }
2778 if (PL_reg_leftiter-- == 0) {
2779 I32 size = (PL_reg_maxiter + 7)/8;
2780 if (PL_reg_poscache) {
2781 if (PL_reg_poscache_size < size) {
2782 Renew(PL_reg_poscache, size, char);
2783 PL_reg_poscache_size = size;
2784 }
2785 Zero(PL_reg_poscache, size, char);
2786 }
2787 else {
2788 PL_reg_poscache_size = size;
2789 Newz(29, PL_reg_poscache, size, char);
2790 }
2791 DEBUG_r(
2792 PerlIO_printf(Perl_debug_log,
2793 "%sDetected a super-linear match, switching on caching%s...\n",
2794 PL_colors[4], PL_colors[5])
2795 );
2796 }
2797 if (PL_reg_leftiter < 0) {
2798 I32 o = locinput - PL_bostr, b;
2799
2800 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2801 b = o % 8;
2802 o /= 8;
2803 if (PL_reg_poscache[o] & (1<<b)) {
2804 DEBUG_r(
2805 PerlIO_printf(Perl_debug_log,
2806 "%*s already tried at this position...\n",
2807 REPORT_CODE_OFF+PL_regindent*2, "")
2808 );
7821416a 2809 sayNO_SILENT;
2c2d71f5
JH
2810 }
2811 PL_reg_poscache[o] |= (1<<b);
2812 }
2813 }
2814
a0d0e21e
LW
2815 /* Prefer next over scan for minimal matching. */
2816
2817 if (cc->minmod) {
3280af22 2818 PL_regcc = cc->oldcc;
2ab05381
IZ
2819 if (PL_regcc)
2820 ln = PL_regcc->cur;
5f05dabc 2821 cp = regcppush(cc->parenfloor);
3c42a273 2822 REGCP_SET(lastcp);
5f05dabc 2823 if (regmatch(cc->next)) {
c277df42 2824 regcpblow(cp);
4633a7c4 2825 sayYES; /* All done. */
5f05dabc 2826 }
3c42a273 2827 REGCP_UNWIND(lastcp);
5f05dabc 2828 regcppop();
2ab05381
IZ
2829 if (PL_regcc)
2830 PL_regcc->cur = ln;
3280af22 2831 PL_regcc = cc;
a0d0e21e 2832
c277df42 2833 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2834 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2835 && !(PL_reg_flags & RF_warned)) {
2836 PL_reg_flags |= RF_warned;
e476b1b5 2837 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2838 "Complex regular subexpression recursion",
2839 REG_INFTY - 1);
c277df42 2840 }
4633a7c4 2841 sayNO;
c277df42 2842 }
a687059c 2843
c277df42 2844 DEBUG_r(
c3464db5
DD
2845 PerlIO_printf(Perl_debug_log,
2846 "%*s trying longer...\n",
3280af22 2847 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2848 );
a0d0e21e 2849 /* Try scanning more and see if it helps. */
3280af22 2850 PL_reginput = locinput;
a0d0e21e
LW
2851 cc->cur = n;
2852 cc->lastloc = locinput;
5f05dabc 2853 cp = regcppush(cc->parenfloor);
3c42a273 2854 REGCP_SET(lastcp);
5f05dabc 2855 if (regmatch(cc->scan)) {
c277df42 2856 regcpblow(cp);
4633a7c4 2857 sayYES;
5f05dabc 2858 }
3c42a273 2859 REGCP_UNWIND(lastcp);
5f05dabc 2860 regcppop();
4633a7c4 2861 cc->cur = n - 1;
c277df42 2862 cc->lastloc = lastloc;
4633a7c4 2863 sayNO;
a0d0e21e
LW
2864 }
2865
2866 /* Prefer scan over next for maximal matching. */
2867
2868 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2869 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2870 cc->cur = n;
2871 cc->lastloc = locinput;
3c42a273 2872 REGCP_SET(lastcp);
5f05dabc 2873 if (regmatch(cc->scan)) {
c277df42 2874 regcpblow(cp);
4633a7c4 2875 sayYES;
5f05dabc 2876 }
3c42a273 2877 REGCP_UNWIND(lastcp);
a0d0e21e 2878 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2879 PL_reginput = locinput;
c277df42 2880 DEBUG_r(
c3464db5
DD
2881 PerlIO_printf(Perl_debug_log,
2882 "%*s failed, try continuation...\n",
3280af22 2883 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2884 );
2885 }
e476b1b5 2886 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2887 && !(PL_reg_flags & RF_warned)) {
3280af22 2888 PL_reg_flags |= RF_warned;
e476b1b5 2889 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2890 "Complex regular subexpression recursion",
2891 REG_INFTY - 1);
a0d0e21e
LW
2892 }
2893
2894 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2895 PL_regcc = cc->oldcc;
2ab05381
IZ
2896 if (PL_regcc)
2897 ln = PL_regcc->cur;
a0d0e21e 2898 if (regmatch(cc->next))
4633a7c4 2899 sayYES;
2ab05381
IZ
2900 if (PL_regcc)
2901 PL_regcc->cur = ln;
3280af22 2902 PL_regcc = cc;
4633a7c4 2903 cc->cur = n - 1;
c277df42 2904 cc->lastloc = lastloc;
4633a7c4 2905 sayNO;
a0d0e21e
LW
2906 }
2907 /* NOT REACHED */
c277df42
IZ
2908 case BRANCHJ:
2909 next = scan + ARG(scan);
2910 if (next == scan)
2911 next = NULL;
2912 inner = NEXTOPER(NEXTOPER(scan));
2913 goto do_branch;
2914 case BRANCH:
2915 inner = NEXTOPER(scan);
2916 do_branch:
2917 {
2918 CHECKPOINT lastcp;
2919 c1 = OP(scan);
2920 if (OP(next) != c1) /* No choice. */
2921 next = inner; /* Avoid recursion. */
a0d0e21e 2922 else {
3c42a273
JH
2923 I32 lastparen = *PL_reglastparen;
2924 I32 unwind1;
2925 re_unwind_branch_t *uw;
2926
2927 /* Put unwinding data on stack */
2928 unwind1 = SSNEWt(1,re_unwind_branch_t);
2929 uw = SSPTRt(unwind1,re_unwind_branch_t);
2930 uw->prev = unwind;
2931 unwind = unwind1;
2932 uw->type = ((c1 == BRANCH)
2933 ? RE_UNWIND_BRANCH
2934 : RE_UNWIND_BRANCHJ);
2935 uw->lastparen = lastparen;
2936 uw->next = next;
2937 uw->locinput = locinput;
2938 uw->nextchr = nextchr;
2939#ifdef DEBUGGING
2940 uw->regindent = ++PL_regindent;
2941#endif
c277df42 2942
3c42a273
JH
2943 REGCP_SET(uw->lastcp);
2944
2945 /* Now go into the first branch */
2946 next = inner;
a687059c 2947 }
a0d0e21e
LW
2948 }
2949 break;
2950 case MINMOD:
2951 minmod = 1;
2952 break;
c277df42
IZ
2953 case CURLYM:
2954 {
00db4c45 2955 I32 l = 0;
c277df42
IZ
2956 CHECKPOINT lastcp;
2957
2958 /* We suppose that the next guy does not need
2959 backtracking: in particular, it is of constant length,
2960 and has no parenths to influence future backrefs. */
2961 ln = ARG1(scan); /* min to match */
2962 n = ARG2(scan); /* max to match */
c277df42
IZ
2963 paren = scan->flags;
2964 if (paren) {
3280af22
NIS
2965 if (paren > PL_regsize)
2966 PL_regsize = paren;
2967 if (paren > *PL_reglastparen)
2968 *PL_reglastparen = paren;
c277df42 2969 }
dc45a647 2970 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2971 if (paren)
2972 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2973 PL_reginput = locinput;
c277df42
IZ
2974 if (minmod) {
2975 minmod = 0;
2976 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2977 sayNO;
5f4b28b2 2978 if (ln && l == 0 && n >= ln
c277df42
IZ
2979 /* In fact, this is tricky. If paren, then the
2980 fact that we did/didnot match may influence
2981 future execution. */
2982 && !(paren && ln == 0))
2983 ln = n;
3280af22 2984 locinput = PL_reginput;
22c35a8c 2985 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2986 c1 = (U8)*STRING(next);
c277df42 2987 if (OP(next) == EXACTF)
22c35a8c 2988 c2 = PL_fold[c1];
c277df42 2989 else if (OP(next) == EXACTFL)
22c35a8c 2990 c2 = PL_fold_locale[c1];
c277df42
IZ
2991 else
2992 c2 = c1;
a0ed51b3
LW
2993 }
2994 else
c277df42 2995 c1 = c2 = -1000;
3c42a273 2996 REGCP_SET(lastcp);
5f4b28b2 2997 /* This may be improved if l == 0. */
c277df42
IZ
2998 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2999 /* If it could work, try it. */
3000 if (c1 == -1000 ||
3280af22
NIS
3001 UCHARAT(PL_reginput) == c1 ||
3002 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3003 {
3004 if (paren) {
3005 if (n) {
cf93c79d
IZ
3006 PL_regstartp[paren] =
3007 HOPc(PL_reginput, -l) - PL_bostr;
3008 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3009 }
3010 else
cf93c79d 3011 PL_regendp[paren] = -1;
c277df42
IZ
3012 }
3013 if (regmatch(next))
3014 sayYES;
3c42a273 3015 REGCP_UNWIND(lastcp);
c277df42
IZ
3016 }
3017 /* Couldn't or didn't -- move forward. */
3280af22 3018 PL_reginput = locinput;
c277df42
IZ
3019 if (regrepeat_hard(scan, 1, &l)) {
3020 ln++;
3280af22 3021 locinput = PL_reginput;
c277df42
IZ
3022 }
3023 else
3024 sayNO;
3025 }
a0ed51b3
LW
3026 }
3027 else {
c277df42
IZ
3028 n = regrepeat_hard(scan, n, &l);
3029 if (n != 0 && l == 0
3030 /* In fact, this is tricky. If paren, then the
3031 fact that we did/didnot match may influence
3032 future execution. */
3033 && !(paren && ln == 0))
3034 ln = n;
3280af22 3035 locinput = PL_reginput;
c277df42 3036 DEBUG_r(
5c0ca799 3037 PerlIO_printf(Perl_debug_log,
faccc32b 3038 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3039 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3040 (IV) n, (IV)l)
c277df42
IZ
3041 );
3042 if (n >= ln) {
22c35a8c 3043 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3044 c1 = (U8)*STRING(next);
c277df42 3045 if (OP(next) == EXACTF)
22c35a8c 3046 c2 = PL_fold[c1];
c277df42 3047 else if (OP(next) == EXACTFL)
22c35a8c 3048 c2 = PL_fold_locale[c1];
c277df42
IZ
3049 else
3050 c2 = c1;
a0ed51b3
LW
3051 }
3052 else
c277df42
IZ
3053 c1 = c2 = -1000;
3054 }
3c42a273 3055 REGCP_SET(lastcp);
c277df42
IZ
3056 while (n >= ln) {
3057 /* If it could work, try it. */
3058 if (c1 == -1000 ||
3280af22
NIS
3059 UCHARAT(PL_reginput) == c1 ||
3060 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3061 {
3062 DEBUG_r(
c3464db5 3063 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3064 "%*s trying tail with n=%"IVdf"...\n",
3065 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3066 );
3067 if (paren) {
3068 if (n) {
cf93c79d
IZ
3069 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3070 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3071 }
a0ed51b3 3072 else
cf93c79d 3073 PL_regendp[paren] = -1;
c277df42 3074 }
a0ed51b3
LW
3075 if (regmatch(next))
3076 sayYES;
3c42a273 3077 REGCP_UNWIND(lastcp);
a0ed51b3 3078 }
c277df42
IZ
3079 /* Couldn't or didn't -- back up. */
3080 n--;
dfe13c55 3081 locinput = HOPc(locinput, -l);
3280af22 3082 PL_reginput = locinput;
c277df42
IZ
3083 }
3084 }
3085 sayNO;
3086 break;
3087 }
3088 case CURLYN:
3089 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3090 if (paren > PL_regsize)
3091 PL_regsize = paren;
3092 if (paren > *PL_reglastparen)
3093 *PL_reglastparen = paren;
c277df42
IZ
3094 ln = ARG1(scan); /* min to match */
3095 n = ARG2(scan); /* max to match */
dc45a647 3096 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3097 goto repeat;
a0d0e21e 3098 case CURLY:
c277df42 3099 paren = 0;
a0d0e21e
LW
3100 ln = ARG1(scan); /* min to match */
3101 n = ARG2(scan); /* max to match */
dc45a647 3102 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3103 goto repeat;
3104 case STAR:
3105 ln = 0;
c277df42 3106 n = REG_INFTY;
a0d0e21e 3107 scan = NEXTOPER(scan);
c277df42 3108 paren = 0;
a0d0e21e
LW
3109 goto repeat;
3110 case PLUS:
c277df42
IZ
3111 ln = 1;
3112 n = REG_INFTY;
3113 scan = NEXTOPER(scan);
3114 paren = 0;
3115 repeat:
a0d0e21e
LW
3116 /*
3117 * Lookahead to avoid useless match attempts
3118 * when we know what character comes next.
3119 */
22c35a8c 3120 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3121 c1 = (U8)*STRING(next);
bbce6d69 3122 if (OP(next) == EXACTF)
22c35a8c 3123 c2 = PL_fold[c1];
bbce6d69 3124 else if (OP(next) == EXACTFL)
22c35a8c 3125 c2 = PL_fold_locale[c1];
bbce6d69 3126 else
3127 c2 = c1;
3128 }
a0d0e21e 3129 else
bbce6d69 3130 c1 = c2 = -1000;
3280af22 3131 PL_reginput = locinput;
a0d0e21e 3132 if (minmod) {
c277df42 3133 CHECKPOINT lastcp;
a0d0e21e
LW
3134 minmod = 0;
3135 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3136 sayNO;
a0ed51b3 3137 locinput = PL_reginput;
3c42a273 3138 REGCP_SET(lastcp);
0fe9bf95
IZ
3139 if (c1 != -1000) {
3140 char *e = locinput + n - ln; /* Should not check after this */
3141 char *old = locinput;
3142
3143 if (e >= PL_regeol || (n == REG_INFTY))
3144 e = PL_regeol - 1;
3145 while (1) {
3146 /* Find place 'next' could work */
3147 if (c1 == c2) {
3148 while (locinput <= e && *locinput != c1)
3149 locinput++;
3150 } else {
3151 while (locinput <= e
3152 && *locinput != c1
3153 && *locinput != c2)
3154 locinput++;
3155 }
3156 if (locinput > e)
3157 sayNO;
3158 /* PL_reginput == old now */
3159 if (locinput != old) {
3160 ln = 1; /* Did some */
3161 if (regrepeat(scan, locinput - old) <
3162 locinput - old)
3163 sayNO;
3164 }
3165 /* PL_reginput == locinput now */
67368f91 3166 TRYPAREN(paren, ln, locinput);
0fe9bf95 3167 PL_reginput = locinput; /* Could be reset... */
3c42a273 3168 REGCP_UNWIND(lastcp);
0fe9bf95
IZ
3169 /* Couldn't or didn't -- move forward. */
3170 old = locinput++;
3171 }
3172 }
3173 else
c277df42 3174 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3175 /* If it could work, try it. */
bbce6d69 3176 if (c1 == -1000 ||
3280af22
NIS
3177 UCHARAT(PL_reginput) == c1 ||
3178 UCHARAT(PL_reginput) == c2)
bbce6d69 3179 {
67368f91 3180 TRYPAREN(paren, n, PL_reginput);
3c42a273 3181 REGCP_UNWIND(lastcp);
bbce6d69 3182 }
c277df42 3183 /* Couldn't or didn't -- move forward. */
a0ed51b3 3184 PL_reginput = locinput;
a0d0e21e
LW
3185 if (regrepeat(scan, 1)) {
3186 ln++;
a0ed51b3
LW
3187 locinput = PL_reginput;
3188 }
3189 else
4633a7c4 3190 sayNO;
a0d0e21e
LW
3191 }
3192 }
3193 else {
c277df42 3194 CHECKPOINT lastcp;
a0d0e21e 3195 n = regrepeat(scan, n);
a0ed51b3 3196 locinput = PL_reginput;
22c35a8c 3197 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3198 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3199 ln = n; /* why back off? */
1aeab75a
GS
3200 /* ...because $ and \Z can match before *and* after
3201 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3202 We should back off by one in this case. */
3203 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3204 ln--;
3205 }
3c42a273 3206 REGCP_SET(lastcp);
c277df42
IZ
3207 if (paren) {
3208 while (n >= ln) {
3209 /* If it could work, try it. */
3210 if (c1 == -1000 ||
3280af22
NIS
3211 UCHARAT(PL_reginput) == c1 ||
3212 UCHARAT(PL_reginput) == c2)
c277df42 3213 {
67368f91 3214 TRYPAREN(paren, n, PL_reginput);
3c42a273 3215 REGCP_UNWIND(lastcp);
c277df42
IZ
3216 }
3217 /* Couldn't or didn't -- back up. */
3218 n--;
dfe13c55 3219 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3220 }
a0ed51b3
LW
3221 }
3222 else {
c277df42
IZ
3223 while (n >= ln) {
3224 /* If it could work, try it. */
3225 if (c1 == -1000 ||
3280af22
NIS
3226 UCHARAT(PL_reginput) == c1 ||
3227 UCHARAT(PL_reginput) == c2)
c277df42 3228 {
67368f91 3229 TRYPAREN(paren, n, PL_reginput);
3c42a273 3230 REGCP_UNWIND(lastcp);
c277df42
IZ
3231 }
3232 /* Couldn't or didn't -- back up. */
3233 n--;
dfe13c55 3234 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3235 }
a0d0e21e
LW
3236 }
3237 }
4633a7c4 3238 sayNO;
c277df42 3239 break;
a0d0e21e 3240 case END:
0f5d15d6
IZ
3241 if (PL_reg_call_cc) {
3242 re_cc_state *cur_call_cc = PL_reg_call_cc;
3243 CURCUR *cctmp = PL_regcc;
3244 regexp *re = PL_reg_re;
3245 CHECKPOINT cp, lastcp;
3246
3247 cp = regcppush(0); /* Save *all* the positions. */
3c42a273 3248 REGCP_SET(lastcp);
0f5d15d6
IZ
3249 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3250 the caller. */
3251 PL_reginput = locinput; /* Make position available to
3252 the callcc. */
3253 cache_re(PL_reg_call_cc->re);
3254 PL_regcc = PL_reg_call_cc->cc;
3255 PL_reg_call_cc = PL_reg_call_cc->prev;
3256 if (regmatch(cur_call_cc->node)) {
3257 PL_reg_call_cc = cur_call_cc;
3258 regcpblow(cp);
3259 sayYES;
3260 }
3c42a273 3261 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3262 regcppop();
3263 PL_reg_call_cc = cur_call_cc;
3264 PL_regcc = cctmp;
3265 PL_reg_re = re;
3266 cache_re(re);
3267
3268 DEBUG_r(
3269 PerlIO_printf(Perl_debug_log,
3270 "%*s continuation failed...\n",
3271 REPORT_CODE_OFF+PL_regindent*2, "")
3272 );
7821416a 3273 sayNO_SILENT;
0f5d15d6 3274 }
7821416a
IZ
3275 if (locinput < PL_regtill) {
3276 DEBUG_r(PerlIO_printf(Perl_debug_log,
3277 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3278 PL_colors[4],
3279 (long)(locinput - PL_reg_starttry),
3280 (long)(PL_regtill - PL_reg_starttry),
3281 PL_colors[5]));
3282 sayNO_FINAL; /* Cannot match: too short. */
3283 }
3284 PL_reginput = locinput; /* put where regtry can find it */
3285 sayYES_FINAL; /* Success! */
7e5428c5 3286 case SUCCEED:
3280af22 3287 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3288 sayYES_LOUD; /* Success! */
c277df42
IZ
3289 case SUSPEND:
3290 n = 1;
9fe1d20c 3291 PL_reginput = locinput;
c277df42 3292 goto do_ifmatch;
a0d0e21e 3293 case UNLESSM:
c277df42 3294 n = 0;
a0ed51b3 3295 if (scan->flags) {
0fe9bf95
IZ
3296 if (UTF) { /* XXXX This is absolutely
3297 broken, we read before
3298 start of string. */
3299 s = HOPMAYBEc(locinput, -scan->flags);
3300 if (!s)
3301 goto say_yes;
3302 PL_reginput = s;
3303 }
3304 else {
3305 if (locinput < PL_bostr + scan->flags)
3306 goto say_yes;
3307 PL_reginput = locinput - scan->flags;
3308 goto do_ifmatch;
3309 }
a0ed51b3
LW
3310 }
3311 else
3312 PL_reginput = locinput;
c277df42
IZ
3313 goto do_ifmatch;
3314 case IFMATCH:
3315 n = 1;
a0ed51b3 3316 if (scan->flags) {
0fe9bf95
IZ
3317 if (UTF) { /* XXXX This is absolutely
3318 broken, we read before
3319 start of string. */
3320 s = HOPMAYBEc(locinput, -scan->flags);
3321 if (!s || s < PL_bostr)
3322 goto say_no;
3323 PL_reginput = s;
3324 }
3325 else {
3326 if (locinput < PL_bostr + scan->flags)
3327 goto say_no;
3328 PL_reginput = locinput - scan->flags;
3329 goto do_ifmatch;
3330 }
a0ed51b3
LW
3331 }
3332 else
3333 PL_reginput = locinput;
3334
c277df42 3335 do_ifmatch:
c277df42
IZ
3336 inner = NEXTOPER(NEXTOPER(scan));
3337 if (regmatch(inner) != n) {
3338 say_no:
3339 if (logical) {
3340 logical = 0;
3341 sw = 0;
3342 goto do_longjump;
a0ed51b3
LW
3343 }
3344 else
c277df42
IZ
3345 sayNO;
3346 }
3347 say_yes:
3348 if (logical) {
3349 logical = 0;
3350 sw = 1;
3351 }
fe44a5e8 3352 if (OP(scan) == SUSPEND) {
3280af22 3353 locinput = PL_reginput;
565764a8 3354 nextchr = UCHARAT(locinput);
fe44a5e8 3355 }
c277df42
IZ
3356 /* FALL THROUGH. */
3357 case LONGJMP:
3358 do_longjump:
3359 next = scan + ARG(scan);
3360 if (next == scan)
3361 next = NULL;
a0d0e21e
LW
3362 break;
3363 default:
b900a521 3364 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3365 PTR2UV(scan), OP(scan));
cea2e8a9 3366 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3367 }
3c42a273 3368 reenter:
a0d0e21e
LW
3369 scan = next;
3370 }
a687059c 3371
a0d0e21e
LW
3372 /*
3373 * We get here only if there's trouble -- normally "case END" is
3374 * the terminating point.
3375 */
cea2e8a9 3376 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3377 /*NOTREACHED*/
4633a7c4
LW
3378 sayNO;
3379
7821416a
IZ
3380yes_loud:
3381 DEBUG_r(
3382 PerlIO_printf(Perl_debug_log,
3383 "%*s %scould match...%s\n",
3384 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3385 );
3386 goto yes;
3387yes_final:
3388 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3389 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3390yes:
3391#ifdef DEBUGGING
3280af22 3392 PL_regindent--;
4633a7c4 3393#endif
3c42a273
JH
3394
3395#if 0 /* Breaks $^R */
3396 if (unwind)
3397 regcpblow(firstcp);
3398#endif
4633a7c4
LW
3399 return 1;
3400
3401no:
7821416a
IZ
3402 DEBUG_r(
3403 PerlIO_printf(Perl_debug_log,
3404 "%*s %sfailed...%s\n",
3405 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3406 );
3407 goto do_no;
3408no_final:
3409do_no:
3c42a273
JH
3410 if (unwind) {
3411 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3412
3413 switch (uw->type) {
3414 case RE_UNWIND_BRANCH:
3415 case RE_UNWIND_BRANCHJ:
3416 {
3417 re_unwind_branch_t *uwb = &(uw->branch);
3418 I32 lastparen = uwb->lastparen;
3419
3420 REGCP_UNWIND(uwb->lastcp);
3421 for (n = *PL_reglastparen; n > lastparen; n--)
3422 PL_regendp[n] = -1;
3423 *PL_reglastparen = n;
3424 scan = next = uwb->next;
3425 if ( !scan ||
3426 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3427 ? BRANCH : BRANCHJ) ) { /* Failure */
3428 unwind = uwb->prev;
3429#ifdef DEBUGGING
3430 PL_regindent--;
3431#endif
3432 goto do_no;
3433 }
3434 /* Have more choice yet. Reuse the same uwb. */
3435 /*SUPPRESS 560*/
3436 if ((n = (uwb->type == RE_UNWIND_BRANCH
3437 ? NEXT_OFF(next) : ARG(next))))
3438 next += n;
3439 else
3440 next = NULL; /* XXXX Needn't unwinding in this case... */
3441 uwb->next = next;
3442 next = NEXTOPER(scan);
3443 if (uwb->type == RE_UNWIND_BRANCHJ)
3444 next = NEXTOPER(next);
3445 locinput = uwb->locinput;
3446 nextchr = uwb->nextchr;
3447#ifdef DEBUGGING
3448 PL_regindent = uwb->regindent;
3449#endif
3450
3451 goto reenter;
3452 }
3453 /* NOT REACHED */
3454 default:
3455 Perl_croak(aTHX_ "regexp unwind memory corruption");
3456 }
3457 /* NOT REACHED */
3458 }
4633a7c4 3459#ifdef DEBUGGING
3280af22 3460 PL_regindent--;
4633a7c4 3461#endif
a0d0e21e 3462 return 0;
a687059c
LW
3463}
3464
3465/*
3466 - regrepeat - repeatedly match something simple, report how many
3467 */
3468/*
3469 * [This routine now assumes that it will only match on things of length 1.
3470 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3471 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3472 */
76e3520e 3473STATIC I32
cea2e8a9 3474S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3475{
a0d0e21e 3476 register char *scan;
a0d0e21e 3477 register I32 c;
3280af22 3478 register char *loceol = PL_regeol;
a0ed51b3 3479 register I32 hardcount = 0;
a0d0e21e 3480
3280af22 3481 scan = PL_reginput;
c277df42 3482 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3483 loceol = scan + max;
a0d0e21e 3484 switch (OP(p)) {
22c35a8c 3485 case REG_ANY:
a0d0e21e
LW
3486 while (scan < loceol && *scan != '\n')
3487 scan++;
3488 break;
3489 case SANY:
3490 scan = loceol;
3491 break;
a0ed51b3
LW
3492 case ANYUTF8:
3493 loceol = PL_regeol;
3494 while (scan < loceol && *scan != '\n') {
3495 scan += UTF8SKIP(scan);
3496 hardcount++;
3497 }
3498 break;
3499 case SANYUTF8:
3500 loceol = PL_regeol;
3501 while (scan < loceol) {
3502 scan += UTF8SKIP(scan);
3503 hardcount++;
3504 }
3505 break;
bbce6d69 3506 case EXACT: /* length of string is 1 */
cd439c50 3507 c = (U8)*STRING(p);
bbce6d69 3508 while (scan < loceol && UCHARAT(scan) == c)
3509 scan++;
3510 break;
3511 case EXACTF: /* length of string is 1 */
cd439c50 3512 c = (U8)*STRING(p);
bbce6d69 3513 while (scan < loceol &&
22c35a8c 3514 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3515 scan++;
3516 break;
3517 case EXACTFL: /* length of string is 1 */
3280af22 3518 PL_reg_flags |= RF_tainted;
cd439c50 3519 c = (U8)*STRING(p);
bbce6d69 3520 while (scan < loceol &&
22c35a8c 3521 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3522 scan++;
3523 break;
a0ed51b3
LW
3524 case ANYOFUTF8:
3525 loceol = PL_regeol;
3526 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3527 scan += UTF8SKIP(scan);
3528 hardcount++;
3529 }
3530 break;
a0d0e21e 3531 case ANYOF:
936ed897 3532 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3533 scan++;
a0d0e21e
LW
3534 break;
3535 case ALNUM:
3536 while (scan < loceol && isALNUM(*scan))
3537 scan++;
3538 break;
a0ed51b3
LW
3539 case ALNUMUTF8:
3540 loceol = PL_regeol;
dfe13c55 3541 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3542 scan += UTF8SKIP(scan);
3543 hardcount++;
3544 }
3545 break;
bbce6d69 3546 case ALNUML:
3280af22 3547 PL_reg_flags |= RF_tainted;
bbce6d69 3548 while (scan < loceol && isALNUM_LC(*scan))
3549 scan++;
3550 break;
a0ed51b3
LW
3551 case ALNUMLUTF8:
3552 PL_reg_flags |= RF_tainted;
3553 loceol = PL_regeol;
dfe13c55 3554 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3555 scan += UTF8SKIP(scan);
3556 hardcount++;
3557 }
3558 break;
3559 break;
a0d0e21e
LW
3560 case NALNUM:
3561 while (scan < loceol && !isALNUM(*scan))
3562 scan++;
3563 break;
a0ed51b3
LW
3564 case NALNUMUTF8:
3565 loceol = PL_regeol;
dfe13c55 3566 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3567 scan += UTF8SKIP(scan);
3568 hardcount++;
3569 }
3570 break;
bbce6d69 3571 case NALNUML:
3280af22 3572 PL_reg_flags |= RF_tainted;
bbce6d69 3573 while (scan < loceol && !isALNUM_LC(*scan))
3574 scan++;
3575 break;
a0ed51b3
LW
3576 case NALNUMLUTF8:
3577 PL_reg_flags |= RF_tainted;
3578 loceol = PL_regeol;
dfe13c55 3579 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3580 scan += UTF8SKIP(scan);
3581 hardcount++;
3582 }
3583 break;
a0d0e21e
LW
3584 case SPACE:
3585 while (scan < loceol && isSPACE(*scan))
3586 scan++;
3587 break;
a0ed51b3
LW
3588 case SPACEUTF8:
3589 loceol = PL_regeol;
dfe13c55 3590 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3591 scan += UTF8SKIP(scan);
3592 hardcount++;
3593 }
3594 break;
bbce6d69 3595 case SPACEL:
3280af22 3596 PL_reg_flags |= RF_tainted;
bbce6d69 3597 while (scan < loceol && isSPACE_LC(*scan))
3598 scan++;
3599 break;
a0ed51b3
LW
3600 case SPACELUTF8:
3601 PL_reg_flags |= RF_tainted;
3602 loceol = PL_regeol;
dfe13c55 3603 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3604 scan += UTF8SKIP(scan);
3605 hardcount++;
3606 }
3607 break;
a0d0e21e
LW
3608 case NSPACE:
3609 while (scan < loceol && !isSPACE(*scan))
3610 scan++;
3611 break;
a0ed51b3
LW
3612 case NSPACEUTF8:
3613 loceol = PL_regeol;
dfe13c55 3614 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3615 scan += UTF8SKIP(scan);
3616 hardcount++;
3617 }
3618 break;
bbce6d69 3619 case NSPACEL:
3280af22 3620 PL_reg_flags |= RF_tainted;
bbce6d69 3621 while (scan < loceol && !isSPACE_LC(*scan))
3622 scan++;
3623 break;
a0ed51b3
LW
3624 case NSPACELUTF8:
3625 PL_reg_flags |= RF_tainted;
3626 loceol = PL_regeol;
dfe13c55 3627 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3628 scan += UTF8SKIP(scan);
3629 hardcount++;
3630 }
3631 break;
a0d0e21e
LW
3632 case DIGIT:
3633 while (scan < loceol && isDIGIT(*scan))
3634 scan++;
3635 break;
a0ed51b3
LW
3636 case DIGITUTF8:
3637 loceol = PL_regeol;
dfe13c55 3638 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3639 scan += UTF8SKIP(scan);
3640 hardcount++;
3641 }
3642 break;
3643 break;
a0d0e21e
LW
3644 case NDIGIT:
3645 while (scan < loceol && !isDIGIT(*scan))
3646 scan++;
3647 break;
a0ed51b3
LW
3648 case NDIGITUTF8:
3649 loceol = PL_regeol;
dfe13c55 3650 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3651 scan += UTF8SKIP(scan);
3652 hardcount++;
3653 }
3654 break;
a0d0e21e
LW
3655 default: /* Called on something of 0 width. */
3656 break; /* So match right here or not at all. */
3657 }
a687059c 3658
a0ed51b3
LW
3659 if (hardcount)
3660 c = hardcount;
3661 else
3662 c = scan - PL_reginput;
3280af22 3663 PL_reginput = scan;
a687059c 3664
c277df42
IZ
3665 DEBUG_r(
3666 {
3667 SV *prop = sv_newmortal();
3668
3669 regprop(prop, p);
3670 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3671 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3672 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3673 });
3674
a0d0e21e 3675 return(c);
a687059c
LW
3676}
3677
3678/*
c277df42
IZ
3679 - regrepeat_hard - repeatedly match something, report total lenth and length
3680 *
3681 * The repeater is supposed to have constant length.
3682 */
3683
76e3520e 3684STATIC I32
cea2e8a9 3685S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3686{
3687 register char *scan;
3688 register char *start;
3280af22 3689 register char *loceol = PL_regeol;
a0ed51b3 3690 I32 l = 0;
708e3b05 3691 I32 count = 0, res = 1;
a0ed51b3
LW
3692
3693 if (!max)
3694 return 0;
c277df42 3695
3280af22 3696 start = PL_reginput;
a0ed51b3 3697 if (UTF) {
708e3b05 3698 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3699 if (!count++) {
3700 l = 0;
3701 while (start < PL_reginput) {
3702 l++;
3703 start += UTF8SKIP(start);
3704 }
3705 *lp = l;
3706 if (l == 0)
3707 return max;
3708 }
3709 if (count == max)
3710 return count;
3711 }
3712 }
3713 else {
708e3b05 3714 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3715 if (!count++) {
3716 *lp = l = PL_reginput - start;
3717 if (max != REG_INFTY && l*max < loceol - scan)
3718 loceol = scan + l*max;
3719 if (l == 0)
3720 return max;
c277df42
IZ
3721 }
3722 }
3723 }
708e3b05 3724 if (!res)
3280af22 3725 PL_reginput = scan;
c277df42 3726
a0ed51b3 3727 return count;
c277df42
IZ
3728}
3729
3730/*
cb8d8820 3731 - reginclass - determine if a character falls into a character class
bbce6d69 3732 */
3733
76e3520e 3734STATIC bool
936ed897 3735S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3736{
b8c5462f 3737 char flags = ANYOF_FLAGS(p);
bbce6d69 3738 bool match = FALSE;
3739
3740 c &= 0xFF;
b8c5462f 3741 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3742 match = TRUE;
3743 else if (flags & ANYOF_FOLD) {
3744 I32 cf;
3745 if (flags & ANYOF_LOCALE) {
3280af22 3746 PL_reg_flags |= RF_tainted;
22c35a8c 3747 cf = PL_fold_locale[c];
bbce6d69 3748 }
3749 else
22c35a8c 3750 cf = PL_fold[c];
b8c5462f 3751 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3752 match = TRUE;
3753 }
3754
b8c5462f 3755 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3756 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3757 if (
3758 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3765 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3770 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3771 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3772 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3773 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3774 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3775 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3776 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3777 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3778 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3779 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3780 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3781 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3782 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3a6f1039
GS
3783 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3784 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3785 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3c42a273 3786 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3a6f1039 3787 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3788 ) /* How's that for a conditional? */
bbce6d69 3789 {
3790 match = TRUE;
3791 }
3792 }
3793
ae5c130c 3794 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3795}
3796
a0ed51b3 3797STATIC bool
cea2e8a9 3798S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607 3799{
a0ed51b3
LW
3800 char flags = ARG1(f);
3801 bool match = FALSE;
3c42a273
JH
3802#ifdef DEBUGGING
3803 SV *rv = (SV*)PL_regdata->data[ARG2(f)];
3804 AV *av = (AV*)SvRV((SV*)rv);
3805 SV *sw = *av_fetch(av, 0, FALSE);
3806 SV *lv = *av_fetch(av, 1, FALSE);
3807#else
3808 SV *sw = (SV*)PL_regdata->data[ARG2(f)];
3809#endif
a0ed51b3 3810
3c42a273 3811 if (swash_fetch(sw, p))
a0ed51b3
LW
3812 match = TRUE;
3813 else if (flags & ANYOF_FOLD) {
feb4a48f 3814 U8 tmpbuf[UTF8_MAXLEN+1];
a0ed51b3
LW
3815 if (flags & ANYOF_LOCALE) {
3816 PL_reg_flags |= RF_tainted;
3817 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3818 }
3819 else
3820 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3c42a273 3821 if (swash_fetch(sw, tmpbuf))
a0ed51b3
LW
3822 match = TRUE;
3823 }
3824
b8c5462f 3825 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3826
3827 return (flags & ANYOF_INVERT) ? !match : match;
3828}
161b471a 3829
dfe13c55 3830STATIC U8 *
cea2e8a9 3831S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3832{
a0ed51b3
LW
3833 if (off >= 0) {
3834 while (off-- && s < (U8*)PL_regeol)
3835 s += UTF8SKIP(s);
3836 }
3837 else {
3838 while (off++) {
3839 if (s > (U8*)PL_bostr) {
3840 s--;
3841 if (*s & 0x80) {
3842 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3843 s--;
3844 } /* XXX could check well-formedness here */
3845 }
3846 }
3847 }
3848 return s;
3849}
161b471a 3850
dfe13c55 3851STATIC U8 *
cea2e8a9 3852S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3
LW
3853{
3854 if (off >= 0) {
3855 while (off-- && s < (U8*)PL_regeol)
3856 s += UTF8SKIP(s);
3857 if (off >= 0)
3858 return 0;
3859 }
3860 else {
3861 while (off++) {
3862 if (s > (U8*)PL_bostr) {
3863 s--;
3864 if (*s & 0x80) {
3865 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3866 s--;
3867 } /* XXX could check well-formedness here */
3868 }
3869 else
3870 break;
3871 }
3872 if (off <= 0)
3873 return 0;
3874 }
3875 return s;
3876}
51371543
GS
3877
3878#ifdef PERL_OBJECT
51371543
GS
3879#include "XSUB.h"
3880#endif
3881
3882static void
3883restore_pos(pTHXo_ void *arg)
3884{
51371543
GS
3885 if (PL_reg_eval_set) {
3886 if (PL_reg_oldsaved) {
3887 PL_reg_re->subbeg = PL_reg_oldsaved;
3888 PL_reg_re->sublen = PL_reg_oldsavedlen;
3889 RX_MATCH_COPIED_on(PL_reg_re);
3890 }
3891 PL_reg_magic->mg_len = PL_reg_oldpos;
3892 PL_reg_eval_set = 0;
3893 PL_curpm = PL_reg_oldcurpm;
3894 }
3895}