This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
%#b in particular and %B in general were kaputt.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5
DD
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
281 regtry() should be needed. */
282
cad2e5aa
JH
283char *
284Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
285 char *strend, U32 flags, re_scream_pos_data *data)
286{
2c2d71f5 287 register I32 start_shift;
cad2e5aa 288 /* Should be nonnegative! */
2c2d71f5
JH
289 register I32 end_shift;
290 register char *s;
291 register SV *check;
cad2e5aa
JH
292 char *t;
293 I32 ml_anch;
2c2d71f5
JH
294 char *tmp;
295 register char *other_last = Nullch;
cad2e5aa
JH
296
297 DEBUG_r( if (!PL_colorset) reginitcolors() );
298 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 299 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
300 PL_colors[4],PL_colors[5],PL_colors[0],
301 prog->precomp,
302 PL_colors[1],
303 (strlen(prog->precomp) > 60 ? "..." : ""),
304 PL_colors[0],
305 (strend - strpos > 60 ? 60 : strend - strpos),
306 strpos, PL_colors[1],
307 (strend - strpos > 60 ? "..." : ""))
308 );
309
2c2d71f5
JH
310 if (prog->minlen > strend - strpos) {
311 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 312 goto fail;
2c2d71f5
JH
313 }
314 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
315 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
316 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 317 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
318
319 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 320 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
321 I32 slen;
322
323 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
324 && (sv && (strpos + SvCUR(sv) != strend)) ) {
325 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 326 goto fail;
2c2d71f5 327 }
adac82c7 328 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 329 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
330 if (SvTAIL(prog->check_substr)) {
331 slen = SvCUR(prog->check_substr); /* >= 1 */
332
2c2d71f5
JH
333 if ( strend - s > slen || strend - s < slen - 1
334 || (strend - s == slen && strend[-1] != '\n')) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
336 goto fail_finish;
cad2e5aa
JH
337 }
338 /* Now should match s[0..slen-2] */
339 slen--;
340 if (slen && (*SvPVX(prog->check_substr) != *s
341 || (slen > 1
2c2d71f5
JH
342 && memNE(SvPVX(prog->check_substr), s, slen)))) {
343 report_neq:
344 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
345 goto fail_finish;
346 }
cad2e5aa
JH
347 }
348 else if (*SvPVX(prog->check_substr) != *s
349 || ((slen = SvCUR(prog->check_substr)) > 1
350 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
351 goto report_neq;
352 goto success_at_start;
cad2e5aa 353 }
2c2d71f5 354 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 355 s = strpos;
2c2d71f5
JH
356 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
357 /* Should be nonnegative! */
358 end_shift = prog->minlen - start_shift -
359 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
360 if (!ml_anch) {
361 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
362 - (SvTAIL(prog->check_substr) != 0);
363 I32 eshift = strend - s - end;
364
365 if (end_shift < eshift)
366 end_shift = eshift;
367 }
cad2e5aa 368 }
2c2d71f5 369 else { /* Can match at random position */
cad2e5aa
JH
370 ml_anch = 0;
371 s = strpos;
2c2d71f5
JH
372 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
373 /* Should be nonnegative! */
374 end_shift = prog->minlen - start_shift -
375 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
376 }
377
2c2d71f5 378#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 379 if (end_shift < 0)
6bbae5e6 380 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
381#endif
382
383 check = prog->check_substr;
384 restart:
385 /* Find a possible match in the region s..strend by looking for
386 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 387 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
388 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
389 I32 p = -1; /* Internal iterator of scream. */
390 I32 *pp = data ? data->scream_pos : &p;
391
2c2d71f5
JH
392 if (PL_screamfirst[BmRARE(check)] >= 0
393 || ( BmRARE(check) == '\n'
394 && (BmPREVIOUS(check) == SvCUR(check) - 1)
395 && SvTAIL(check) ))
396 s = screaminstr(sv, check,
397 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 398 else
2c2d71f5 399 goto fail_finish;
cad2e5aa
JH
400 if (data)
401 *data->scream_olds = s;
402 }
403 else
404 s = fbm_instr((unsigned char*)s + start_shift,
405 (unsigned char*)strend - end_shift,
2c2d71f5 406 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
407
408 /* Update the count-of-usability, remove useless subpatterns,
409 unshift s. */
2c2d71f5
JH
410
411 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
412 (s ? "Found" : "Did not find"),
413 ((check == prog->anchored_substr) ? "anchored" : "floating"),
414 PL_colors[0],
415 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
416 PL_colors[1], (SvTAIL(check) ? "$" : ""),
417 (s ? " at offset " : "...\n") ) );
418
419 if (!s)
420 goto fail_finish;
421
422 /* Finish the diagnostic message */
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
424
425 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
426 Start with the other substr.
427 XXXX no SCREAM optimization yet - and a very coarse implementation
428 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
429 *always* match. Probably should be marked during compile...
430 Probably it is right to do no SCREAM here...
431 */
432
433 if (prog->float_substr && prog->anchored_substr) {
434 /* Take into account the anchored substring. */
435 /* XXXX May be hopelessly wrong for UTF... */
436 if (!other_last)
437 other_last = strpos - 1;
438 if (check == prog->float_substr) {
439 char *last = s - start_shift, *last1, *last2;
440 char *s1 = s;
441
442 tmp = PL_bostr;
443 t = s - prog->check_offset_max;
444 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
445 && (!(prog->reganch & ROPT_UTF8)
446 || (PL_bostr = strpos, /* Used in regcopmaybe() */
447 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
448 && t > strpos)))
449 ;
450 else
451 t = strpos;
452 t += prog->anchored_offset;
453 if (t <= other_last)
454 t = other_last + 1;
455 PL_bostr = tmp;
456 last2 = last1 = strend - prog->minlen;
457 if (last < last1)
458 last1 = last;
459 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
460 /* On end-of-str: see comment below. */
461 s = fbm_instr((unsigned char*)t,
462 (unsigned char*)last1 + prog->anchored_offset
463 + SvCUR(prog->anchored_substr)
464 - (SvTAIL(prog->anchored_substr)!=0),
465 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
466 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
467 (s ? "Found" : "Contradicts"),
468 PL_colors[0],
469 SvCUR(prog->anchored_substr)
470 - (SvTAIL(prog->anchored_substr)!=0),
471 SvPVX(prog->anchored_substr),
472 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
473 if (!s) {
474 if (last1 >= last2) {
475 DEBUG_r(PerlIO_printf(Perl_debug_log,
476 ", giving up...\n"));
477 goto fail_finish;
478 }
479 DEBUG_r(PerlIO_printf(Perl_debug_log,
480 ", trying floating at offset %ld...\n",
481 (long)(s1 + 1 - strpos)));
482 PL_regeol = strend; /* Used in HOP() */
483 other_last = last1 + prog->anchored_offset;
484 s = HOPc(last, 1);
485 goto restart;
486 }
487 else {
488 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
489 (long)(s - strpos)));
490 t = s - prog->anchored_offset;
491 other_last = s - 1;
492 if (t == strpos)
493 goto try_at_start;
494 s = s1;
495 goto try_at_offset;
496 }
497 }
498 else { /* Take into account the floating substring. */
499 char *last, *last1;
500 char *s1 = s;
501
502 t = s - start_shift;
503 last1 = last = strend - prog->minlen + prog->float_min_offset;
504 if (last - t > prog->float_max_offset)
505 last = t + prog->float_max_offset;
506 s = t + prog->float_min_offset;
507 if (s <= other_last)
508 s = other_last + 1;
509 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
510 /* fbm_instr() takes into account exact value of end-of-str
511 if the check is SvTAIL(ed). Since false positives are OK,
512 and end-of-str is not later than strend we are OK. */
513 s = fbm_instr((unsigned char*)s,
514 (unsigned char*)last + SvCUR(prog->float_substr)
515 - (SvTAIL(prog->float_substr)!=0),
516 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
517 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
518 (s ? "Found" : "Contradicts"),
519 PL_colors[0],
520 SvCUR(prog->float_substr)
521 - (SvTAIL(prog->float_substr)!=0),
522 SvPVX(prog->float_substr),
523 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
524 if (!s) {
525 if (last1 == last) {
526 DEBUG_r(PerlIO_printf(Perl_debug_log,
527 ", giving up...\n"));
528 goto fail_finish;
529 }
530 DEBUG_r(PerlIO_printf(Perl_debug_log,
531 ", trying anchored starting at offset %ld...\n",
532 (long)(s1 + 1 - strpos)));
533 other_last = last;
534 PL_regeol = strend; /* Used in HOP() */
535 s = HOPc(t, 1);
536 goto restart;
537 }
538 else {
539 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
540 (long)(s - strpos)));
541 other_last = s - 1;
542 if (t == strpos)
543 goto try_at_start;
544 s = s1;
545 goto try_at_offset;
546 }
547 }
cad2e5aa 548 }
2c2d71f5
JH
549
550 t = s - prog->check_offset_max;
551 tmp = PL_bostr;
552 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
553 && (!(prog->reganch & ROPT_UTF8)
554 || (PL_bostr = strpos, /* Used in regcopmaybe() */
555 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
556 && t > strpos)))) {
557 PL_bostr = tmp;
558 /* Fixed substring is found far enough so that the match
559 cannot start at strpos. */
560 try_at_offset:
cad2e5aa 561 if (ml_anch && t[-1] != '\n') {
2c2d71f5
JH
562 find_anchor: /* Eventually fbm_*() should handle this */
563 while (t < strend - prog->minlen) {
cad2e5aa
JH
564 if (*t == '\n') {
565 if (t < s - prog->check_offset_min) {
566 s = t + 1;
2c2d71f5
JH
567 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
568 PL_colors[0],PL_colors[1], (long)(s - strpos)));
cad2e5aa
JH
569 goto set_useful;
570 }
2c2d71f5
JH
571 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
572 PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
cad2e5aa
JH
573 s = t + 1;
574 goto restart;
575 }
576 t++;
577 }
2c2d71f5
JH
578 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
579 PL_colors[0],PL_colors[1]));
580 goto fail_finish;
cad2e5aa
JH
581 }
582 s = t;
583 set_useful:
2c2d71f5 584 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
585 }
586 else {
2c2d71f5
JH
587 PL_bostr = tmp;
588 /* The found string does not prohibit matching at beg-of-str
589 - no optimization of calling REx engine can be performed,
590 unless it was an MBOL and we are not after MBOL. */
591 try_at_start:
592 /* Even in this situation we may use MBOL flag if strpos is offset
593 wrt the start of the string. */
594 if (ml_anch && sv
cad2e5aa
JH
595 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
596 t = strpos;
597 goto find_anchor;
598 }
2c2d71f5 599 success_at_start:
cad2e5aa
JH
600 if (!(prog->reganch & ROPT_NAUGHTY)
601 && --BmUSEFUL(prog->check_substr) < 0
602 && prog->check_substr == prog->float_substr) { /* boo */
603 /* If flags & SOMETHING - do not do it many times on the same match */
604 SvREFCNT_dec(prog->check_substr);
605 prog->check_substr = Nullsv; /* disable */
606 prog->float_substr = Nullsv; /* clear */
607 s = strpos;
608 prog->reganch &= ~RE_USE_INTUIT;
609 }
610 else
611 s = strpos;
612 }
613
2c2d71f5
JH
614 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
615 PL_colors[4], PL_colors[5], (long)(s - strpos)) );
cad2e5aa 616 return s;
2c2d71f5
JH
617
618 fail_finish: /* Substring not found */
619 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 620 fail:
2c2d71f5 621 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
622 PL_colors[4],PL_colors[5]));
623 return Nullch;
624}
9661b544 625
a687059c 626/*
c277df42 627 - regexec_flags - match a regexp against a string
a687059c 628 */
79072805 629I32
864dbfa3 630Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 631 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
632/* strend: pointer to null at end of string */
633/* strbeg: real beginning of string */
634/* minend: end of match must be >=minend after stringarg. */
635/* data: May be used for some additional optimizations. */
636/* nosave: For optimizations. */
a687059c 637{
5c0ca799 638 dTHR;
a0d0e21e 639 register char *s;
c277df42 640 register regnode *c;
a0d0e21e
LW
641 register char *startpos = stringarg;
642 register I32 tmp;
c277df42 643 I32 minlen; /* must match at least this many chars */
a0d0e21e 644 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 645 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
646 constant substr. */ /* CC */
647 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
648 I32 scream_pos = -1; /* Internal iterator of scream. */
649 char *scream_olds;
3280af22 650 SV* oreplsv = GvSV(PL_replgv);
a687059c 651
2ab05381 652 PL_regcc = 0;
a0d0e21e 653
0f5d15d6 654 cache_re(prog);
a0d0e21e 655#ifdef DEBUGGING
3280af22 656 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
657#endif
658
659 /* Be paranoid... */
660 if (prog == NULL || startpos == NULL) {
cea2e8a9 661 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
662 return 0;
663 }
664
c277df42
IZ
665 minlen = prog->minlen;
666 if (strend - startpos < minlen) goto phooey;
667
a0d0e21e 668 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 669 PL_regprev = '\n';
a0d0e21e 670 else {
a0ed51b3 671 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
672 if (!PL_multiline && PL_regprev == '\n')
673 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 674 }
bbce6d69 675
a0d0e21e 676 /* Check validity of program. */
22c35a8c 677 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 678 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
679 }
680
3280af22
NIS
681 PL_reg_flags = 0;
682 PL_reg_eval_set = 0;
2c2d71f5 683 PL_reg_maxiter = 0;
a0d0e21e 684
a0ed51b3
LW
685 if (prog->reganch & ROPT_UTF8)
686 PL_reg_flags |= RF_utf8;
687
688 /* Mark beginning of line for ^ and lookbehind. */
689 PL_regbol = startpos;
690 PL_bostr = strbeg;
9661b544 691 PL_reg_sv = sv;
a0ed51b3
LW
692
693 /* Mark end of line for $ (and such) */
694 PL_regeol = strend;
695
696 /* see how far we have to get to not match where we matched before */
697 PL_regtill = startpos+minend;
698
0f5d15d6
IZ
699 /* We start without call_cc context. */
700 PL_reg_call_cc = 0;
701
a0d0e21e
LW
702 /* If there is a "must appear" string, look for it. */
703 s = startpos;
cad2e5aa
JH
704
705 if (prog->reganch & ROPT_GPOS_SEEN) {
706 MAGIC *mg;
707
708 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
709 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
710 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 711 else
cad2e5aa
JH
712 PL_reg_ganch = startpos;
713 if (prog->reganch & ROPT_ANCH_GPOS) {
714 if (s > PL_reg_ganch)
715 goto phooey;
716 s = PL_reg_ganch;
a0ed51b3 717 }
a0d0e21e 718 }
a687059c 719
cad2e5aa
JH
720 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
721 re_scream_pos_data d;
722
723 d.scream_olds = &scream_olds;
724 d.scream_pos = &scream_pos;
725 s = re_intuit_start(prog, sv, s, strend, flags, &d);
726 if (!s)
727 goto phooey; /* not present */
728 }
729
730 DEBUG_r( if (!PL_colorset) reginitcolors() );
731 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 732 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
733 PL_colors[4],PL_colors[5],PL_colors[0],
734 prog->precomp,
735 PL_colors[1],
c277df42 736 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 737 PL_colors[0],
c277df42 738 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 739 startpos, PL_colors[1],
c277df42
IZ
740 (strend - startpos > 60 ? "..." : ""))
741 );
742
a0d0e21e 743 /* Simplest case: anchored match need be tried only once. */
774d564b 744 /* [unless only anchor is BOL and multiline is set] */
22e551b9 745 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 746 if (s == startpos && regtry(prog, startpos))
a0d0e21e 747 goto got_it;
22e551b9
IZ
748 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
749 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 750 {
cad2e5aa
JH
751 char *end;
752
a0d0e21e
LW
753 if (minlen)
754 dontbother = minlen - 1;
cad2e5aa 755 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 756 /* for multiline we only have to try after newlines */
cad2e5aa
JH
757 if (prog->check_substr) {
758 while (1) {
759 if (regtry(prog, s))
a0d0e21e 760 goto got_it;
cad2e5aa
JH
761 if (s >= end)
762 goto phooey;
763 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
764 if (!s)
765 goto phooey;
766 }
767 } else {
768 if (s > startpos)
769 s--;
770 while (s < end) {
771 if (*s++ == '\n') { /* don't need PL_utf8skip here */
772 if (regtry(prog, s))
773 goto got_it;
774 }
775 }
35c8bce7 776 }
35c8bce7 777 }
a0d0e21e 778 goto phooey;
22e551b9
IZ
779 } else if (prog->reganch & ROPT_ANCH_GPOS) {
780 if (regtry(prog, PL_reg_ganch))
781 goto got_it;
782 goto phooey;
a0d0e21e 783 }
35c8bce7 784
a0d0e21e 785 /* Messy cases: unanchored match. */
c277df42
IZ
786 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
787 /* we have /x+whatever/ */
cad2e5aa 788 /* it must be a one character string (XXXX Except UTF?) */
c277df42 789 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
790 if (UTF) {
791 while (s < strend) {
792 if (*s == ch) {
793 if (regtry(prog, s)) goto got_it;
794 s += UTF8SKIP(s);
795 while (s < strend && *s == ch)
796 s += UTF8SKIP(s);
797 }
798 s += UTF8SKIP(s);
799 }
800 }
801 else {
802 while (s < strend) {
803 if (*s == ch) {
804 if (regtry(prog, s)) goto got_it;
c277df42 805 s++;
a0ed51b3
LW
806 while (s < strend && *s == ch)
807 s++;
808 }
809 s++;
a0d0e21e 810 }
a687059c 811 }
c277df42
IZ
812 }
813 /*SUPPRESS 560*/
814 else if (prog->anchored_substr != Nullsv
815 || (prog->float_substr != Nullsv
816 && prog->float_max_offset < strend - s)) {
817 SV *must = prog->anchored_substr
818 ? prog->anchored_substr : prog->float_substr;
819 I32 back_max =
820 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
821 I32 back_min =
822 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
823 I32 delta = back_max - back_min;
cf93c79d 824 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
825 -(I32)(CHR_SVLEN(must)
826 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
827 char *last1; /* Last position checked before */
828
829 if (s > PL_bostr)
dfe13c55 830 last1 = HOPc(s, -1);
a0ed51b3
LW
831 else
832 last1 = s - 1; /* bogus */
c277df42
IZ
833
834 /* XXXX check_substr already used to find `s', can optimize if
835 check_substr==must. */
836 scream_pos = -1;
837 dontbother = end_shift;
dfe13c55 838 strend = HOPc(strend, -dontbother);
c277df42 839 while ( (s <= last) &&
22e551b9
IZ
840 ((flags & REXEC_SCREAM)
841 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 842 end_shift, &scream_pos, 0))
a0ed51b3 843 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
844 (unsigned char*)strend, must,
845 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
846 if (HOPc(s, -back_max) > last1) {
847 last1 = HOPc(s, -back_min);
848 s = HOPc(s, -back_max);
a0ed51b3
LW
849 }
850 else {
dfe13c55 851 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 852
dfe13c55 853 last1 = HOPc(s, -back_min);
c277df42 854 s = t;
a0d0e21e 855 }
a0ed51b3
LW
856 if (UTF) {
857 while (s <= last1) {
858 if (regtry(prog, s))
859 goto got_it;
860 s += UTF8SKIP(s);
861 }
862 }
863 else {
864 while (s <= last1) {
865 if (regtry(prog, s))
866 goto got_it;
867 s++;
868 }
a0d0e21e
LW
869 }
870 }
871 goto phooey;
a0ed51b3
LW
872 }
873 else if (c = prog->regstclass) {
a0d0e21e 874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 875 char *cc;
a687059c 876
a0d0e21e
LW
877 if (minlen)
878 dontbother = minlen - 1;
dfe13c55 879 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
880 tmp = 1;
881 /* We know what class it must start with. */
882 switch (OP(c)) {
a0ed51b3 883 case ANYOFUTF8:
cd439c50 884 cc = MASK(c);
a0ed51b3
LW
885 while (s < strend) {
886 if (REGINCLASSUTF8(c, (U8*)s)) {
887 if (tmp && regtry(prog, s))
888 goto got_it;
889 else
890 tmp = doevery;
891 }
892 else
893 tmp = 1;
894 s += UTF8SKIP(s);
895 }
896 break;
a0d0e21e 897 case ANYOF:
cd439c50 898 cc = MASK(c);
a0d0e21e 899 while (s < strend) {
a0ed51b3 900 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
901 if (tmp && regtry(prog, s))
902 goto got_it;
903 else
904 tmp = doevery;
a687059c 905 }
a0d0e21e
LW
906 else
907 tmp = 1;
908 s++;
909 }
910 break;
bbce6d69 911 case BOUNDL:
3280af22 912 PL_reg_flags |= RF_tainted;
bbce6d69 913 /* FALL THROUGH */
a0d0e21e 914 case BOUND:
a0ed51b3
LW
915 if (minlen) {
916 dontbother++;
917 strend -= 1;
918 }
3280af22 919 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 920 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 921 while (s < strend) {
95bac841 922 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
923 tmp = !tmp;
924 if (regtry(prog, s))
925 goto got_it;
a687059c 926 }
a0d0e21e
LW
927 s++;
928 }
929 if ((minlen || tmp) && regtry(prog,s))
930 goto got_it;
931 break;
a0ed51b3
LW
932 case BOUNDLUTF8:
933 PL_reg_flags |= RF_tainted;
934 /* FALL THROUGH */
935 case BOUNDUTF8:
936 if (minlen) {
937 dontbother++;
dfe13c55 938 strend = reghop_c(strend, -1);
a0ed51b3 939 }
dfe13c55 940 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
941 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
942 while (s < strend) {
dfe13c55
GS
943 if (tmp == !(OP(c) == BOUND ?
944 swash_fetch(PL_utf8_alnum, (U8*)s) :
945 isALNUM_LC_utf8((U8*)s)))
946 {
a0ed51b3
LW
947 tmp = !tmp;
948 if (regtry(prog, s))
949 goto got_it;
950 }
951 s += UTF8SKIP(s);
952 }
953 if ((minlen || tmp) && regtry(prog,s))
954 goto got_it;
955 break;
bbce6d69 956 case NBOUNDL:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 /* FALL THROUGH */
a0d0e21e 959 case NBOUND:
a0ed51b3
LW
960 if (minlen) {
961 dontbother++;
962 strend -= 1;
963 }
3280af22 964 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 965 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 966 while (s < strend) {
95bac841 967 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
968 tmp = !tmp;
969 else if (regtry(prog, s))
970 goto got_it;
971 s++;
972 }
973 if ((minlen || !tmp) && regtry(prog,s))
974 goto got_it;
975 break;
a0ed51b3
LW
976 case NBOUNDLUTF8:
977 PL_reg_flags |= RF_tainted;
978 /* FALL THROUGH */
979 case NBOUNDUTF8:
980 if (minlen) {
981 dontbother++;
dfe13c55 982 strend = reghop_c(strend, -1);
a0ed51b3 983 }
dfe13c55 984 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
985 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
986 while (s < strend) {
dfe13c55
GS
987 if (tmp == !(OP(c) == NBOUND ?
988 swash_fetch(PL_utf8_alnum, (U8*)s) :
989 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
990 tmp = !tmp;
991 else if (regtry(prog, s))
992 goto got_it;
993 s += UTF8SKIP(s);
994 }
995 if ((minlen || !tmp) && regtry(prog,s))
996 goto got_it;
997 break;
a0d0e21e
LW
998 case ALNUM:
999 while (s < strend) {
bbce6d69
PP
1000 if (isALNUM(*s)) {
1001 if (tmp && regtry(prog, s))
1002 goto got_it;
1003 else
1004 tmp = doevery;
1005 }
1006 else
1007 tmp = 1;
1008 s++;
1009 }
1010 break;
a0ed51b3
LW
1011 case ALNUMUTF8:
1012 while (s < strend) {
dfe13c55 1013 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1014 if (tmp && regtry(prog, s))
1015 goto got_it;
1016 else
1017 tmp = doevery;
1018 }
1019 else
1020 tmp = 1;
1021 s += UTF8SKIP(s);
1022 }
1023 break;
bbce6d69 1024 case ALNUML:
3280af22 1025 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1026 while (s < strend) {
1027 if (isALNUM_LC(*s)) {
a0d0e21e
LW
1028 if (tmp && regtry(prog, s))
1029 goto got_it;
a687059c 1030 else
a0d0e21e
LW
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s++;
1036 }
1037 break;
a0ed51b3
LW
1038 case ALNUMLUTF8:
1039 PL_reg_flags |= RF_tainted;
1040 while (s < strend) {
dfe13c55 1041 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1042 if (tmp && regtry(prog, s))
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
1047 else
1048 tmp = 1;
1049 s += UTF8SKIP(s);
1050 }
1051 break;
a0d0e21e
LW
1052 case NALNUM:
1053 while (s < strend) {
bbce6d69
PP
1054 if (!isALNUM(*s)) {
1055 if (tmp && regtry(prog, s))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
1060 else
1061 tmp = 1;
1062 s++;
1063 }
1064 break;
a0ed51b3
LW
1065 case NALNUMUTF8:
1066 while (s < strend) {
dfe13c55 1067 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1068 if (tmp && regtry(prog, s))
1069 goto got_it;
1070 else
1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s += UTF8SKIP(s);
1076 }
1077 break;
bbce6d69 1078 case NALNUML:
3280af22 1079 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1080 while (s < strend) {
1081 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
1082 if (tmp && regtry(prog, s))
1083 goto got_it;
a687059c 1084 else
a0d0e21e 1085 tmp = doevery;
a687059c 1086 }
a0d0e21e
LW
1087 else
1088 tmp = 1;
1089 s++;
1090 }
1091 break;
a0ed51b3
LW
1092 case NALNUMLUTF8:
1093 PL_reg_flags |= RF_tainted;
1094 while (s < strend) {
dfe13c55 1095 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1096 if (tmp && regtry(prog, s))
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
1101 else
1102 tmp = 1;
1103 s += UTF8SKIP(s);
1104 }
1105 break;
a0d0e21e
LW
1106 case SPACE:
1107 while (s < strend) {
1108 if (isSPACE(*s)) {
1109 if (tmp && regtry(prog, s))
1110 goto got_it;
1111 else
1112 tmp = doevery;
2304df62 1113 }
a0d0e21e
LW
1114 else
1115 tmp = 1;
1116 s++;
1117 }
1118 break;
a0ed51b3
LW
1119 case SPACEUTF8:
1120 while (s < strend) {
dfe13c55 1121 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
1122 if (tmp && regtry(prog, s))
1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s += UTF8SKIP(s);
1130 }
1131 break;
bbce6d69 1132 case SPACEL:
3280af22 1133 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1134 while (s < strend) {
1135 if (isSPACE_LC(*s)) {
1136 if (tmp && regtry(prog, s))
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s++;
1144 }
1145 break;
a0ed51b3
LW
1146 case SPACELUTF8:
1147 PL_reg_flags |= RF_tainted;
1148 while (s < strend) {
dfe13c55 1149 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
1150 if (tmp && regtry(prog, s))
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s += UTF8SKIP(s);
1158 }
1159 break;
a0d0e21e
LW
1160 case NSPACE:
1161 while (s < strend) {
1162 if (!isSPACE(*s)) {
1163 if (tmp && regtry(prog, s))
1164 goto got_it;
1165 else
1166 tmp = doevery;
a687059c 1167 }
a0d0e21e
LW
1168 else
1169 tmp = 1;
1170 s++;
1171 }
1172 break;
a0ed51b3
LW
1173 case NSPACEUTF8:
1174 while (s < strend) {
dfe13c55 1175 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1176 if (tmp && regtry(prog, s))
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s += UTF8SKIP(s);
1184 }
1185 break;
bbce6d69 1186 case NSPACEL:
3280af22 1187 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1188 while (s < strend) {
1189 if (!isSPACE_LC(*s)) {
1190 if (tmp && regtry(prog, s))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s++;
1198 }
1199 break;
a0ed51b3
LW
1200 case NSPACELUTF8:
1201 PL_reg_flags |= RF_tainted;
1202 while (s < strend) {
dfe13c55 1203 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1204 if (tmp && regtry(prog, s))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s += UTF8SKIP(s);
1212 }
1213 break;
a0d0e21e
LW
1214 case DIGIT:
1215 while (s < strend) {
1216 if (isDIGIT(*s)) {
1217 if (tmp && regtry(prog, s))
1218 goto got_it;
1219 else
1220 tmp = doevery;
2b69d0c2 1221 }
a0d0e21e
LW
1222 else
1223 tmp = 1;
1224 s++;
1225 }
1226 break;
a0ed51b3
LW
1227 case DIGITUTF8:
1228 while (s < strend) {
dfe13c55 1229 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1230 if (tmp && regtry(prog, s))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s += UTF8SKIP(s);
1238 }
1239 break;
b8c5462f
JH
1240 case DIGITL:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (isDIGIT_LC(*s)) {
1244 if (tmp && regtry(prog, s))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s++;
1252 }
1253 break;
1254 case DIGITLUTF8:
1255 PL_reg_flags |= RF_tainted;
1256 while (s < strend) {
1257 if (isDIGIT_LC_utf8((U8*)s)) {
1258 if (tmp && regtry(prog, s))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s += UTF8SKIP(s);
1266 }
1267 break;
a0d0e21e
LW
1268 case NDIGIT:
1269 while (s < strend) {
1270 if (!isDIGIT(*s)) {
1271 if (tmp && regtry(prog, s))
1272 goto got_it;
1273 else
1274 tmp = doevery;
a687059c 1275 }
a0d0e21e
LW
1276 else
1277 tmp = 1;
1278 s++;
1279 }
1280 break;
a0ed51b3
LW
1281 case NDIGITUTF8:
1282 while (s < strend) {
dfe13c55 1283 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s += UTF8SKIP(s);
1292 }
1293 break;
b8c5462f
JH
1294 case NDIGITL:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (!isDIGIT_LC(*s)) {
1298 if (tmp && regtry(prog, s))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s++;
a0ed51b3 1306 }
b8c5462f
JH
1307 break;
1308 case NDIGITLUTF8:
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (!isDIGIT_LC_utf8((U8*)s)) {
1312 if (tmp && regtry(prog, s))
1313 goto got_it;
cf93c79d 1314 else
b8c5462f
JH
1315 tmp = doevery;
1316 }
1317 else
1318 tmp = 1;
1319 s += UTF8SKIP(s);
1320 }
1321 break;
d6a28714
JH
1322 }
1323 }
1324 else {
1325 dontbother = 0;
1326 if (prog->float_substr != Nullsv) { /* Trim the end. */
1327 char *last;
1328 I32 oldpos = scream_pos;
1329
1330 if (flags & REXEC_SCREAM) {
1331 last = screaminstr(sv, prog->float_substr, s - strbeg,
1332 end_shift, &scream_pos, 1); /* last one */
1333 if (!last)
1334 last = scream_olds; /* Only one occurence. */
b8c5462f 1335 }
d6a28714
JH
1336 else {
1337 STRLEN len;
1338 char *little = SvPV(prog->float_substr, len);
1339
1340 if (SvTAIL(prog->float_substr)) {
1341 if (memEQ(strend - len + 1, little, len - 1))
1342 last = strend - len + 1;
1343 else if (!PL_multiline)
1344 last = memEQ(strend - len, little, len)
1345 ? strend - len : Nullch;
b8c5462f 1346 else
d6a28714
JH
1347 goto find_last;
1348 } else {
1349 find_last:
1350 if (len)
1351 last = rninstr(s, strend, little, little + len);
b8c5462f 1352 else
d6a28714 1353 last = strend; /* matching `$' */
b8c5462f 1354 }
b8c5462f 1355 }
d6a28714
JH
1356 if (last == NULL) goto phooey; /* Should not happen! */
1357 dontbother = strend - last + prog->float_min_offset;
1358 }
1359 if (minlen && (dontbother < minlen))
1360 dontbother = minlen - 1;
1361 strend -= dontbother; /* this one's always in bytes! */
1362 /* We don't know much -- general case. */
1363 if (UTF) {
1364 for (;;) {
1365 if (regtry(prog, s))
1366 goto got_it;
1367 if (s >= strend)
1368 break;
b8c5462f 1369 s += UTF8SKIP(s);
d6a28714
JH
1370 };
1371 }
1372 else {
1373 do {
1374 if (regtry(prog, s))
1375 goto got_it;
1376 } while (s++ < strend);
1377 }
1378 }
1379
1380 /* Failure. */
1381 goto phooey;
1382
1383got_it:
1384 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1385
1386 if (PL_reg_eval_set) {
1387 /* Preserve the current value of $^R */
1388 if (oreplsv != GvSV(PL_replgv))
1389 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1390 restored, the value remains
1391 the same. */
1392 restore_pos(aTHXo_ 0);
1393 }
1394
1395 /* make sure $`, $&, $', and $digit will work later */
1396 if ( !(flags & REXEC_NOT_FIRST) ) {
1397 if (RX_MATCH_COPIED(prog)) {
1398 Safefree(prog->subbeg);
1399 RX_MATCH_COPIED_off(prog);
1400 }
1401 if (flags & REXEC_COPY_STR) {
1402 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1403
1404 s = savepvn(strbeg, i);
1405 prog->subbeg = s;
1406 prog->sublen = i;
1407 RX_MATCH_COPIED_on(prog);
1408 }
1409 else {
1410 prog->subbeg = strbeg;
1411 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1412 }
1413 }
1414
1415 return 1;
1416
1417phooey:
1418 if (PL_reg_eval_set)
1419 restore_pos(aTHXo_ 0);
1420 return 0;
1421}
1422
1423/*
1424 - regtry - try match at specific point
1425 */
1426STATIC I32 /* 0 failure, 1 success */
1427S_regtry(pTHX_ regexp *prog, char *startpos)
1428{
1429 dTHR;
1430 register I32 i;
1431 register I32 *sp;
1432 register I32 *ep;
1433 CHECKPOINT lastcp;
1434
1435 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1436 MAGIC *mg;
1437
1438 PL_reg_eval_set = RS_init;
1439 DEBUG_r(DEBUG_s(
1440 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1441 PL_stack_sp - PL_stack_base);
1442 ));
1443 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1444 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1445 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1446 SAVETMPS;
1447 /* Apparently this is not needed, judging by wantarray. */
1448 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1449 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1450
1451 if (PL_reg_sv) {
1452 /* Make $_ available to executed code. */
1453 if (PL_reg_sv != DEFSV) {
1454 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1455 SAVESPTR(DEFSV);
1456 DEFSV = PL_reg_sv;
b8c5462f 1457 }
d6a28714
JH
1458
1459 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1460 && (mg = mg_find(PL_reg_sv, 'g')))) {
1461 /* prepare for quick setting of pos */
1462 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1463 mg = mg_find(PL_reg_sv, 'g');
1464 mg->mg_len = -1;
b8c5462f 1465 }
d6a28714
JH
1466 PL_reg_magic = mg;
1467 PL_reg_oldpos = mg->mg_len;
1468 SAVEDESTRUCTOR(restore_pos, 0);
1469 }
1470 if (!PL_reg_curpm)
1471 New(22,PL_reg_curpm, 1, PMOP);
1472 PL_reg_curpm->op_pmregexp = prog;
1473 PL_reg_oldcurpm = PL_curpm;
1474 PL_curpm = PL_reg_curpm;
1475 if (RX_MATCH_COPIED(prog)) {
1476 /* Here is a serious problem: we cannot rewrite subbeg,
1477 since it may be needed if this match fails. Thus
1478 $` inside (?{}) could fail... */
1479 PL_reg_oldsaved = prog->subbeg;
1480 PL_reg_oldsavedlen = prog->sublen;
1481 RX_MATCH_COPIED_off(prog);
1482 }
1483 else
1484 PL_reg_oldsaved = Nullch;
1485 prog->subbeg = PL_bostr;
1486 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1487 }
1488 prog->startp[0] = startpos - PL_bostr;
1489 PL_reginput = startpos;
1490 PL_regstartp = prog->startp;
1491 PL_regendp = prog->endp;
1492 PL_reglastparen = &prog->lastparen;
1493 prog->lastparen = 0;
1494 PL_regsize = 0;
1495 DEBUG_r(PL_reg_starttry = startpos);
1496 if (PL_reg_start_tmpl <= prog->nparens) {
1497 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1498 if(PL_reg_start_tmp)
1499 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1500 else
1501 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1502 }
1503
1504 /* XXXX What this code is doing here?!!! There should be no need
1505 to do this again and again, PL_reglastparen should take care of
1506 this! */
1507 sp = prog->startp;
1508 ep = prog->endp;
1509 if (prog->nparens) {
1510 for (i = prog->nparens; i >= 1; i--) {
1511 *++sp = -1;
1512 *++ep = -1;
1513 }
1514 }
1515 REGCP_SET;
1516 if (regmatch(prog->program + 1)) {
1517 prog->endp[0] = PL_reginput - PL_bostr;
1518 return 1;
1519 }
1520 REGCP_UNWIND;
1521 return 0;
1522}
1523
1524/*
1525 - regmatch - main matching routine
1526 *
1527 * Conceptually the strategy is simple: check to see whether the current
1528 * node matches, call self recursively to see whether the rest matches,
1529 * and then act accordingly. In practice we make some effort to avoid
1530 * recursion, in particular by going through "ordinary" nodes (that don't
1531 * need to know whether the rest of the match failed) by a loop instead of
1532 * by recursion.
1533 */
1534/* [lwall] I've hoisted the register declarations to the outer block in order to
1535 * maybe save a little bit of pushing and popping on the stack. It also takes
1536 * advantage of machines that use a register save mask on subroutine entry.
1537 */
1538STATIC I32 /* 0 failure, 1 success */
1539S_regmatch(pTHX_ regnode *prog)
1540{
1541 dTHR;
1542 register regnode *scan; /* Current node. */
1543 regnode *next; /* Next node. */
1544 regnode *inner; /* Next node in internal branch. */
1545 register I32 nextchr; /* renamed nextchr - nextchar colides with
1546 function of same name */
1547 register I32 n; /* no or next */
1548 register I32 ln; /* len or last */
1549 register char *s; /* operand or save */
1550 register char *locinput = PL_reginput;
1551 register I32 c1, c2, paren; /* case fold search, parenth */
1552 int minmod = 0, sw = 0, logical = 0;
1553#ifdef DEBUGGING
1554 PL_regindent++;
1555#endif
1556
1557 /* Note that nextchr is a byte even in UTF */
1558 nextchr = UCHARAT(locinput);
1559 scan = prog;
1560 while (scan != NULL) {
1561#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1562#ifdef DEBUGGING
1563# define sayYES goto yes
1564# define sayNO goto no
1565# define saySAME(x) if (x) goto yes; else goto no
1566# define REPORT_CODE_OFF 24
1567#else
1568# define sayYES return 1
1569# define sayNO return 0
1570# define saySAME(x) return x
1571#endif
1572 DEBUG_r( {
1573 SV *prop = sv_newmortal();
1574 int docolor = *PL_colors[0];
1575 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1576 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1577 /* The part of the string before starttry has one color
1578 (pref0_len chars), between starttry and current
1579 position another one (pref_len - pref0_len chars),
1580 after the current position the third one.
1581 We assume that pref0_len <= pref_len, otherwise we
1582 decrease pref0_len. */
1583 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1584 ? (5 + taill) - l : locinput - PL_bostr);
1585 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1586
1587 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1588 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1589 ? (5 + taill) - pref_len : PL_regeol - locinput);
1590 if (pref0_len < 0)
1591 pref0_len = 0;
1592 if (pref0_len > pref_len)
1593 pref0_len = pref_len;
1594 regprop(prop, scan);
1595 PerlIO_printf(Perl_debug_log,
1596 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1597 locinput - PL_bostr,
1598 PL_colors[4], pref0_len,
1599 locinput - pref_len, PL_colors[5],
1600 PL_colors[2], pref_len - pref0_len,
1601 locinput - pref_len + pref0_len, PL_colors[3],
1602 (docolor ? "" : "> <"),
1603 PL_colors[0], l, locinput, PL_colors[1],
1604 15 - l - pref_len + 1,
1605 "",
1606 scan - PL_regprogram, PL_regindent*2, "",
1607 SvPVX(prop));
1608 } );
1609
1610 next = scan + NEXT_OFF(scan);
1611 if (next == scan)
1612 next = NULL;
1613
1614 switch (OP(scan)) {
1615 case BOL:
1616 if (locinput == PL_bostr
1617 ? PL_regprev == '\n'
1618 : (PL_multiline &&
1619 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1620 {
1621 /* regtill = regbol; */
b8c5462f
JH
1622 break;
1623 }
d6a28714
JH
1624 sayNO;
1625 case MBOL:
1626 if (locinput == PL_bostr
1627 ? PL_regprev == '\n'
1628 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1629 {
b8c5462f
JH
1630 break;
1631 }
d6a28714
JH
1632 sayNO;
1633 case SBOL:
1634 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1635 break;
d6a28714
JH
1636 sayNO;
1637 case GPOS:
1638 if (locinput == PL_reg_ganch)
1639 break;
1640 sayNO;
1641 case EOL:
1642 if (PL_multiline)
1643 goto meol;
1644 else
1645 goto seol;
1646 case MEOL:
1647 meol:
1648 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1649 sayNO;
b8c5462f 1650 break;
d6a28714
JH
1651 case SEOL:
1652 seol:
1653 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1654 sayNO;
d6a28714 1655 if (PL_regeol - locinput > 1)
b8c5462f 1656 sayNO;
b8c5462f 1657 break;
d6a28714
JH
1658 case EOS:
1659 if (PL_regeol != locinput)
b8c5462f 1660 sayNO;
d6a28714
JH
1661 break;
1662 case SANYUTF8:
b8c5462f 1663 if (nextchr & 0x80) {
b8c5462f 1664 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1665 if (locinput > PL_regeol)
1666 sayNO;
b8c5462f
JH
1667 nextchr = UCHARAT(locinput);
1668 break;
1669 }
d6a28714 1670 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1671 sayNO;
b8c5462f 1672 nextchr = UCHARAT(++locinput);
a0d0e21e 1673 break;
d6a28714
JH
1674 case SANY:
1675 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1676 sayNO;
1677 nextchr = UCHARAT(++locinput);
b85d18e9 1678 break;
d6a28714 1679 case ANYUTF8:
a0ed51b3 1680 if (nextchr & 0x80) {
b8c5462f 1681 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1682 if (locinput > PL_regeol)
1683 sayNO;
a0ed51b3
LW
1684 nextchr = UCHARAT(locinput);
1685 break;
1686 }
d6a28714 1687 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1688 sayNO;
1689 nextchr = UCHARAT(++locinput);
1690 break;
d6a28714
JH
1691 case REG_ANY:
1692 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1693 sayNO;
76e3520e 1694 nextchr = UCHARAT(++locinput);
a0d0e21e 1695 break;
d6a28714 1696 case EXACT:
cd439c50
IZ
1697 s = STRING(scan);
1698 ln = STR_LEN(scan);
d6a28714
JH
1699 /* Inline the first character, for speed. */
1700 if (UCHARAT(s) != nextchr)
1701 sayNO;
1702 if (PL_regeol - locinput < ln)
1703 sayNO;
1704 if (ln > 1 && memNE(s, locinput, ln))
1705 sayNO;
1706 locinput += ln;
1707 nextchr = UCHARAT(locinput);
1708 break;
1709 case EXACTFL:
b8c5462f
JH
1710 PL_reg_flags |= RF_tainted;
1711 /* FALL THROUGH */
d6a28714 1712 case EXACTF:
cd439c50
IZ
1713 s = STRING(scan);
1714 ln = STR_LEN(scan);
d6a28714
JH
1715
1716 if (UTF) {
1717 char *l = locinput;
1718 char *e = s + ln;
1719 c1 = OP(scan) == EXACTF;
1720 while (s < e) {
1721 if (l >= PL_regeol)
1722 sayNO;
1723 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1724 toLOWER_utf8((U8*)l) :
1725 toLOWER_LC_utf8((U8*)l)))
1726 {
1727 sayNO;
1728 }
1729 s += UTF8SKIP(s);
1730 l += UTF8SKIP(l);
b8c5462f 1731 }
d6a28714 1732 locinput = l;
a0ed51b3
LW
1733 nextchr = UCHARAT(locinput);
1734 break;
1735 }
d6a28714
JH
1736
1737 /* Inline the first character, for speed. */
1738 if (UCHARAT(s) != nextchr &&
1739 UCHARAT(s) != ((OP(scan) == EXACTF)
1740 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1741 sayNO;
d6a28714 1742 if (PL_regeol - locinput < ln)
b8c5462f 1743 sayNO;
d6a28714
JH
1744 if (ln > 1 && (OP(scan) == EXACTF
1745 ? ibcmp(s, locinput, ln)
1746 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1747 sayNO;
d6a28714
JH
1748 locinput += ln;
1749 nextchr = UCHARAT(locinput);
a0d0e21e 1750 break;
d6a28714 1751 case ANYOFUTF8:
cd439c50 1752 s = MASK(scan);
d6a28714 1753 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1754 sayNO;
d6a28714
JH
1755 if (locinput >= PL_regeol)
1756 sayNO;
1757 locinput += PL_utf8skip[nextchr];
1758 nextchr = UCHARAT(locinput);
1759 break;
1760 case ANYOF:
cd439c50 1761 s = MASK(scan);
d6a28714 1762 if (nextchr < 0)
b8c5462f 1763 nextchr = UCHARAT(locinput);
d6a28714
JH
1764 if (!REGINCLASS(s, nextchr))
1765 sayNO;
1766 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1767 sayNO;
b8c5462f
JH
1768 nextchr = UCHARAT(++locinput);
1769 break;
d6a28714 1770 case ALNUML:
b8c5462f
JH
1771 PL_reg_flags |= RF_tainted;
1772 /* FALL THROUGH */
d6a28714 1773 case ALNUM:
b8c5462f 1774 if (!nextchr)
4633a7c4 1775 sayNO;
d6a28714
JH
1776 if (!(OP(scan) == ALNUM
1777 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1778 sayNO;
1779 nextchr = UCHARAT(++locinput);
bbce6d69 1780 break;
d6a28714 1781 case ALNUMLUTF8:
3280af22 1782 PL_reg_flags |= RF_tainted;
bbce6d69 1783 /* FALL THROUGH */
d6a28714 1784 case ALNUMUTF8:
b8c5462f
JH
1785 if (!nextchr)
1786 sayNO;
1787 if (nextchr & 0x80) {
d6a28714
JH
1788 if (!(OP(scan) == ALNUMUTF8
1789 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1790 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1791 {
1792 sayNO;
a0ed51b3 1793 }
b8c5462f 1794 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1795 nextchr = UCHARAT(locinput);
1796 break;
1797 }
d6a28714
JH
1798 if (!(OP(scan) == ALNUMUTF8
1799 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1800 sayNO;
b8c5462f 1801 nextchr = UCHARAT(++locinput);
a0d0e21e 1802 break;
d6a28714 1803 case NALNUML:
b8c5462f
JH
1804 PL_reg_flags |= RF_tainted;
1805 /* FALL THROUGH */
d6a28714
JH
1806 case NALNUM:
1807 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1808 sayNO;
d6a28714
JH
1809 if (OP(scan) == NALNUM
1810 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1811 sayNO;
b8c5462f 1812 nextchr = UCHARAT(++locinput);
a0ed51b3 1813 break;
d6a28714 1814 case NALNUMLUTF8:
b8c5462f
JH
1815 PL_reg_flags |= RF_tainted;
1816 /* FALL THROUGH */
d6a28714 1817 case NALNUMUTF8:
3280af22 1818 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1819 sayNO;
b8c5462f 1820 if (nextchr & 0x80) {
d6a28714
JH
1821 if (OP(scan) == NALNUMUTF8
1822 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1823 : isALNUM_LC_utf8((U8*)locinput))
1824 {
b8c5462f 1825 sayNO;
d6a28714 1826 }
b8c5462f
JH
1827 locinput += PL_utf8skip[nextchr];
1828 nextchr = UCHARAT(locinput);
1829 break;
1830 }
d6a28714
JH
1831 if (OP(scan) == NALNUMUTF8
1832 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1833 sayNO;
76e3520e 1834 nextchr = UCHARAT(++locinput);
a0d0e21e 1835 break;
d6a28714
JH
1836 case BOUNDL:
1837 case NBOUNDL:
3280af22 1838 PL_reg_flags |= RF_tainted;
bbce6d69 1839 /* FALL THROUGH */
d6a28714
JH
1840 case BOUND:
1841 case NBOUND:
1842 /* was last char in word? */
1843 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1844 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1845 ln = isALNUM(ln);
1846 n = isALNUM(nextchr);
1847 }
1848 else {
1849 ln = isALNUM_LC(ln);
1850 n = isALNUM_LC(nextchr);
1851 }
1852 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1853 sayNO;
a0d0e21e 1854 break;
d6a28714
JH
1855 case BOUNDLUTF8:
1856 case NBOUNDLUTF8:
a0ed51b3
LW
1857 PL_reg_flags |= RF_tainted;
1858 /* FALL THROUGH */
d6a28714
JH
1859 case BOUNDUTF8:
1860 case NBOUNDUTF8:
1861 /* was last char in word? */
1862 ln = (locinput != PL_regbol)
1863 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1864 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1865 ln = isALNUM_uni(ln);
1866 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1867 }
d6a28714
JH
1868 else {
1869 ln = isALNUM_LC_uni(ln);
1870 n = isALNUM_LC_utf8((U8*)locinput);
1871 }
1872 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1873 sayNO;
a0ed51b3 1874 break;
d6a28714 1875 case SPACEL:
3280af22 1876 PL_reg_flags |= RF_tainted;
bbce6d69 1877 /* FALL THROUGH */
d6a28714
JH
1878 case SPACE:
1879 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1880 sayNO;
d6a28714
JH
1881 if (!(OP(scan) == SPACE
1882 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1883 sayNO;
76e3520e 1884 nextchr = UCHARAT(++locinput);
a0d0e21e 1885 break;
d6a28714 1886 case SPACELUTF8:
a0ed51b3
LW
1887 PL_reg_flags |= RF_tainted;
1888 /* FALL THROUGH */
d6a28714 1889 case SPACEUTF8:
a0ed51b3
LW
1890 if (!nextchr && locinput >= PL_regeol)
1891 sayNO;
1892 if (nextchr & 0x80) {
d6a28714
JH
1893 if (!(OP(scan) == SPACEUTF8
1894 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1895 : isSPACE_LC_utf8((U8*)locinput)))
1896 {
a0ed51b3 1897 sayNO;
d6a28714 1898 }
6f06b55f 1899 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1900 nextchr = UCHARAT(locinput);
1901 break;
1902 }
d6a28714
JH
1903 if (!(OP(scan) == SPACEUTF8
1904 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
1905 sayNO;
1906 nextchr = UCHARAT(++locinput);
1907 break;
d6a28714 1908 case NSPACEL:
3280af22 1909 PL_reg_flags |= RF_tainted;
bbce6d69 1910 /* FALL THROUGH */
d6a28714 1911 case NSPACE:
b8c5462f
JH
1912 if (!nextchr)
1913 sayNO;
d6a28714
JH
1914 if (OP(scan) == SPACE
1915 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1916 sayNO;
b8c5462f 1917 nextchr = UCHARAT(++locinput);
a0d0e21e 1918 break;
d6a28714 1919 case NSPACELUTF8:
a0ed51b3
LW
1920 PL_reg_flags |= RF_tainted;
1921 /* FALL THROUGH */
d6a28714 1922 case NSPACEUTF8:
b8c5462f
JH
1923 if (!nextchr)
1924 sayNO;
1925 if (nextchr & 0x80) {
d6a28714
JH
1926 if (OP(scan) == NSPACEUTF8
1927 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1928 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
1929 {
1930 sayNO;
1931 }
1932 locinput += PL_utf8skip[nextchr];
1933 nextchr = UCHARAT(locinput);
1934 break;
a0ed51b3 1935 }
d6a28714
JH
1936 if (OP(scan) == NSPACEUTF8
1937 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1938 sayNO;
76e3520e 1939 nextchr = UCHARAT(++locinput);
a0d0e21e 1940 break;
d6a28714 1941 case DIGITL:
a0ed51b3
LW
1942 PL_reg_flags |= RF_tainted;
1943 /* FALL THROUGH */
d6a28714 1944 case DIGIT:
a0ed51b3
LW
1945 if (!nextchr && locinput >= PL_regeol)
1946 sayNO;
d6a28714
JH
1947 if (!(OP(scan) == DIGIT
1948 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 1949 sayNO;
76e3520e 1950 nextchr = UCHARAT(++locinput);
a0d0e21e 1951 break;
d6a28714 1952 case DIGITLUTF8:
a0ed51b3
LW
1953 PL_reg_flags |= RF_tainted;
1954 /* FALL THROUGH */
d6a28714 1955 case DIGITUTF8:
a0ed51b3
LW
1956 if (!nextchr)
1957 sayNO;
1958 if (nextchr & 0x80) {
d6a28714
JH
1959 if (OP(scan) == NDIGITUTF8
1960 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
1961 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 1962 {
a0ed51b3 1963 sayNO;
dfe13c55 1964 }
6f06b55f 1965 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1966 nextchr = UCHARAT(locinput);
1967 break;
1968 }
d6a28714 1969 if (!isDIGIT(nextchr))
a0ed51b3
LW
1970 sayNO;
1971 nextchr = UCHARAT(++locinput);
1972 break;
d6a28714 1973 case NDIGITL:
b8c5462f
JH
1974 PL_reg_flags |= RF_tainted;
1975 /* FALL THROUGH */
d6a28714 1976 case NDIGIT:
b8c5462f
JH
1977 if (!nextchr)
1978 sayNO;
d6a28714
JH
1979 if (OP(scan) == DIGIT
1980 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 1981 sayNO;
76e3520e 1982 nextchr = UCHARAT(++locinput);
a0d0e21e 1983 break;
d6a28714 1984 case NDIGITLUTF8:
b8c5462f
JH
1985 PL_reg_flags |= RF_tainted;
1986 /* FALL THROUGH */
d6a28714 1987 case NDIGITUTF8:
b8c5462f
JH
1988 if (!nextchr && locinput >= PL_regeol)
1989 sayNO;
a0ed51b3 1990 if (nextchr & 0x80) {
d6a28714 1991 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1992 sayNO;
6f06b55f 1993 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1994 nextchr = UCHARAT(locinput);
1995 break;
1996 }
d6a28714 1997 if (isDIGIT(nextchr))
a0ed51b3
LW
1998 sayNO;
1999 nextchr = UCHARAT(++locinput);
2000 break;
2001 case CLUMP:
dfe13c55 2002 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2003 sayNO;
6f06b55f 2004 locinput += PL_utf8skip[nextchr];
dfe13c55 2005 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2006 locinput += UTF8SKIP(locinput);
2007 if (locinput > PL_regeol)
2008 sayNO;
2009 nextchr = UCHARAT(locinput);
2010 break;
c8756f30 2011 case REFFL:
3280af22 2012 PL_reg_flags |= RF_tainted;
c8756f30 2013 /* FALL THROUGH */
c277df42 2014 case REF:
c8756f30 2015 case REFF:
c277df42 2016 n = ARG(scan); /* which paren pair */
cf93c79d 2017 ln = PL_regstartp[n];
2c2d71f5 2018 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2019 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2020 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2021 if (ln == PL_regendp[n])
a0d0e21e 2022 break;
a0ed51b3 2023
cf93c79d 2024 s = PL_bostr + ln;
a0ed51b3
LW
2025 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2026 char *l = locinput;
cf93c79d 2027 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2028 /*
2029 * Note that we can't do the "other character" lookup trick as
2030 * in the 8-bit case (no pun intended) because in Unicode we
2031 * have to map both upper and title case to lower case.
2032 */
2033 if (OP(scan) == REFF) {
2034 while (s < e) {
2035 if (l >= PL_regeol)
2036 sayNO;
dfe13c55 2037 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2038 sayNO;
2039 s += UTF8SKIP(s);
2040 l += UTF8SKIP(l);
2041 }
2042 }
2043 else {
2044 while (s < e) {
2045 if (l >= PL_regeol)
2046 sayNO;
dfe13c55 2047 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2048 sayNO;
2049 s += UTF8SKIP(s);
2050 l += UTF8SKIP(l);
2051 }
2052 }
2053 locinput = l;
2054 nextchr = UCHARAT(locinput);
2055 break;
2056 }
2057
a0d0e21e 2058 /* Inline the first character, for speed. */
76e3520e 2059 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2060 (OP(scan) == REF ||
2061 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2062 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2063 sayNO;
cf93c79d 2064 ln = PL_regendp[n] - ln;
3280af22 2065 if (locinput + ln > PL_regeol)
4633a7c4 2066 sayNO;
c8756f30
AK
2067 if (ln > 1 && (OP(scan) == REF
2068 ? memNE(s, locinput, ln)
2069 : (OP(scan) == REFF
2070 ? ibcmp(s, locinput, ln)
2071 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2072 sayNO;
a0d0e21e 2073 locinput += ln;
76e3520e 2074 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2075 break;
2076
2077 case NOTHING:
c277df42 2078 case TAIL:
a0d0e21e
LW
2079 break;
2080 case BACK:
2081 break;
c277df42
IZ
2082 case EVAL:
2083 {
2084 dSP;
533c011a 2085 OP_4tree *oop = PL_op;
3280af22
NIS
2086 COP *ocurcop = PL_curcop;
2087 SV **ocurpad = PL_curpad;
c277df42
IZ
2088 SV *ret;
2089
2090 n = ARG(scan);
533c011a
NIS
2091 PL_op = (OP_4tree*)PL_regdata->data[n];
2092 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 2093 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2094 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2095
cea2e8a9 2096 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2097 SPAGAIN;
2098 ret = POPs;
2099 PUTBACK;
2100
0f5d15d6
IZ
2101 PL_op = oop;
2102 PL_curpad = ocurpad;
2103 PL_curcop = ocurcop;
c277df42 2104 if (logical) {
0f5d15d6
IZ
2105 if (logical == 2) { /* Postponed subexpression. */
2106 regexp *re;
22c35a8c 2107 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2108 re_cc_state state;
0f5d15d6
IZ
2109 CHECKPOINT cp, lastcp;
2110
2111 if(SvROK(ret) || SvRMAGICAL(ret)) {
2112 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2113
2114 if(SvMAGICAL(sv))
2115 mg = mg_find(sv, 'r');
2116 }
2117 if (mg) {
2118 re = (regexp *)mg->mg_obj;
df0003d4 2119 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2120 }
2121 else {
2122 STRLEN len;
2123 char *t = SvPV(ret, len);
2124 PMOP pm;
2125 char *oprecomp = PL_regprecomp;
2126 I32 osize = PL_regsize;
2127 I32 onpar = PL_regnpar;
2128
2129 pm.op_pmflags = 0;
cea2e8a9 2130 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2131 if (!(SvFLAGS(ret)
2132 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2133 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2134 PL_regprecomp = oprecomp;
2135 PL_regsize = osize;
2136 PL_regnpar = onpar;
2137 }
2138 DEBUG_r(
2139 PerlIO_printf(Perl_debug_log,
2140 "Entering embedded `%s%.60s%s%s'\n",
2141 PL_colors[0],
2142 re->precomp,
2143 PL_colors[1],
2144 (strlen(re->precomp) > 60 ? "..." : ""))
2145 );
2146 state.node = next;
2147 state.prev = PL_reg_call_cc;
2148 state.cc = PL_regcc;
2149 state.re = PL_reg_re;
2150
2ab05381 2151 PL_regcc = 0;
0f5d15d6
IZ
2152
2153 cp = regcppush(0); /* Save *all* the positions. */
2154 REGCP_SET;
2155 cache_re(re);
2156 state.ss = PL_savestack_ix;
2157 *PL_reglastparen = 0;
2158 PL_reg_call_cc = &state;
2159 PL_reginput = locinput;
2c2d71f5
JH
2160
2161 /* XXXX This is too dramatic a measure... */
2162 PL_reg_maxiter = 0;
2163
0f5d15d6
IZ
2164 if (regmatch(re->program + 1)) {
2165 ReREFCNT_dec(re);
2166 regcpblow(cp);
2167 sayYES;
2168 }
2169 DEBUG_r(
2170 PerlIO_printf(Perl_debug_log,
2171 "%*s failed...\n",
2172 REPORT_CODE_OFF+PL_regindent*2, "")
2173 );
2174 ReREFCNT_dec(re);
2175 REGCP_UNWIND;
2176 regcppop();
2177 PL_reg_call_cc = state.prev;
2178 PL_regcc = state.cc;
2179 PL_reg_re = state.re;
d3790889 2180 cache_re(PL_reg_re);
2c2d71f5
JH
2181
2182 /* XXXX This is too dramatic a measure... */
2183 PL_reg_maxiter = 0;
2184
0f5d15d6
IZ
2185 sayNO;
2186 }
c277df42 2187 sw = SvTRUE(ret);
0f5d15d6 2188 logical = 0;
a0ed51b3
LW
2189 }
2190 else
3280af22 2191 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2192 break;
2193 }
a0d0e21e 2194 case OPEN:
c277df42 2195 n = ARG(scan); /* which paren pair */
3280af22
NIS
2196 PL_reg_start_tmp[n] = locinput;
2197 if (n > PL_regsize)
2198 PL_regsize = n;
a0d0e21e
LW
2199 break;
2200 case CLOSE:
c277df42 2201 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2202 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2203 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2204 if (n > *PL_reglastparen)
2205 *PL_reglastparen = n;
a0d0e21e 2206 break;
c277df42
IZ
2207 case GROUPP:
2208 n = ARG(scan); /* which paren pair */
cf93c79d 2209 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2210 break;
2211 case IFTHEN:
2c2d71f5 2212 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2213 if (sw)
2214 next = NEXTOPER(NEXTOPER(scan));
2215 else {
2216 next = scan + ARG(scan);
2217 if (OP(next) == IFTHEN) /* Fake one. */
2218 next = NEXTOPER(NEXTOPER(next));
2219 }
2220 break;
2221 case LOGICAL:
0f5d15d6 2222 logical = scan->flags;
c277df42 2223 break;
2ab05381
IZ
2224/*******************************************************************
2225 PL_regcc contains infoblock about the innermost (...)* loop, and
2226 a pointer to the next outer infoblock.
2227
2228 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2229
2230 1) After matching X, regnode for CURLYX is processed;
2231
2232 2) This regnode creates infoblock on the stack, and calls
2233 regmatch() recursively with the starting point at WHILEM node;
2234
2235 3) Each hit of WHILEM node tries to match A and Z (in the order
2236 depending on the current iteration, min/max of {min,max} and
2237 greediness). The information about where are nodes for "A"
2238 and "Z" is read from the infoblock, as is info on how many times "A"
2239 was already matched, and greediness.
2240
2241 4) After A matches, the same WHILEM node is hit again.
2242
2243 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2244 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2245 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2246 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2247 of the external loop.
2248
2249 Currently present infoblocks form a tree with a stem formed by PL_curcc
2250 and whatever it mentions via ->next, and additional attached trees
2251 corresponding to temporarily unset infoblocks as in "5" above.
2252
2253 In the following picture infoblocks for outer loop of
2254 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2255 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2256 infoblocks are drawn below the "reset" infoblock.
2257
2258 In fact in the picture below we do not show failed matches for Z and T
2259 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2260 more obvious *why* one needs to *temporary* unset infoblocks.]
2261
2262 Matched REx position InfoBlocks Comment
2263 (Y(A)*?Z)*?T x
2264 Y(A)*?Z)*?T x <- O
2265 Y (A)*?Z)*?T x <- O
2266 Y A)*?Z)*?T x <- O <- I
2267 YA )*?Z)*?T x <- O <- I
2268 YA A)*?Z)*?T x <- O <- I
2269 YAA )*?Z)*?T x <- O <- I
2270 YAA Z)*?T x <- O # Temporary unset I
2271 I
2272
2273 YAAZ Y(A)*?Z)*?T x <- O
2274 I
2275
2276 YAAZY (A)*?Z)*?T x <- O
2277 I
2278
2279 YAAZY A)*?Z)*?T x <- O <- I
2280 I
2281
2282 YAAZYA )*?Z)*?T x <- O <- I
2283 I
2284
2285 YAAZYA Z)*?T x <- O # Temporary unset I
2286 I,I
2287
2288 YAAZYAZ )*?T x <- O
2289 I,I
2290
2291 YAAZYAZ T x # Temporary unset O
2292 O
2293 I,I
2294
2295 YAAZYAZT x
2296 O
2297 I,I
2298 *******************************************************************/
a0d0e21e
LW
2299 case CURLYX: {
2300 CURCUR cc;
3280af22 2301 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2302
2303 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2304 next += ARG(next);
3280af22
NIS
2305 cc.oldcc = PL_regcc;
2306 PL_regcc = &cc;
2307 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2308 cc.cur = -1;
2309 cc.min = ARG1(scan);
2310 cc.max = ARG2(scan);
c277df42 2311 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2312 cc.next = next;
2313 cc.minmod = minmod;
2314 cc.lastloc = 0;
3280af22 2315 PL_reginput = locinput;
a0d0e21e
LW
2316 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2317 regcpblow(cp);
3280af22 2318 PL_regcc = cc.oldcc;
4633a7c4 2319 saySAME(n);
a0d0e21e
LW
2320 }
2321 /* NOT REACHED */
2322 case WHILEM: {
2323 /*
2324 * This is really hard to understand, because after we match
2325 * what we're trying to match, we must make sure the rest of
2c2d71f5 2326 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2327 * to go back UP the parse tree by recursing ever deeper. And
2328 * if it fails, we have to reset our parent's current state
2329 * that we can try again after backing off.
2330 */
2331
c277df42 2332 CHECKPOINT cp, lastcp;
3280af22 2333 CURCUR* cc = PL_regcc;
c277df42
IZ
2334 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2335
4633a7c4 2336 n = cc->cur + 1; /* how many we know we matched */
3280af22 2337 PL_reginput = locinput;
a0d0e21e 2338
c277df42
IZ
2339 DEBUG_r(
2340 PerlIO_printf(Perl_debug_log,
2341 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2342 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2343 (long)n, (long)cc->min,
2344 (long)cc->max, (long)cc)
2345 );
4633a7c4 2346
a0d0e21e
LW
2347 /* If degenerate scan matches "", assume scan done. */
2348
579cf2c3 2349 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2350 PL_regcc = cc->oldcc;
2ab05381
IZ
2351 if (PL_regcc)
2352 ln = PL_regcc->cur;
c277df42 2353 DEBUG_r(
c3464db5
DD
2354 PerlIO_printf(Perl_debug_log,
2355 "%*s empty match detected, try continuation...\n",
3280af22 2356 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2357 );
a0d0e21e 2358 if (regmatch(cc->next))
4633a7c4 2359 sayYES;
c277df42 2360 DEBUG_r(
c3464db5
DD
2361 PerlIO_printf(Perl_debug_log,
2362 "%*s failed...\n",
3280af22 2363 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2364 );
2ab05381
IZ
2365 if (PL_regcc)
2366 PL_regcc->cur = ln;
3280af22 2367 PL_regcc = cc;
4633a7c4 2368 sayNO;
a0d0e21e
LW
2369 }
2370
2371 /* First just match a string of min scans. */
2372
2373 if (n < cc->min) {
2374 cc->cur = n;
2375 cc->lastloc = locinput;
4633a7c4
LW
2376 if (regmatch(cc->scan))
2377 sayYES;
2378 cc->cur = n - 1;
c277df42
IZ
2379 cc->lastloc = lastloc;
2380 DEBUG_r(
c3464db5
DD
2381 PerlIO_printf(Perl_debug_log,
2382 "%*s failed...\n",
3280af22 2383 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2384 );
4633a7c4 2385 sayNO;
a0d0e21e
LW
2386 }
2387
2c2d71f5
JH
2388 if (scan->flags) {
2389 /* Check whether we already were at this position.
2390 Postpone detection until we know the match is not
2391 *that* much linear. */
2392 if (!PL_reg_maxiter) {
2393 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2394 PL_reg_leftiter = PL_reg_maxiter;
2395 }
2396 if (PL_reg_leftiter-- == 0) {
2397 I32 size = (PL_reg_maxiter + 7)/8;
2398 if (PL_reg_poscache) {
2399 if (PL_reg_poscache_size < size) {
2400 Renew(PL_reg_poscache, size, char);
2401 PL_reg_poscache_size = size;
2402 }
2403 Zero(PL_reg_poscache, size, char);
2404 }
2405 else {
2406 PL_reg_poscache_size = size;
2407 Newz(29, PL_reg_poscache, size, char);
2408 }
2409 DEBUG_r(
2410 PerlIO_printf(Perl_debug_log,
2411 "%sDetected a super-linear match, switching on caching%s...\n",
2412 PL_colors[4], PL_colors[5])
2413 );
2414 }
2415 if (PL_reg_leftiter < 0) {
2416 I32 o = locinput - PL_bostr, b;
2417
2418 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2419 b = o % 8;
2420 o /= 8;
2421 if (PL_reg_poscache[o] & (1<<b)) {
2422 DEBUG_r(
2423 PerlIO_printf(Perl_debug_log,
2424 "%*s already tried at this position...\n",
2425 REPORT_CODE_OFF+PL_regindent*2, "")
2426 );
2427 sayNO;
2428 }
2429 PL_reg_poscache[o] |= (1<<b);
2430 }
2431 }
2432
a0d0e21e
LW
2433 /* Prefer next over scan for minimal matching. */
2434
2435 if (cc->minmod) {
3280af22 2436 PL_regcc = cc->oldcc;
2ab05381
IZ
2437 if (PL_regcc)
2438 ln = PL_regcc->cur;
5f05dabc 2439 cp = regcppush(cc->parenfloor);
c277df42 2440 REGCP_SET;
5f05dabc 2441 if (regmatch(cc->next)) {
c277df42 2442 regcpblow(cp);
4633a7c4 2443 sayYES; /* All done. */
5f05dabc 2444 }
c277df42 2445 REGCP_UNWIND;
5f05dabc 2446 regcppop();
2ab05381
IZ
2447 if (PL_regcc)
2448 PL_regcc->cur = ln;
3280af22 2449 PL_regcc = cc;
a0d0e21e 2450
c277df42 2451 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2452 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2453 && !(PL_reg_flags & RF_warned)) {
2454 PL_reg_flags |= RF_warned;
cea2e8a9 2455 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2456 "Complex regular subexpression recursion",
2457 REG_INFTY - 1);
c277df42 2458 }
4633a7c4 2459 sayNO;
c277df42 2460 }
a687059c 2461
c277df42 2462 DEBUG_r(
c3464db5
DD
2463 PerlIO_printf(Perl_debug_log,
2464 "%*s trying longer...\n",
3280af22 2465 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2466 );
a0d0e21e 2467 /* Try scanning more and see if it helps. */
3280af22 2468 PL_reginput = locinput;
a0d0e21e
LW
2469 cc->cur = n;
2470 cc->lastloc = locinput;
5f05dabc 2471 cp = regcppush(cc->parenfloor);
c277df42 2472 REGCP_SET;
5f05dabc 2473 if (regmatch(cc->scan)) {
c277df42 2474 regcpblow(cp);
4633a7c4 2475 sayYES;
5f05dabc 2476 }
c277df42 2477 DEBUG_r(
c3464db5
DD
2478 PerlIO_printf(Perl_debug_log,
2479 "%*s failed...\n",
3280af22 2480 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2481 );
2482 REGCP_UNWIND;
5f05dabc 2483 regcppop();
4633a7c4 2484 cc->cur = n - 1;
c277df42 2485 cc->lastloc = lastloc;
4633a7c4 2486 sayNO;
a0d0e21e
LW
2487 }
2488
2489 /* Prefer scan over next for maximal matching. */
2490
2491 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2492 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2493 cc->cur = n;
2494 cc->lastloc = locinput;
c277df42 2495 REGCP_SET;
5f05dabc 2496 if (regmatch(cc->scan)) {
c277df42 2497 regcpblow(cp);
4633a7c4 2498 sayYES;
5f05dabc 2499 }
c277df42 2500 REGCP_UNWIND;
a0d0e21e 2501 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2502 PL_reginput = locinput;
c277df42 2503 DEBUG_r(
c3464db5
DD
2504 PerlIO_printf(Perl_debug_log,
2505 "%*s failed, try continuation...\n",
3280af22 2506 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2507 );
2508 }
599cee73
PM
2509 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2510 && !(PL_reg_flags & RF_warned)) {
3280af22 2511 PL_reg_flags |= RF_warned;
cea2e8a9 2512 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2513 "Complex regular subexpression recursion",
2514 REG_INFTY - 1);
a0d0e21e
LW
2515 }
2516
2517 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2518 PL_regcc = cc->oldcc;
2ab05381
IZ
2519 if (PL_regcc)
2520 ln = PL_regcc->cur;
a0d0e21e 2521 if (regmatch(cc->next))
4633a7c4 2522 sayYES;
c277df42 2523 DEBUG_r(
c3464db5 2524 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 2525 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2526 );
2ab05381
IZ
2527 if (PL_regcc)
2528 PL_regcc->cur = ln;
3280af22 2529 PL_regcc = cc;
4633a7c4 2530 cc->cur = n - 1;
c277df42 2531 cc->lastloc = lastloc;
4633a7c4 2532 sayNO;
a0d0e21e
LW
2533 }
2534 /* NOT REACHED */
c277df42
IZ
2535 case BRANCHJ:
2536 next = scan + ARG(scan);
2537 if (next == scan)
2538 next = NULL;
2539 inner = NEXTOPER(NEXTOPER(scan));
2540 goto do_branch;
2541 case BRANCH:
2542 inner = NEXTOPER(scan);
2543 do_branch:
2544 {
2545 CHECKPOINT lastcp;
2546 c1 = OP(scan);
2547 if (OP(next) != c1) /* No choice. */
2548 next = inner; /* Avoid recursion. */
a0d0e21e 2549 else {
3280af22 2550 int lastparen = *PL_reglastparen;
c277df42
IZ
2551
2552 REGCP_SET;
a0d0e21e 2553 do {
3280af22 2554 PL_reginput = locinput;
c277df42 2555 if (regmatch(inner))
4633a7c4 2556 sayYES;
c277df42 2557 REGCP_UNWIND;
3280af22 2558 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2559 PL_regendp[n] = -1;
3280af22 2560 *PL_reglastparen = n;
c277df42 2561 scan = next;
a0d0e21e 2562 /*SUPPRESS 560*/
c277df42
IZ
2563 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2564 next += n;
a0d0e21e 2565 else
c277df42 2566 next = NULL;
c277df42
IZ
2567 inner = NEXTOPER(scan);
2568 if (c1 == BRANCHJ) {
2569 inner = NEXTOPER(inner);
2570 }
2571 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2572 sayNO;
a0d0e21e 2573 /* NOTREACHED */
a687059c 2574 }
a0d0e21e
LW
2575 }
2576 break;
2577 case MINMOD:
2578 minmod = 1;
2579 break;
c277df42
IZ
2580 case CURLYM:
2581 {
00db4c45 2582 I32 l = 0;
c277df42
IZ
2583 CHECKPOINT lastcp;
2584
2585 /* We suppose that the next guy does not need
2586 backtracking: in particular, it is of constant length,
2587 and has no parenths to influence future backrefs. */
2588 ln = ARG1(scan); /* min to match */
2589 n = ARG2(scan); /* max to match */
c277df42
IZ
2590 paren = scan->flags;
2591 if (paren) {
3280af22
NIS
2592 if (paren > PL_regsize)
2593 PL_regsize = paren;
2594 if (paren > *PL_reglastparen)
2595 *PL_reglastparen = paren;
c277df42 2596 }
dc45a647 2597 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2598 if (paren)
2599 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2600 PL_reginput = locinput;
c277df42
IZ
2601 if (minmod) {
2602 minmod = 0;
2603 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2604 sayNO;
5f4b28b2 2605 if (ln && l == 0 && n >= ln
c277df42
IZ
2606 /* In fact, this is tricky. If paren, then the
2607 fact that we did/didnot match may influence
2608 future execution. */
2609 && !(paren && ln == 0))
2610 ln = n;
3280af22 2611 locinput = PL_reginput;
22c35a8c 2612 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2613 c1 = (U8)*STRING(next);
c277df42 2614 if (OP(next) == EXACTF)
22c35a8c 2615 c2 = PL_fold[c1];
c277df42 2616 else if (OP(next) == EXACTFL)
22c35a8c 2617 c2 = PL_fold_locale[c1];
c277df42
IZ
2618 else
2619 c2 = c1;
a0ed51b3
LW
2620 }
2621 else
c277df42
IZ
2622 c1 = c2 = -1000;
2623 REGCP_SET;
5f4b28b2 2624 /* This may be improved if l == 0. */
c277df42
IZ
2625 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2626 /* If it could work, try it. */
2627 if (c1 == -1000 ||
3280af22
NIS
2628 UCHARAT(PL_reginput) == c1 ||
2629 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2630 {
2631 if (paren) {
2632 if (n) {
cf93c79d
IZ
2633 PL_regstartp[paren] =
2634 HOPc(PL_reginput, -l) - PL_bostr;
2635 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2636 }
2637 else
cf93c79d 2638 PL_regendp[paren] = -1;
c277df42
IZ
2639 }
2640 if (regmatch(next))
2641 sayYES;
2642 REGCP_UNWIND;
2643 }
2644 /* Couldn't or didn't -- move forward. */
3280af22 2645 PL_reginput = locinput;
c277df42
IZ
2646 if (regrepeat_hard(scan, 1, &l)) {
2647 ln++;
3280af22 2648 locinput = PL_reginput;
c277df42
IZ
2649 }
2650 else
2651 sayNO;
2652 }
a0ed51b3
LW
2653 }
2654 else {
c277df42
IZ
2655 n = regrepeat_hard(scan, n, &l);
2656 if (n != 0 && l == 0
2657 /* In fact, this is tricky. If paren, then the
2658 fact that we did/didnot match may influence
2659 future execution. */
2660 && !(paren && ln == 0))
2661 ln = n;
3280af22 2662 locinput = PL_reginput;
c277df42 2663 DEBUG_r(
5c0ca799
GS
2664 PerlIO_printf(Perl_debug_log,
2665 "%*s matched %ld times, len=%ld...\n",
3280af22 2666 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2667 );
2668 if (n >= ln) {
22c35a8c 2669 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2670 c1 = (U8)*STRING(next);
c277df42 2671 if (OP(next) == EXACTF)
22c35a8c 2672 c2 = PL_fold[c1];
c277df42 2673 else if (OP(next) == EXACTFL)
22c35a8c 2674 c2 = PL_fold_locale[c1];
c277df42
IZ
2675 else
2676 c2 = c1;
a0ed51b3
LW
2677 }
2678 else
c277df42
IZ
2679 c1 = c2 = -1000;
2680 }
2681 REGCP_SET;
2682 while (n >= ln) {
2683 /* If it could work, try it. */
2684 if (c1 == -1000 ||
3280af22
NIS
2685 UCHARAT(PL_reginput) == c1 ||
2686 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2687 {
2688 DEBUG_r(
c3464db5
DD
2689 PerlIO_printf(Perl_debug_log,
2690 "%*s trying tail with n=%ld...\n",
3280af22 2691 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2692 );
2693 if (paren) {
2694 if (n) {
cf93c79d
IZ
2695 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2696 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2697 }
a0ed51b3 2698 else
cf93c79d 2699 PL_regendp[paren] = -1;
c277df42 2700 }
a0ed51b3
LW
2701 if (regmatch(next))
2702 sayYES;
2703 REGCP_UNWIND;
2704 }
c277df42
IZ
2705 /* Couldn't or didn't -- back up. */
2706 n--;
dfe13c55 2707 locinput = HOPc(locinput, -l);
3280af22 2708 PL_reginput = locinput;
c277df42
IZ
2709 }
2710 }
2711 sayNO;
2712 break;
2713 }
2714 case CURLYN:
2715 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2716 if (paren > PL_regsize)
2717 PL_regsize = paren;
2718 if (paren > *PL_reglastparen)
2719 *PL_reglastparen = paren;
c277df42
IZ
2720 ln = ARG1(scan); /* min to match */
2721 n = ARG2(scan); /* max to match */
dc45a647 2722 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2723 goto repeat;
a0d0e21e 2724 case CURLY:
c277df42 2725 paren = 0;
a0d0e21e
LW
2726 ln = ARG1(scan); /* min to match */
2727 n = ARG2(scan); /* max to match */
dc45a647 2728 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2729 goto repeat;
2730 case STAR:
2731 ln = 0;
c277df42 2732 n = REG_INFTY;
a0d0e21e 2733 scan = NEXTOPER(scan);
c277df42 2734 paren = 0;
a0d0e21e
LW
2735 goto repeat;
2736 case PLUS:
c277df42
IZ
2737 ln = 1;
2738 n = REG_INFTY;
2739 scan = NEXTOPER(scan);
2740 paren = 0;
2741 repeat:
a0d0e21e
LW
2742 /*
2743 * Lookahead to avoid useless match attempts
2744 * when we know what character comes next.
2745 */
22c35a8c 2746 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2747 c1 = (U8)*STRING(next);
bbce6d69 2748 if (OP(next) == EXACTF)
22c35a8c 2749 c2 = PL_fold[c1];
bbce6d69 2750 else if (OP(next) == EXACTFL)
22c35a8c 2751 c2 = PL_fold_locale[c1];
bbce6d69
PP
2752 else
2753 c2 = c1;
2754 }
a0d0e21e 2755 else
bbce6d69 2756 c1 = c2 = -1000;
3280af22 2757 PL_reginput = locinput;
a0d0e21e 2758 if (minmod) {
c277df42 2759 CHECKPOINT lastcp;
a0d0e21e
LW
2760 minmod = 0;
2761 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2762 sayNO;
a0ed51b3 2763 locinput = PL_reginput;
c277df42 2764 REGCP_SET;
0fe9bf95
IZ
2765 if (c1 != -1000) {
2766 char *e = locinput + n - ln; /* Should not check after this */
2767 char *old = locinput;
2768
2769 if (e >= PL_regeol || (n == REG_INFTY))
2770 e = PL_regeol - 1;
2771 while (1) {
2772 /* Find place 'next' could work */
2773 if (c1 == c2) {
2774 while (locinput <= e && *locinput != c1)
2775 locinput++;
2776 } else {
2777 while (locinput <= e
2778 && *locinput != c1
2779 && *locinput != c2)
2780 locinput++;
2781 }
2782 if (locinput > e)
2783 sayNO;
2784 /* PL_reginput == old now */
2785 if (locinput != old) {
2786 ln = 1; /* Did some */
2787 if (regrepeat(scan, locinput - old) <
2788 locinput - old)
2789 sayNO;
2790 }
2791 /* PL_reginput == locinput now */
2792 if (paren) {
2793 if (ln) {
cf93c79d
IZ
2794 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2795 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2796 }
2797 else
cf93c79d 2798 PL_regendp[paren] = -1;
0fe9bf95
IZ
2799 }
2800 if (regmatch(next))
2801 sayYES;
2802 PL_reginput = locinput; /* Could be reset... */
2803 REGCP_UNWIND;
2804 /* Couldn't or didn't -- move forward. */
2805 old = locinput++;
2806 }
2807 }
2808 else
c277df42 2809 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2810 /* If it could work, try it. */
bbce6d69 2811 if (c1 == -1000 ||
3280af22
NIS
2812 UCHARAT(PL_reginput) == c1 ||
2813 UCHARAT(PL_reginput) == c2)
bbce6d69 2814 {
c277df42
IZ
2815 if (paren) {
2816 if (n) {
cf93c79d
IZ
2817 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2818 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2819 }
2820 else
cf93c79d 2821 PL_regendp[paren] = -1;
c277df42 2822 }
a0d0e21e 2823 if (regmatch(next))
4633a7c4 2824 sayYES;
c277df42 2825 REGCP_UNWIND;
bbce6d69 2826 }
c277df42 2827 /* Couldn't or didn't -- move forward. */
a0ed51b3 2828 PL_reginput = locinput;
a0d0e21e
LW
2829 if (regrepeat(scan, 1)) {
2830 ln++;
a0ed51b3
LW
2831 locinput = PL_reginput;
2832 }
2833 else
4633a7c4 2834 sayNO;
a0d0e21e
LW
2835 }
2836 }
2837 else {
c277df42 2838 CHECKPOINT lastcp;
a0d0e21e 2839 n = regrepeat(scan, n);
a0ed51b3 2840 locinput = PL_reginput;
22c35a8c 2841 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2842 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2843 ln = n; /* why back off? */
c277df42
IZ
2844 REGCP_SET;
2845 if (paren) {
2846 while (n >= ln) {
2847 /* If it could work, try it. */
2848 if (c1 == -1000 ||
3280af22
NIS
2849 UCHARAT(PL_reginput) == c1 ||
2850 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2851 {
2852 if (paren && n) {
2853 if (n) {
cf93c79d
IZ
2854 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2855 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2856 }
2857 else
cf93c79d 2858 PL_regendp[paren] = -1;
c277df42
IZ
2859 }
2860 if (regmatch(next))
2861 sayYES;
2862 REGCP_UNWIND;
2863 }
2864 /* Couldn't or didn't -- back up. */
2865 n--;
dfe13c55 2866 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2867 }
a0ed51b3
LW
2868 }
2869 else {
c277df42
IZ
2870 while (n >= ln) {
2871 /* If it could work, try it. */
2872 if (c1 == -1000 ||
3280af22
NIS
2873 UCHARAT(PL_reginput) == c1 ||
2874 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2875 {
2876 if (regmatch(next))
2877 sayYES;
2878 REGCP_UNWIND;
2879 }
2880 /* Couldn't or didn't -- back up. */
2881 n--;
dfe13c55 2882 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2883 }
a0d0e21e
LW
2884 }
2885 }
4633a7c4 2886 sayNO;
c277df42 2887 break;
a0d0e21e 2888 case END:
0f5d15d6
IZ
2889 if (PL_reg_call_cc) {
2890 re_cc_state *cur_call_cc = PL_reg_call_cc;
2891 CURCUR *cctmp = PL_regcc;
2892 regexp *re = PL_reg_re;
2893 CHECKPOINT cp, lastcp;
2894
2895 cp = regcppush(0); /* Save *all* the positions. */
2896 REGCP_SET;
2897 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2898 the caller. */
2899 PL_reginput = locinput; /* Make position available to
2900 the callcc. */
2901 cache_re(PL_reg_call_cc->re);
2902 PL_regcc = PL_reg_call_cc->cc;
2903 PL_reg_call_cc = PL_reg_call_cc->prev;
2904 if (regmatch(cur_call_cc->node)) {
2905 PL_reg_call_cc = cur_call_cc;
2906 regcpblow(cp);
2907 sayYES;
2908 }
2909 REGCP_UNWIND;
2910 regcppop();
2911 PL_reg_call_cc = cur_call_cc;
2912 PL_regcc = cctmp;
2913 PL_reg_re = re;
2914 cache_re(re);
2915
2916 DEBUG_r(
2917 PerlIO_printf(Perl_debug_log,
2918 "%*s continuation failed...\n",
2919 REPORT_CODE_OFF+PL_regindent*2, "")
2920 );
2921 sayNO;
2922 }
3280af22 2923 if (locinput < PL_regtill)
7e5428c5
IZ
2924 sayNO; /* Cannot match: too short. */
2925 /* Fall through */
2926 case SUCCEED:
3280af22 2927 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2928 sayYES; /* Success! */
c277df42
IZ
2929 case SUSPEND:
2930 n = 1;
9fe1d20c 2931 PL_reginput = locinput;
c277df42 2932 goto do_ifmatch;
a0d0e21e 2933 case UNLESSM:
c277df42 2934 n = 0;
a0ed51b3 2935 if (scan->flags) {
0fe9bf95
IZ
2936 if (UTF) { /* XXXX This is absolutely
2937 broken, we read before
2938 start of string. */
2939 s = HOPMAYBEc(locinput, -scan->flags);
2940 if (!s)
2941 goto say_yes;
2942 PL_reginput = s;
2943 }
2944 else {
2945 if (locinput < PL_bostr + scan->flags)
2946 goto say_yes;
2947 PL_reginput = locinput - scan->flags;
2948 goto do_ifmatch;
2949 }
a0ed51b3
LW
2950 }
2951 else
2952 PL_reginput = locinput;
c277df42
IZ
2953 goto do_ifmatch;
2954 case IFMATCH:
2955 n = 1;
a0ed51b3 2956 if (scan->flags) {
0fe9bf95
IZ
2957 if (UTF) { /* XXXX This is absolutely
2958 broken, we read before
2959 start of string. */
2960 s = HOPMAYBEc(locinput, -scan->flags);
2961 if (!s || s < PL_bostr)
2962 goto say_no;
2963 PL_reginput = s;
2964 }
2965 else {
2966 if (locinput < PL_bostr + scan->flags)
2967 goto say_no;
2968 PL_reginput = locinput - scan->flags;
2969 goto do_ifmatch;
2970 }
a0ed51b3
LW
2971 }
2972 else
2973 PL_reginput = locinput;
2974
c277df42 2975 do_ifmatch:
c277df42
IZ
2976 inner = NEXTOPER(NEXTOPER(scan));
2977 if (regmatch(inner) != n) {
2978 say_no:
2979 if (logical) {
2980 logical = 0;
2981 sw = 0;
2982 goto do_longjump;
a0ed51b3
LW
2983 }
2984 else
c277df42
IZ
2985 sayNO;
2986 }
2987 say_yes:
2988 if (logical) {
2989 logical = 0;
2990 sw = 1;
2991 }
fe44a5e8 2992 if (OP(scan) == SUSPEND) {
3280af22 2993 locinput = PL_reginput;
565764a8 2994 nextchr = UCHARAT(locinput);
fe44a5e8 2995 }
c277df42
IZ
2996 /* FALL THROUGH. */
2997 case LONGJMP:
2998 do_longjump:
2999 next = scan + ARG(scan);
3000 if (next == scan)
3001 next = NULL;
a0d0e21e
LW
3002 break;
3003 default:
c030ccd9 3004 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 3005 (unsigned long)scan, OP(scan));
cea2e8a9 3006 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3007 }
a0d0e21e
LW
3008 scan = next;
3009 }
a687059c 3010
a0d0e21e
LW
3011 /*
3012 * We get here only if there's trouble -- normally "case END" is
3013 * the terminating point.
3014 */
cea2e8a9 3015 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3016 /*NOTREACHED*/
4633a7c4
LW
3017 sayNO;
3018
3019yes:
3020#ifdef DEBUGGING
3280af22 3021 PL_regindent--;
4633a7c4
LW
3022#endif
3023 return 1;
3024
3025no:
3026#ifdef DEBUGGING
3280af22 3027 PL_regindent--;
4633a7c4 3028#endif
a0d0e21e 3029 return 0;
a687059c
LW
3030}
3031
3032/*
3033 - regrepeat - repeatedly match something simple, report how many
3034 */
3035/*
3036 * [This routine now assumes that it will only match on things of length 1.
3037 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3038 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3039 */
76e3520e 3040STATIC I32
cea2e8a9 3041S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3042{
5c0ca799 3043 dTHR;
a0d0e21e
LW
3044 register char *scan;
3045 register char *opnd;
3046 register I32 c;
3280af22 3047 register char *loceol = PL_regeol;
a0ed51b3 3048 register I32 hardcount = 0;
a0d0e21e 3049
3280af22 3050 scan = PL_reginput;
c277df42 3051 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3052 loceol = scan + max;
a0d0e21e 3053 switch (OP(p)) {
22c35a8c 3054 case REG_ANY:
a0d0e21e
LW
3055 while (scan < loceol && *scan != '\n')
3056 scan++;
3057 break;
3058 case SANY:
3059 scan = loceol;
3060 break;
a0ed51b3
LW
3061 case ANYUTF8:
3062 loceol = PL_regeol;
3063 while (scan < loceol && *scan != '\n') {
3064 scan += UTF8SKIP(scan);
3065 hardcount++;
3066 }
3067 break;
3068 case SANYUTF8:
3069 loceol = PL_regeol;
3070 while (scan < loceol) {
3071 scan += UTF8SKIP(scan);
3072 hardcount++;
3073 }
3074 break;
bbce6d69 3075 case EXACT: /* length of string is 1 */
cd439c50 3076 c = (U8)*STRING(p);
bbce6d69
PP
3077 while (scan < loceol && UCHARAT(scan) == c)
3078 scan++;
3079 break;
3080 case EXACTF: /* length of string is 1 */
cd439c50 3081 c = (U8)*STRING(p);
bbce6d69 3082 while (scan < loceol &&
22c35a8c 3083 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69
PP
3084 scan++;
3085 break;
3086 case EXACTFL: /* length of string is 1 */
3280af22 3087 PL_reg_flags |= RF_tainted;
cd439c50 3088 c = (U8)*STRING(p);
bbce6d69 3089 while (scan < loceol &&
22c35a8c 3090 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3091 scan++;
3092 break;
a0ed51b3
LW
3093 case ANYOFUTF8:
3094 loceol = PL_regeol;
3095 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3096 scan += UTF8SKIP(scan);
3097 hardcount++;
3098 }
3099 break;
a0d0e21e 3100 case ANYOF:
cd439c50 3101 opnd = MASK(p);
ae5c130c 3102 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3103 scan++;
a0d0e21e
LW
3104 break;
3105 case ALNUM:
3106 while (scan < loceol && isALNUM(*scan))
3107 scan++;
3108 break;
a0ed51b3
LW
3109 case ALNUMUTF8:
3110 loceol = PL_regeol;
dfe13c55 3111 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3112 scan += UTF8SKIP(scan);
3113 hardcount++;
3114 }
3115 break;
bbce6d69 3116 case ALNUML:
3280af22 3117 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3118 while (scan < loceol && isALNUM_LC(*scan))
3119 scan++;
3120 break;
a0ed51b3
LW
3121 case ALNUMLUTF8:
3122 PL_reg_flags |= RF_tainted;
3123 loceol = PL_regeol;
dfe13c55 3124 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3125 scan += UTF8SKIP(scan);
3126 hardcount++;
3127 }
3128 break;
3129 break;
a0d0e21e
LW
3130 case NALNUM:
3131 while (scan < loceol && !isALNUM(*scan))
3132 scan++;
3133 break;
a0ed51b3
LW
3134 case NALNUMUTF8:
3135 loceol = PL_regeol;
dfe13c55 3136 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3137 scan += UTF8SKIP(scan);
3138 hardcount++;
3139 }
3140 break;
bbce6d69 3141 case NALNUML:
3280af22 3142 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3143 while (scan < loceol && !isALNUM_LC(*scan))
3144 scan++;
3145 break;
a0ed51b3
LW
3146 case NALNUMLUTF8:
3147 PL_reg_flags |= RF_tainted;
3148 loceol = PL_regeol;
dfe13c55 3149 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3150 scan += UTF8SKIP(scan);
3151 hardcount++;
3152 }
3153 break;
a0d0e21e
LW
3154 case SPACE:
3155 while (scan < loceol && isSPACE(*scan))
3156 scan++;
3157 break;
a0ed51b3
LW
3158 case SPACEUTF8:
3159 loceol = PL_regeol;
dfe13c55 3160 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3161 scan += UTF8SKIP(scan);
3162 hardcount++;
3163 }
3164 break;
bbce6d69 3165 case SPACEL:
3280af22 3166 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3167 while (scan < loceol && isSPACE_LC(*scan))
3168 scan++;
3169 break;
a0ed51b3
LW
3170 case SPACELUTF8:
3171 PL_reg_flags |= RF_tainted;
3172 loceol = PL_regeol;
dfe13c55 3173 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3174 scan += UTF8SKIP(scan);
3175 hardcount++;
3176 }
3177 break;
a0d0e21e
LW
3178 case NSPACE:
3179 while (scan < loceol && !isSPACE(*scan))
3180 scan++;
3181 break;
a0ed51b3
LW
3182 case NSPACEUTF8:
3183 loceol = PL_regeol;
dfe13c55 3184 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3185 scan += UTF8SKIP(scan);
3186 hardcount++;
3187 }
3188 break;
bbce6d69 3189 case NSPACEL:
3280af22 3190 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3191 while (scan < loceol && !isSPACE_LC(*scan))
3192 scan++;
3193 break;
a0ed51b3
LW
3194 case NSPACELUTF8:
3195 PL_reg_flags |= RF_tainted;
3196 loceol = PL_regeol;
dfe13c55 3197 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3198 scan += UTF8SKIP(scan);
3199 hardcount++;
3200 }
3201 break;
a0d0e21e
LW
3202 case DIGIT:
3203 while (scan < loceol && isDIGIT(*scan))
3204 scan++;
3205 break;
a0ed51b3
LW
3206 case DIGITUTF8:
3207 loceol = PL_regeol;
dfe13c55 3208 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3209 scan += UTF8SKIP(scan);
3210 hardcount++;
3211 }
3212 break;
3213 break;
a0d0e21e
LW
3214 case NDIGIT:
3215 while (scan < loceol && !isDIGIT(*scan))
3216 scan++;
3217 break;
a0ed51b3
LW
3218 case NDIGITUTF8:
3219 loceol = PL_regeol;
dfe13c55 3220 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3221 scan += UTF8SKIP(scan);
3222 hardcount++;
3223 }
3224 break;
a0d0e21e
LW
3225 default: /* Called on something of 0 width. */
3226 break; /* So match right here or not at all. */
3227 }
a687059c 3228
a0ed51b3
LW
3229 if (hardcount)
3230 c = hardcount;
3231 else
3232 c = scan - PL_reginput;
3280af22 3233 PL_reginput = scan;
a687059c 3234
c277df42
IZ
3235 DEBUG_r(
3236 {
3237 SV *prop = sv_newmortal();
3238
3239 regprop(prop, p);
3240 PerlIO_printf(Perl_debug_log,
3241 "%*s %s can match %ld times out of %ld...\n",
3242 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3243 });
3244
a0d0e21e 3245 return(c);
a687059c
LW
3246}
3247
3248/*
c277df42
IZ
3249 - regrepeat_hard - repeatedly match something, report total lenth and length
3250 *
3251 * The repeater is supposed to have constant length.
3252 */
3253
76e3520e 3254STATIC I32
cea2e8a9 3255S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3256{
5c0ca799 3257 dTHR;
c277df42
IZ
3258 register char *scan;
3259 register char *start;
3280af22 3260 register char *loceol = PL_regeol;
a0ed51b3 3261 I32 l = 0;
708e3b05 3262 I32 count = 0, res = 1;
a0ed51b3
LW
3263
3264 if (!max)
3265 return 0;
c277df42 3266
3280af22 3267 start = PL_reginput;
a0ed51b3 3268 if (UTF) {
708e3b05 3269 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3270 if (!count++) {
3271 l = 0;
3272 while (start < PL_reginput) {
3273 l++;
3274 start += UTF8SKIP(start);
3275 }
3276 *lp = l;
3277 if (l == 0)
3278 return max;
3279 }
3280 if (count == max)
3281 return count;
3282 }
3283 }
3284 else {
708e3b05 3285 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3286 if (!count++) {
3287 *lp = l = PL_reginput - start;
3288 if (max != REG_INFTY && l*max < loceol - scan)
3289 loceol = scan + l*max;
3290 if (l == 0)
3291 return max;
c277df42
IZ
3292 }
3293 }
3294 }
708e3b05 3295 if (!res)
3280af22 3296 PL_reginput = scan;
c277df42 3297
a0ed51b3 3298 return count;
c277df42
IZ
3299}
3300
3301/*
cb8d8820 3302 - reginclass - determine if a character falls into a character class
bbce6d69
PP
3303 */
3304
76e3520e 3305STATIC bool
cea2e8a9 3306S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3307{
5c0ca799 3308 dTHR;
b8c5462f 3309 char flags = ANYOF_FLAGS(p);
bbce6d69
PP
3310 bool match = FALSE;
3311
3312 c &= 0xFF;
b8c5462f 3313 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69
PP
3314 match = TRUE;
3315 else if (flags & ANYOF_FOLD) {
3316 I32 cf;
3317 if (flags & ANYOF_LOCALE) {
3280af22 3318 PL_reg_flags |= RF_tainted;
22c35a8c 3319 cf = PL_fold_locale[c];
bbce6d69
PP
3320 }
3321 else
22c35a8c 3322 cf = PL_fold[c];
b8c5462f 3323 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69
PP
3324 match = TRUE;
3325 }
3326
b8c5462f 3327 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3328 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3329 if (
3330 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3331 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3332 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3333 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3334 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3335 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3336 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3337 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3338 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3339 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3340 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3341 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3342 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3343 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3344 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3345 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3346 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3347 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3348 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3349 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3350 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3351 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3352 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3353 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3354 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3355 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3356 ) /* How's that for a conditional? */
bbce6d69
PP
3357 {
3358 match = TRUE;
3359 }
3360 }
3361
ae5c130c 3362 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69
PP
3363}
3364
a0ed51b3 3365STATIC bool
cea2e8a9 3366S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3367{
3368 dTHR;
a0ed51b3
LW
3369 char flags = ARG1(f);
3370 bool match = FALSE;
3371 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3372
3373 if (swash_fetch(sv, p))
3374 match = TRUE;
3375 else if (flags & ANYOF_FOLD) {
3376 I32 cf;
dfe13c55 3377 U8 tmpbuf[10];
a0ed51b3
LW
3378 if (flags & ANYOF_LOCALE) {
3379 PL_reg_flags |= RF_tainted;
3380 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3381 }
3382 else
3383 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3384 if (swash_fetch(sv, tmpbuf))
3385 match = TRUE;
3386 }
3387
b8c5462f 3388 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3389
3390 return (flags & ANYOF_INVERT) ? !match : match;
3391}
161b471a 3392
dfe13c55 3393STATIC U8 *
cea2e8a9 3394S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3395{
3396 dTHR;
a0ed51b3
LW
3397 if (off >= 0) {
3398 while (off-- && s < (U8*)PL_regeol)
3399 s += UTF8SKIP(s);
3400 }
3401 else {
3402 while (off++) {
3403 if (s > (U8*)PL_bostr) {
3404 s--;
3405 if (*s & 0x80) {
3406 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3407 s--;
3408 } /* XXX could check well-formedness here */
3409 }
3410 }
3411 }
3412 return s;
3413}
161b471a 3414
dfe13c55 3415STATIC U8 *
cea2e8a9 3416S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3417{
c485e607 3418 dTHR;
a0ed51b3
LW
3419 if (off >= 0) {
3420 while (off-- && s < (U8*)PL_regeol)
3421 s += UTF8SKIP(s);
3422 if (off >= 0)
3423 return 0;
3424 }
3425 else {
3426 while (off++) {
3427 if (s > (U8*)PL_bostr) {
3428 s--;
3429 if (*s & 0x80) {
3430 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3431 s--;
3432 } /* XXX could check well-formedness here */
3433 }
3434 else
3435 break;
3436 }
3437 if (off <= 0)
3438 return 0;
3439 }
3440 return s;
3441}
51371543
GS
3442
3443#ifdef PERL_OBJECT
3444#define NO_XSLOCKS
3445#include "XSUB.h"
3446#endif
3447
3448static void
3449restore_pos(pTHXo_ void *arg)
3450{
3451 dTHR;
3452 if (PL_reg_eval_set) {
3453 if (PL_reg_oldsaved) {
3454 PL_reg_re->subbeg = PL_reg_oldsaved;
3455 PL_reg_re->sublen = PL_reg_oldsavedlen;
3456 RX_MATCH_COPIED_on(PL_reg_re);
3457 }
3458 PL_reg_magic->mg_len = PL_reg_oldpos;
3459 PL_reg_eval_set = 0;
3460 PL_curpm = PL_reg_oldcurpm;
3461 }
3462}
3463