This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
clean up some stray "global" symbols
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c
GS
38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
d88dccdf 40# define Perl_reginitcolors my_reginitcolors
56953603
IZ
41#endif
42
f0fcb552 43/*SUPPRESS 112*/
a687059c 44/*
e50aee73 45 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
46 *
47 * Copyright (c) 1986 by University of Toronto.
48 * Written by Henry Spencer. Not derived from licensed software.
49 *
50 * Permission is granted to anyone to use this software for any
51 * purpose on any computer system, and to redistribute it freely,
52 * subject to the following restrictions:
53 *
54 * 1. The author is not responsible for the consequences of use of
55 * this software, no matter how awful, even if they arise
56 * from defects in it.
57 *
58 * 2. The origin of this software must not be misrepresented, either
59 * by explicit claim or by omission.
60 *
61 * 3. Altered versions must be plainly marked as such, and must not
62 * be misrepresented as being the original software.
63 *
64 **** Alterations to Henry's code are...
65 ****
4eb8286e 66 **** Copyright (c) 1991-1999, Larry Wall
a687059c 67 ****
9ef589d8
LW
68 **** You may distribute under the terms of either the GNU General Public
69 **** License or the Artistic License, as specified in the README file.
a687059c
LW
70 *
71 * Beware that some of this code is subtly aware of the way operator
72 * precedence is structured in regular expressions. Serious changes in
73 * regular-expression syntax might require a total rethink.
74 */
75#include "EXTERN.h"
76#include "perl.h"
0f5d15d6 77
a687059c
LW
78#include "regcomp.h"
79
c277df42
IZ
80#define RF_tainted 1 /* tainted information used? */
81#define RF_warned 2 /* warned about big count? */
ce862d02 82#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
83#define RF_utf8 8 /* String contains multibyte chars? */
84
85#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
86
87#define RS_init 1 /* eval environment created */
88#define RS_set 2 /* replsv value is set */
c277df42 89
a687059c
LW
90#ifndef STATIC
91#define STATIC static
92#endif
93
76e3520e 94#ifndef PERL_OBJECT
a0d0e21e
LW
95typedef I32 CHECKPOINT;
96
c277df42
IZ
97/*
98 * Forwards.
99 */
100
20ce7b12
GS
101static I32 regmatch (regnode *prog);
102static I32 regrepeat (regnode *p, I32 max);
103static I32 regrepeat_hard (regnode *p, I32 max, I32 *lp);
104static I32 regtry (regexp *prog, char *startpos);
105
106static bool reginclass (char *p, I32 c);
107static bool reginclassutf8 (regnode *f, U8* p);
108static CHECKPOINT regcppush (I32 parenfloor);
109static char * regcppop (void);
110static char * regcp_set_to (I32 ss);
111static void cache_re (regexp *prog);
112static void restore_pos (void *arg);
76e3520e 113#endif
a0ed51b3 114
ae5c130c 115#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3
LW
116#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
117
118#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
119#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
120
dfe13c55 121#ifndef PERL_OBJECT
20ce7b12
GS
122static U8 * reghop (U8 *pos, I32 off);
123static U8 * reghopmaybe (U8 *pos, I32 off);
dfe13c55
GS
124#endif
125#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
126#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
127#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
128#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
129#define HOPc(pos,off) ((char*)HOP(pos,off))
130#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 131
76e3520e 132STATIC CHECKPOINT
8ac85365 133regcppush(I32 parenfloor)
a0d0e21e 134{
11343788 135 dTHR;
3280af22
NIS
136 int retval = PL_savestack_ix;
137 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
138 int p;
139
140 SSCHECK(i + 5);
3280af22 141 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
142 SSPUSHINT(PL_regendp[p]);
143 SSPUSHINT(PL_regstartp[p]);
3280af22 144 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
145 SSPUSHINT(p);
146 }
3280af22
NIS
147 SSPUSHINT(PL_regsize);
148 SSPUSHINT(*PL_reglastparen);
149 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
150 SSPUSHINT(i + 3);
151 SSPUSHINT(SAVEt_REGCONTEXT);
152 return retval;
153}
154
c277df42 155/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
156# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
157 " Setting an EVAL scope, savestack=%i\n", \
3280af22 158 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 159
3280af22 160# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
161 PerlIO_printf(Perl_debug_log, \
162 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 163 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 164
76e3520e 165STATIC char *
8ac85365 166regcppop(void)
a0d0e21e 167{
11343788 168 dTHR;
a0d0e21e
LW
169 I32 i = SSPOPINT;
170 U32 paren = 0;
171 char *input;
cf93c79d 172 I32 tmps;
a0d0e21e
LW
173 assert(i == SAVEt_REGCONTEXT);
174 i = SSPOPINT;
175 input = (char *) SSPOPPTR;
3280af22
NIS
176 *PL_reglastparen = SSPOPINT;
177 PL_regsize = SSPOPINT;
c277df42 178 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 179 paren = (U32)SSPOPINT;
3280af22 180 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
181 PL_regstartp[paren] = SSPOPINT;
182 tmps = SSPOPINT;
3280af22
NIS
183 if (paren <= *PL_reglastparen)
184 PL_regendp[paren] = tmps;
c277df42 185 DEBUG_r(
c3464db5
DD
186 PerlIO_printf(Perl_debug_log,
187 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
188 paren, PL_regstartp[paren],
189 PL_reg_start_tmp[paren] - PL_bostr,
190 PL_regendp[paren],
3280af22 191 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 192 );
a0d0e21e 193 }
c277df42 194 DEBUG_r(
3280af22 195 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
196 PerlIO_printf(Perl_debug_log,
197 " restoring \\%d..\\%d to undef\n",
3280af22 198 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
199 }
200 );
3280af22
NIS
201 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
202 if (paren > PL_regsize)
cf93c79d
IZ
203 PL_regstartp[paren] = -1;
204 PL_regendp[paren] = -1;
a0d0e21e
LW
205 }
206 return input;
207}
208
0f5d15d6
IZ
209STATIC char *
210regcp_set_to(I32 ss)
211{
46124e9e 212 dTHR;
0f5d15d6
IZ
213 I32 tmp = PL_savestack_ix;
214
215 PL_savestack_ix = ss;
216 regcppop();
217 PL_savestack_ix = tmp;
942e002e 218 return Nullch;
0f5d15d6
IZ
219}
220
221typedef struct re_cc_state
222{
223 I32 ss;
224 regnode *node;
225 struct re_cc_state *prev;
226 CURCUR *cc;
227 regexp *re;
228} re_cc_state;
229
c277df42 230#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 231
a687059c 232/*
e50aee73 233 * pregexec and friends
a687059c
LW
234 */
235
236/*
c277df42 237 - pregexec - match a regexp against a string
a687059c 238 */
c277df42 239I32
c3464db5
DD
240pregexec(register regexp *prog, char *stringarg, register char *strend,
241 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
242/* strend: pointer to null at end of string */
243/* strbeg: real beginning of string */
244/* minend: end of match must be >=minend after stringarg. */
245/* nosave: For optimizations. */
246{
247 return
248 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
249 nosave ? 0 : REXEC_COPY_STR);
250}
0f5d15d6
IZ
251
252STATIC void
253cache_re(regexp *prog)
254{
46124e9e 255 dTHR;
0f5d15d6
IZ
256 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
257#ifdef DEBUGGING
258 PL_regprogram = prog->program;
259#endif
260 PL_regnpar = prog->nparens;
261 PL_regdata = prog->data;
262 PL_reg_re = prog;
263}
22e551b9 264
9661b544
IZ
265STATIC void
266restore_pos(void *arg)
35ef4773
GS
267{
268 dTHR;
cf93c79d
IZ
269 if (PL_reg_eval_set) {
270 if (PL_reg_oldsaved) {
271 PL_reg_re->subbeg = PL_reg_oldsaved;
272 PL_reg_re->sublen = PL_reg_oldsavedlen;
273 RX_MATCH_COPIED_on(PL_reg_re);
274 }
9661b544
IZ
275 PL_reg_magic->mg_len = PL_reg_oldpos;
276 PL_reg_eval_set = 0;
5c5e4c24 277 PL_curpm = PL_reg_oldcurpm;
9661b544
IZ
278 }
279}
280
281
a687059c 282/*
c277df42 283 - regexec_flags - match a regexp against a string
a687059c 284 */
79072805 285I32
c3464db5 286regexec_flags(register regexp *prog, char *stringarg, register char *strend,
22e551b9 287 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
288/* strend: pointer to null at end of string */
289/* strbeg: real beginning of string */
290/* minend: end of match must be >=minend after stringarg. */
291/* data: May be used for some additional optimizations. */
292/* nosave: For optimizations. */
a687059c 293{
5c0ca799 294 dTHR;
a0d0e21e 295 register char *s;
c277df42 296 register regnode *c;
a0d0e21e
LW
297 register char *startpos = stringarg;
298 register I32 tmp;
c277df42 299 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
300 I32 dontbother = 0; /* how many characters not to try at end */
301 CURCUR cc;
c277df42 302 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
303 constant substr. */ /* CC */
304 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
305 I32 scream_pos = -1; /* Internal iterator of scream. */
306 char *scream_olds;
3280af22 307 SV* oreplsv = GvSV(PL_replgv);
a687059c 308
a0d0e21e 309 cc.cur = 0;
4633a7c4 310 cc.oldcc = 0;
3280af22 311 PL_regcc = &cc;
a0d0e21e 312
0f5d15d6 313 cache_re(prog);
a0d0e21e 314#ifdef DEBUGGING
3280af22 315 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
316#endif
317
318 /* Be paranoid... */
319 if (prog == NULL || startpos == NULL) {
320 croak("NULL regexp parameter");
321 return 0;
322 }
323
c277df42
IZ
324 minlen = prog->minlen;
325 if (strend - startpos < minlen) goto phooey;
326
a0d0e21e 327 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 328 PL_regprev = '\n';
a0d0e21e 329 else {
a0ed51b3 330 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
331 if (!PL_multiline && PL_regprev == '\n')
332 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 333 }
bbce6d69 334
a0d0e21e 335 /* Check validity of program. */
22c35a8c 336 if (UCHARAT(prog->program) != REG_MAGIC) {
4327152a 337 croak("corrupted regexp program");
a0d0e21e
LW
338 }
339
3280af22
NIS
340 PL_reg_flags = 0;
341 PL_reg_eval_set = 0;
a0d0e21e 342
a0ed51b3
LW
343 if (prog->reganch & ROPT_UTF8)
344 PL_reg_flags |= RF_utf8;
345
346 /* Mark beginning of line for ^ and lookbehind. */
347 PL_regbol = startpos;
348 PL_bostr = strbeg;
9661b544 349 PL_reg_sv = sv;
a0ed51b3
LW
350
351 /* Mark end of line for $ (and such) */
352 PL_regeol = strend;
353
354 /* see how far we have to get to not match where we matched before */
355 PL_regtill = startpos+minend;
356
0f5d15d6
IZ
357 /* We start without call_cc context. */
358 PL_reg_call_cc = 0;
359
a0d0e21e
LW
360 /* If there is a "must appear" string, look for it. */
361 s = startpos;
c277df42
IZ
362 if (!(flags & REXEC_CHECKED)
363 && prog->check_substr != Nullsv &&
774d564b 364 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 365 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 366 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 367 {
a0ed51b3
LW
368 char *t;
369 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 370 /* Should be nonnegative! */
cf93c79d
IZ
371 end_shift = minlen - start_shift -
372 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
22e551b9 373 if (flags & REXEC_SCREAM) {
cf93c79d
IZ
374 SV *c = prog->check_substr;
375
376 if (PL_screamfirst[BmRARE(c)] >= 0
377 || ( BmRARE(c) == '\n'
378 && (BmPREVIOUS(c) == SvCUR(c) - 1)
379 && SvTAIL(c) ))
22e551b9 380 s = screaminstr(sv, prog->check_substr,
c277df42
IZ
381 start_shift + (stringarg - strbeg),
382 end_shift, &scream_pos, 0);
a0d0e21e
LW
383 else
384 s = Nullch;
c277df42 385 scream_olds = s;
0a12ae7d 386 }
a0d0e21e 387 else
c277df42
IZ
388 s = fbm_instr((unsigned char*)s + start_shift,
389 (unsigned char*)strend - end_shift,
cf93c79d 390 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
a0d0e21e 391 if (!s) {
c277df42 392 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 393 goto phooey; /* not present */
a0ed51b3
LW
394 }
395 else if (s - stringarg > prog->check_offset_max &&
396 (UTF
dfe13c55 397 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg)
a0ed51b3
LW
398 : (t = s - prog->check_offset_max) != 0
399 )
400 )
401 {
c277df42 402 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3
LW
403 s = t;
404 }
405 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42
IZ
406 && --BmUSEFUL(prog->check_substr) < 0
407 && prog->check_substr == prog->float_substr) { /* boo */
408 SvREFCNT_dec(prog->check_substr);
409 prog->check_substr = Nullsv; /* disable */
410 prog->float_substr = Nullsv; /* clear */
a0d0e21e 411 s = startpos;
a0ed51b3
LW
412 }
413 else
414 s = startpos;
a0d0e21e 415 }
a687059c 416
35ef4773
GS
417 DEBUG_r(if (!PL_colorset) reginitcolors());
418 DEBUG_r(PerlIO_printf(Perl_debug_log,
8d300b32
GS
419 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
420 PL_colors[4],PL_colors[5],PL_colors[0],
421 prog->precomp,
422 PL_colors[1],
c277df42 423 (strlen(prog->precomp) > 60 ? "..." : ""),
8d300b32 424 PL_colors[0],
c277df42 425 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 426 startpos, PL_colors[1],
c277df42
IZ
427 (strend - startpos > 60 ? "..." : ""))
428 );
429
22e551b9
IZ
430 if (prog->reganch & ROPT_GPOS_SEEN) {
431 MAGIC *mg;
22e551b9 432
ad94a511
IZ
433 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
434 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
435 PL_reg_ganch = strbeg + mg->mg_len;
436 else
437 PL_reg_ganch = startpos;
22e551b9
IZ
438 }
439
a0d0e21e 440 /* Simplest case: anchored match need be tried only once. */
774d564b 441 /* [unless only anchor is BOL and multiline is set] */
22e551b9 442 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
a0d0e21e
LW
443 if (regtry(prog, startpos))
444 goto got_it;
22e551b9
IZ
445 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
446 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 447 {
a0d0e21e
LW
448 if (minlen)
449 dontbother = minlen - 1;
dfe13c55 450 strend = HOPc(strend, -dontbother);
a0d0e21e
LW
451 /* for multiline we only have to try after newlines */
452 if (s > startpos)
453 s--;
454 while (s < strend) {
6f06b55f 455 if (*s++ == '\n') { /* don't need PL_utf8skip here */
a0d0e21e
LW
456 if (s < strend && regtry(prog, s))
457 goto got_it;
458 }
35c8bce7 459 }
35c8bce7 460 }
a0d0e21e 461 goto phooey;
22e551b9
IZ
462 } else if (prog->reganch & ROPT_ANCH_GPOS) {
463 if (regtry(prog, PL_reg_ganch))
464 goto got_it;
465 goto phooey;
a0d0e21e 466 }
35c8bce7 467
a0d0e21e 468 /* Messy cases: unanchored match. */
c277df42
IZ
469 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
470 /* we have /x+whatever/ */
471 /* it must be a one character string */
472 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
473 if (UTF) {
474 while (s < strend) {
475 if (*s == ch) {
476 if (regtry(prog, s)) goto got_it;
477 s += UTF8SKIP(s);
478 while (s < strend && *s == ch)
479 s += UTF8SKIP(s);
480 }
481 s += UTF8SKIP(s);
482 }
483 }
484 else {
485 while (s < strend) {
486 if (*s == ch) {
487 if (regtry(prog, s)) goto got_it;
c277df42 488 s++;
a0ed51b3
LW
489 while (s < strend && *s == ch)
490 s++;
491 }
492 s++;
a0d0e21e 493 }
a687059c 494 }
c277df42
IZ
495 }
496 /*SUPPRESS 560*/
497 else if (prog->anchored_substr != Nullsv
498 || (prog->float_substr != Nullsv
499 && prog->float_max_offset < strend - s)) {
500 SV *must = prog->anchored_substr
501 ? prog->anchored_substr : prog->float_substr;
502 I32 back_max =
503 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
504 I32 back_min =
505 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
506 I32 delta = back_max - back_min;
cf93c79d
IZ
507 char *last = HOPc(strend, /* Cannot start after this */
508 -(CHR_SVLEN(must) - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
509 char *last1; /* Last position checked before */
510
511 if (s > PL_bostr)
dfe13c55 512 last1 = HOPc(s, -1);
a0ed51b3
LW
513 else
514 last1 = s - 1; /* bogus */
c277df42
IZ
515
516 /* XXXX check_substr already used to find `s', can optimize if
517 check_substr==must. */
518 scream_pos = -1;
519 dontbother = end_shift;
dfe13c55 520 strend = HOPc(strend, -dontbother);
c277df42 521 while ( (s <= last) &&
22e551b9
IZ
522 ((flags & REXEC_SCREAM)
523 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 524 end_shift, &scream_pos, 0))
a0ed51b3 525 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
526 (unsigned char*)strend, must,
527 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
528 if (HOPc(s, -back_max) > last1) {
529 last1 = HOPc(s, -back_min);
530 s = HOPc(s, -back_max);
a0ed51b3
LW
531 }
532 else {
dfe13c55 533 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 534
dfe13c55 535 last1 = HOPc(s, -back_min);
c277df42 536 s = t;
a0d0e21e 537 }
a0ed51b3
LW
538 if (UTF) {
539 while (s <= last1) {
540 if (regtry(prog, s))
541 goto got_it;
542 s += UTF8SKIP(s);
543 }
544 }
545 else {
546 while (s <= last1) {
547 if (regtry(prog, s))
548 goto got_it;
549 s++;
550 }
a0d0e21e
LW
551 }
552 }
553 goto phooey;
a0ed51b3
LW
554 }
555 else if (c = prog->regstclass) {
a0d0e21e 556 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 557 char *cc;
a687059c 558
a0d0e21e
LW
559 if (minlen)
560 dontbother = minlen - 1;
dfe13c55 561 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
562 tmp = 1;
563 /* We know what class it must start with. */
564 switch (OP(c)) {
a0ed51b3
LW
565 case ANYOFUTF8:
566 cc = (char *) OPERAND(c);
567 while (s < strend) {
568 if (REGINCLASSUTF8(c, (U8*)s)) {
569 if (tmp && regtry(prog, s))
570 goto got_it;
571 else
572 tmp = doevery;
573 }
574 else
575 tmp = 1;
576 s += UTF8SKIP(s);
577 }
578 break;
a0d0e21e 579 case ANYOF:
a0ed51b3 580 cc = (char *) OPERAND(c);
a0d0e21e 581 while (s < strend) {
a0ed51b3 582 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
583 if (tmp && regtry(prog, s))
584 goto got_it;
585 else
586 tmp = doevery;
a687059c 587 }
a0d0e21e
LW
588 else
589 tmp = 1;
590 s++;
591 }
592 break;
bbce6d69 593 case BOUNDL:
3280af22 594 PL_reg_flags |= RF_tainted;
bbce6d69 595 /* FALL THROUGH */
a0d0e21e 596 case BOUND:
a0ed51b3
LW
597 if (minlen) {
598 dontbother++;
599 strend -= 1;
600 }
3280af22 601 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 602 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 603 while (s < strend) {
95bac841 604 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
605 tmp = !tmp;
606 if (regtry(prog, s))
607 goto got_it;
a687059c 608 }
a0d0e21e
LW
609 s++;
610 }
611 if ((minlen || tmp) && regtry(prog,s))
612 goto got_it;
613 break;
a0ed51b3
LW
614 case BOUNDLUTF8:
615 PL_reg_flags |= RF_tainted;
616 /* FALL THROUGH */
617 case BOUNDUTF8:
618 if (minlen) {
619 dontbother++;
dfe13c55 620 strend = reghop_c(strend, -1);
a0ed51b3 621 }
dfe13c55 622 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
623 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
624 while (s < strend) {
dfe13c55
GS
625 if (tmp == !(OP(c) == BOUND ?
626 swash_fetch(PL_utf8_alnum, (U8*)s) :
627 isALNUM_LC_utf8((U8*)s)))
628 {
a0ed51b3
LW
629 tmp = !tmp;
630 if (regtry(prog, s))
631 goto got_it;
632 }
633 s += UTF8SKIP(s);
634 }
635 if ((minlen || tmp) && regtry(prog,s))
636 goto got_it;
637 break;
bbce6d69 638 case NBOUNDL:
3280af22 639 PL_reg_flags |= RF_tainted;
bbce6d69 640 /* FALL THROUGH */
a0d0e21e 641 case NBOUND:
a0ed51b3
LW
642 if (minlen) {
643 dontbother++;
644 strend -= 1;
645 }
3280af22 646 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 647 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 648 while (s < strend) {
95bac841 649 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
650 tmp = !tmp;
651 else if (regtry(prog, s))
652 goto got_it;
653 s++;
654 }
655 if ((minlen || !tmp) && regtry(prog,s))
656 goto got_it;
657 break;
a0ed51b3
LW
658 case NBOUNDLUTF8:
659 PL_reg_flags |= RF_tainted;
660 /* FALL THROUGH */
661 case NBOUNDUTF8:
662 if (minlen) {
663 dontbother++;
dfe13c55 664 strend = reghop_c(strend, -1);
a0ed51b3 665 }
dfe13c55 666 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
667 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
668 while (s < strend) {
dfe13c55
GS
669 if (tmp == !(OP(c) == NBOUND ?
670 swash_fetch(PL_utf8_alnum, (U8*)s) :
671 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
672 tmp = !tmp;
673 else if (regtry(prog, s))
674 goto got_it;
675 s += UTF8SKIP(s);
676 }
677 if ((minlen || !tmp) && regtry(prog,s))
678 goto got_it;
679 break;
a0d0e21e
LW
680 case ALNUM:
681 while (s < strend) {
bbce6d69 682 if (isALNUM(*s)) {
683 if (tmp && regtry(prog, s))
684 goto got_it;
685 else
686 tmp = doevery;
687 }
688 else
689 tmp = 1;
690 s++;
691 }
692 break;
a0ed51b3
LW
693 case ALNUMUTF8:
694 while (s < strend) {
dfe13c55 695 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
696 if (tmp && regtry(prog, s))
697 goto got_it;
698 else
699 tmp = doevery;
700 }
701 else
702 tmp = 1;
703 s += UTF8SKIP(s);
704 }
705 break;
bbce6d69 706 case ALNUML:
3280af22 707 PL_reg_flags |= RF_tainted;
bbce6d69 708 while (s < strend) {
709 if (isALNUM_LC(*s)) {
a0d0e21e
LW
710 if (tmp && regtry(prog, s))
711 goto got_it;
a687059c 712 else
a0d0e21e
LW
713 tmp = doevery;
714 }
715 else
716 tmp = 1;
717 s++;
718 }
719 break;
a0ed51b3
LW
720 case ALNUMLUTF8:
721 PL_reg_flags |= RF_tainted;
722 while (s < strend) {
dfe13c55 723 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
724 if (tmp && regtry(prog, s))
725 goto got_it;
726 else
727 tmp = doevery;
728 }
729 else
730 tmp = 1;
731 s += UTF8SKIP(s);
732 }
733 break;
a0d0e21e
LW
734 case NALNUM:
735 while (s < strend) {
bbce6d69 736 if (!isALNUM(*s)) {
737 if (tmp && regtry(prog, s))
738 goto got_it;
739 else
740 tmp = doevery;
741 }
742 else
743 tmp = 1;
744 s++;
745 }
746 break;
a0ed51b3
LW
747 case NALNUMUTF8:
748 while (s < strend) {
dfe13c55 749 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
750 if (tmp && regtry(prog, s))
751 goto got_it;
752 else
753 tmp = doevery;
754 }
755 else
756 tmp = 1;
757 s += UTF8SKIP(s);
758 }
759 break;
bbce6d69 760 case NALNUML:
3280af22 761 PL_reg_flags |= RF_tainted;
bbce6d69 762 while (s < strend) {
763 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
764 if (tmp && regtry(prog, s))
765 goto got_it;
a687059c 766 else
a0d0e21e 767 tmp = doevery;
a687059c 768 }
a0d0e21e
LW
769 else
770 tmp = 1;
771 s++;
772 }
773 break;
a0ed51b3
LW
774 case NALNUMLUTF8:
775 PL_reg_flags |= RF_tainted;
776 while (s < strend) {
dfe13c55 777 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
778 if (tmp && regtry(prog, s))
779 goto got_it;
780 else
781 tmp = doevery;
782 }
783 else
784 tmp = 1;
785 s += UTF8SKIP(s);
786 }
787 break;
a0d0e21e
LW
788 case SPACE:
789 while (s < strend) {
790 if (isSPACE(*s)) {
791 if (tmp && regtry(prog, s))
792 goto got_it;
793 else
794 tmp = doevery;
2304df62 795 }
a0d0e21e
LW
796 else
797 tmp = 1;
798 s++;
799 }
800 break;
a0ed51b3
LW
801 case SPACEUTF8:
802 while (s < strend) {
dfe13c55 803 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
804 if (tmp && regtry(prog, s))
805 goto got_it;
806 else
807 tmp = doevery;
808 }
809 else
810 tmp = 1;
811 s += UTF8SKIP(s);
812 }
813 break;
bbce6d69 814 case SPACEL:
3280af22 815 PL_reg_flags |= RF_tainted;
bbce6d69 816 while (s < strend) {
817 if (isSPACE_LC(*s)) {
818 if (tmp && regtry(prog, s))
819 goto got_it;
820 else
821 tmp = doevery;
822 }
823 else
824 tmp = 1;
825 s++;
826 }
827 break;
a0ed51b3
LW
828 case SPACELUTF8:
829 PL_reg_flags |= RF_tainted;
830 while (s < strend) {
dfe13c55 831 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
832 if (tmp && regtry(prog, s))
833 goto got_it;
834 else
835 tmp = doevery;
836 }
837 else
838 tmp = 1;
839 s += UTF8SKIP(s);
840 }
841 break;
a0d0e21e
LW
842 case NSPACE:
843 while (s < strend) {
844 if (!isSPACE(*s)) {
845 if (tmp && regtry(prog, s))
846 goto got_it;
847 else
848 tmp = doevery;
a687059c 849 }
a0d0e21e
LW
850 else
851 tmp = 1;
852 s++;
853 }
854 break;
a0ed51b3
LW
855 case NSPACEUTF8:
856 while (s < strend) {
dfe13c55 857 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
858 if (tmp && regtry(prog, s))
859 goto got_it;
860 else
861 tmp = doevery;
862 }
863 else
864 tmp = 1;
865 s += UTF8SKIP(s);
866 }
867 break;
bbce6d69 868 case NSPACEL:
3280af22 869 PL_reg_flags |= RF_tainted;
bbce6d69 870 while (s < strend) {
871 if (!isSPACE_LC(*s)) {
872 if (tmp && regtry(prog, s))
873 goto got_it;
874 else
875 tmp = doevery;
876 }
877 else
878 tmp = 1;
879 s++;
880 }
881 break;
a0ed51b3
LW
882 case NSPACELUTF8:
883 PL_reg_flags |= RF_tainted;
884 while (s < strend) {
dfe13c55 885 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
886 if (tmp && regtry(prog, s))
887 goto got_it;
888 else
889 tmp = doevery;
890 }
891 else
892 tmp = 1;
893 s += UTF8SKIP(s);
894 }
895 break;
a0d0e21e
LW
896 case DIGIT:
897 while (s < strend) {
898 if (isDIGIT(*s)) {
899 if (tmp && regtry(prog, s))
900 goto got_it;
901 else
902 tmp = doevery;
2b69d0c2 903 }
a0d0e21e
LW
904 else
905 tmp = 1;
906 s++;
907 }
908 break;
a0ed51b3
LW
909 case DIGITUTF8:
910 while (s < strend) {
dfe13c55 911 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
912 if (tmp && regtry(prog, s))
913 goto got_it;
914 else
915 tmp = doevery;
916 }
917 else
918 tmp = 1;
919 s += UTF8SKIP(s);
920 }
921 break;
a0d0e21e
LW
922 case NDIGIT:
923 while (s < strend) {
924 if (!isDIGIT(*s)) {
925 if (tmp && regtry(prog, s))
926 goto got_it;
927 else
928 tmp = doevery;
a687059c 929 }
a0d0e21e
LW
930 else
931 tmp = 1;
932 s++;
933 }
934 break;
a0ed51b3
LW
935 case NDIGITUTF8:
936 while (s < strend) {
dfe13c55 937 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
938 if (tmp && regtry(prog, s))
939 goto got_it;
940 else
941 tmp = doevery;
942 }
943 else
944 tmp = 1;
945 s += UTF8SKIP(s);
946 }
947 break;
a687059c 948 }
a0d0e21e
LW
949 }
950 else {
c277df42
IZ
951 dontbother = 0;
952 if (prog->float_substr != Nullsv) { /* Trim the end. */
953 char *last;
954 I32 oldpos = scream_pos;
955
22e551b9
IZ
956 if (flags & REXEC_SCREAM) {
957 last = screaminstr(sv, prog->float_substr, s - strbeg,
c277df42 958 end_shift, &scream_pos, 1); /* last one */
cf93c79d 959 if (!last)
c277df42 960 last = scream_olds; /* Only one occurence. */
a0ed51b3
LW
961 }
962 else {
c277df42
IZ
963 STRLEN len;
964 char *little = SvPV(prog->float_substr, len);
cf93c79d
IZ
965
966 if (SvTAIL(prog->float_substr)) {
967 if (memEQ(strend - len + 1, little, len - 1))
968 last = strend - len + 1;
969 else if (!PL_multiline)
970 last = memEQ(strend - len, little, len)
971 ? strend - len : Nullch;
972 else
973 goto find_last;
974 } else {
975 find_last:
976 if (len)
977 last = rninstr(s, strend, little, little + len);
978 else
979 last = strend; /* matching `$' */
980 }
c277df42
IZ
981 }
982 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 983 dontbother = strend - last + prog->float_min_offset;
c277df42
IZ
984 }
985 if (minlen && (dontbother < minlen))
a0d0e21e 986 dontbother = minlen - 1;
a0ed51b3 987 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 988 /* We don't know much -- general case. */
a0ed51b3
LW
989 if (UTF) {
990 for (;;) {
84df6dba 991 if (regtry(prog, s))
a0ed51b3 992 goto got_it;
a0ed51b3
LW
993 if (s >= strend)
994 break;
995 s += UTF8SKIP(s);
996 };
997 }
998 else {
999 do {
1000 if (regtry(prog, s))
1001 goto got_it;
1002 } while (s++ < strend);
1003 }
a0d0e21e
LW
1004 }
1005
1006 /* Failure. */
1007 goto phooey;
a687059c 1008
a0d0e21e 1009got_it:
3280af22 1010 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 1011
5c5e4c24
IZ
1012 if (PL_reg_eval_set) {
1013 /* Preserve the current value of $^R */
1014 if (oreplsv != GvSV(PL_replgv))
1015 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1016 restored, the value remains
1017 the same. */
9661b544 1018 restore_pos(0);
5c5e4c24 1019 }
cf93c79d
IZ
1020
1021 /* make sure $`, $&, $', and $digit will work later */
1022 if ( !(flags & REXEC_NOT_FIRST) ) {
1023 if (RX_MATCH_COPIED(prog)) {
1024 Safefree(prog->subbeg);
1025 RX_MATCH_COPIED_off(prog);
1026 }
1027 if (flags & REXEC_COPY_STR) {
1028 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1029
1030 s = savepvn(strbeg, i);
1031 prog->subbeg = s;
1032 prog->sublen = i;
1033 RX_MATCH_COPIED_on(prog);
1034 }
1035 else {
1036 prog->subbeg = strbeg;
1037 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1038 }
1039 }
5c5e4c24 1040
a0d0e21e
LW
1041 return 1;
1042
1043phooey:
9661b544
IZ
1044 if (PL_reg_eval_set)
1045 restore_pos(0);
a0d0e21e 1046 return 0;
a687059c
LW
1047}
1048
1049/*
1050 - regtry - try match at specific point
1051 */
76e3520e 1052STATIC I32 /* 0 failure, 1 success */
8ac85365 1053regtry(regexp *prog, char *startpos)
a687059c 1054{
c277df42 1055 dTHR;
a0d0e21e 1056 register I32 i;
cf93c79d
IZ
1057 register I32 *sp;
1058 register I32 *ep;
c277df42 1059 CHECKPOINT lastcp;
a0d0e21e 1060
3280af22 1061 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
9661b544
IZ
1062 MAGIC *mg;
1063
3280af22 1064 PL_reg_eval_set = RS_init;
ce862d02 1065 DEBUG_r(DEBUG_s(
c3464db5 1066 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 1067 PL_stack_sp - PL_stack_base);
ce862d02
IZ
1068 ));
1069 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 1070 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02
IZ
1071 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1072 SAVETMPS;
1073 /* Apparently this is not needed, judging by wantarray. */
1074 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1075 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
9661b544
IZ
1076
1077 if (PL_reg_sv) {
1078 /* Make $_ available to executed code. */
127ad2b7
GS
1079 if (PL_reg_sv != DEFSV) {
1080 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1081 SAVESPTR(DEFSV);
1082 DEFSV = PL_reg_sv;
9661b544
IZ
1083 }
1084
1085 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1086 && (mg = mg_find(PL_reg_sv, 'g')))) {
1087 /* prepare for quick setting of pos */
1088 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1089 mg = mg_find(PL_reg_sv, 'g');
1090 mg->mg_len = -1;
1091 }
1092 PL_reg_magic = mg;
1093 PL_reg_oldpos = mg->mg_len;
1094 SAVEDESTRUCTOR(restore_pos, 0);
1095 }
5c5e4c24
IZ
1096 if (!PL_reg_curpm)
1097 New(22,PL_reg_curpm, 1, PMOP);
1098 PL_reg_curpm->op_pmregexp = prog;
1099 PL_reg_oldcurpm = PL_curpm;
1100 PL_curpm = PL_reg_curpm;
cf93c79d
IZ
1101 if (RX_MATCH_COPIED(prog)) {
1102 /* Here is a serious problem: we cannot rewrite subbeg,
1103 since it may be needed if this match fails. Thus
1104 $` inside (?{}) could fail... */
1105 PL_reg_oldsaved = prog->subbeg;
1106 PL_reg_oldsavedlen = prog->sublen;
1107 RX_MATCH_COPIED_off(prog);
1108 }
1109 else
1110 PL_reg_oldsaved = Nullch;
5c5e4c24 1111 prog->subbeg = PL_bostr;
cf93c79d 1112 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
ce862d02 1113 }
cf93c79d 1114 prog->startp[0] = startpos - PL_bostr;
3280af22
NIS
1115 PL_reginput = startpos;
1116 PL_regstartp = prog->startp;
1117 PL_regendp = prog->endp;
1118 PL_reglastparen = &prog->lastparen;
a0d0e21e 1119 prog->lastparen = 0;
3280af22 1120 PL_regsize = 0;
364723c2 1121 DEBUG_r(PL_reg_starttry = startpos);
3280af22
NIS
1122 if (PL_reg_start_tmpl <= prog->nparens) {
1123 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1124 if(PL_reg_start_tmp)
1125 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1126 else
3280af22 1127 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1128 }
a0d0e21e 1129
5c5e4c24
IZ
1130 /* XXXX What this code is doing here?!!! There should be no need
1131 to do this again and again, PL_reglastparen should take care of
1132 this! */
a0d0e21e
LW
1133 sp = prog->startp;
1134 ep = prog->endp;
1135 if (prog->nparens) {
5c5e4c24 1136 for (i = prog->nparens; i >= 1; i--) {
cf93c79d
IZ
1137 *++sp = -1;
1138 *++ep = -1;
a687059c 1139 }
a0d0e21e 1140 }
c277df42 1141 REGCP_SET;
7e5428c5 1142 if (regmatch(prog->program + 1)) {
cf93c79d 1143 prog->endp[0] = PL_reginput - PL_bostr;
a0d0e21e
LW
1144 return 1;
1145 }
c277df42
IZ
1146 REGCP_UNWIND;
1147 return 0;
a687059c
LW
1148}
1149
1150/*
1151 - regmatch - main matching routine
1152 *
1153 * Conceptually the strategy is simple: check to see whether the current
1154 * node matches, call self recursively to see whether the rest matches,
1155 * and then act accordingly. In practice we make some effort to avoid
1156 * recursion, in particular by going through "ordinary" nodes (that don't
1157 * need to know whether the rest of the match failed) by a loop instead of
1158 * by recursion.
1159 */
1160/* [lwall] I've hoisted the register declarations to the outer block in order to
1161 * maybe save a little bit of pushing and popping on the stack. It also takes
1162 * advantage of machines that use a register save mask on subroutine entry.
1163 */
76e3520e 1164STATIC I32 /* 0 failure, 1 success */
c277df42 1165regmatch(regnode *prog)
a687059c 1166{
c277df42
IZ
1167 dTHR;
1168 register regnode *scan; /* Current node. */
1169 regnode *next; /* Next node. */
1170 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
1171 register I32 nextchr; /* renamed nextchr - nextchar colides with
1172 function of same name */
a0d0e21e
LW
1173 register I32 n; /* no or next */
1174 register I32 ln; /* len or last */
1175 register char *s; /* operand or save */
3280af22 1176 register char *locinput = PL_reginput;
c277df42
IZ
1177 register I32 c1, c2, paren; /* case fold search, parenth */
1178 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1179#ifdef DEBUGGING
3280af22 1180 PL_regindent++;
4633a7c4 1181#endif
a0d0e21e 1182
a0ed51b3 1183 /* Note that nextchr is a byte even in UTF */
76e3520e 1184 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1185 scan = prog;
1186 while (scan != NULL) {
c277df42 1187#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1188#ifdef DEBUGGING
c277df42
IZ
1189# define sayYES goto yes
1190# define sayNO goto no
1191# define saySAME(x) if (x) goto yes; else goto no
1192# define REPORT_CODE_OFF 24
4633a7c4 1193#else
c277df42
IZ
1194# define sayYES return 1
1195# define sayNO return 0
1196# define saySAME(x) return x
a687059c 1197#endif
c277df42
IZ
1198 DEBUG_r( {
1199 SV *prop = sv_newmortal();
3280af22 1200 int docolor = *PL_colors[0];
c277df42 1201 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 1202 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
beed8111
IZ
1203 /* The part of the string before starttry has one color
1204 (pref0_len chars), between starttry and current
1205 position another one (pref_len - pref0_len chars),
1206 after the current position the third one.
1207 We assume that pref0_len <= pref_len, otherwise we
1208 decrease pref0_len. */
3280af22
NIS
1209 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1210 ? (5 + taill) - l : locinput - PL_bostr);
364723c2 1211 int pref0_len = pref_len - (locinput - PL_reg_starttry);
c277df42 1212
3280af22
NIS
1213 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1214 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1215 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32
GS
1216 if (pref0_len < 0)
1217 pref0_len = 0;
beed8111
IZ
1218 if (pref0_len > pref_len)
1219 pref0_len = pref_len;
c277df42
IZ
1220 regprop(prop, scan);
1221 PerlIO_printf(Perl_debug_log,
8d300b32 1222 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 1223 locinput - PL_bostr,
8d300b32
GS
1224 PL_colors[4], pref0_len,
1225 locinput - pref_len, PL_colors[5],
1226 PL_colors[2], pref_len - pref0_len,
1227 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 1228 (docolor ? "" : "> <"),
3280af22 1229 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
1230 15 - l - pref_len + 1,
1231 "",
3280af22 1232 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
1233 SvPVX(prop));
1234 } );
a687059c 1235
c277df42 1236 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
1237 if (next == scan)
1238 next = NULL;
a687059c 1239
a0d0e21e
LW
1240 switch (OP(scan)) {
1241 case BOL:
3280af22
NIS
1242 if (locinput == PL_bostr
1243 ? PL_regprev == '\n'
1244 : (PL_multiline &&
1245 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1246 {
a0ed51b3 1247 /* regtill = regbol; */
a0d0e21e
LW
1248 break;
1249 }
4633a7c4 1250 sayNO;
a0d0e21e 1251 case MBOL:
3280af22
NIS
1252 if (locinput == PL_bostr
1253 ? PL_regprev == '\n'
1254 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
1255 {
1256 break;
1257 }
4633a7c4 1258 sayNO;
a0d0e21e 1259 case SBOL:
3280af22 1260 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1261 break;
4633a7c4 1262 sayNO;
774d564b 1263 case GPOS:
22e551b9 1264 if (locinput == PL_reg_ganch)
a0d0e21e 1265 break;
4633a7c4 1266 sayNO;
a0d0e21e 1267 case EOL:
3280af22 1268 if (PL_multiline)
a0d0e21e
LW
1269 goto meol;
1270 else
1271 goto seol;
1272 case MEOL:
1273 meol:
3280af22 1274 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1275 sayNO;
a0d0e21e
LW
1276 break;
1277 case SEOL:
1278 seol:
3280af22 1279 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1280 sayNO;
3280af22 1281 if (PL_regeol - locinput > 1)
4633a7c4 1282 sayNO;
a0d0e21e 1283 break;
b85d18e9 1284 case EOS:
3280af22 1285 if (PL_regeol != locinput)
b85d18e9
IZ
1286 sayNO;
1287 break;
a0ed51b3
LW
1288 case SANYUTF8:
1289 if (nextchr & 0x80) {
6f06b55f 1290 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1291 if (locinput > PL_regeol)
1292 sayNO;
1293 nextchr = UCHARAT(locinput);
1294 break;
1295 }
1296 if (!nextchr && locinput >= PL_regeol)
1297 sayNO;
1298 nextchr = UCHARAT(++locinput);
1299 break;
a0d0e21e 1300 case SANY:
3280af22 1301 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1302 sayNO;
76e3520e 1303 nextchr = UCHARAT(++locinput);
a0d0e21e 1304 break;
a0ed51b3
LW
1305 case ANYUTF8:
1306 if (nextchr & 0x80) {
6f06b55f 1307 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1308 if (locinput > PL_regeol)
1309 sayNO;
1310 nextchr = UCHARAT(locinput);
1311 break;
1312 }
1313 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1314 sayNO;
1315 nextchr = UCHARAT(++locinput);
1316 break;
22c35a8c 1317 case REG_ANY:
3280af22 1318 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1319 sayNO;
76e3520e 1320 nextchr = UCHARAT(++locinput);
a0d0e21e 1321 break;
bbce6d69 1322 case EXACT:
161b471a 1323 s = (char *) OPERAND(scan);
c277df42 1324 ln = UCHARAT(s++);
a0d0e21e 1325 /* Inline the first character, for speed. */
76e3520e 1326 if (UCHARAT(s) != nextchr)
4633a7c4 1327 sayNO;
3280af22 1328 if (PL_regeol - locinput < ln)
4633a7c4 1329 sayNO;
36477c24 1330 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1331 sayNO;
a0d0e21e 1332 locinput += ln;
76e3520e 1333 nextchr = UCHARAT(locinput);
bbce6d69 1334 break;
1335 case EXACTFL:
3280af22 1336 PL_reg_flags |= RF_tainted;
bbce6d69 1337 /* FALL THROUGH */
1338 case EXACTF:
161b471a 1339 s = (char *) OPERAND(scan);
c277df42 1340 ln = UCHARAT(s++);
a0ed51b3
LW
1341
1342 if (UTF) {
1343 char *l = locinput;
1344 char *e = s + ln;
1345 c1 = OP(scan) == EXACTF;
1346 while (s < e) {
1347 if (l >= PL_regeol)
1348 sayNO;
dfe13c55
GS
1349 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1350 toLOWER_utf8((U8*)l) :
1351 toLOWER_LC_utf8((U8*)l)))
1352 {
a0ed51b3 1353 sayNO;
dfe13c55 1354 }
a0ed51b3
LW
1355 s += UTF8SKIP(s);
1356 l += UTF8SKIP(l);
1357 }
1358 locinput = l;
1359 nextchr = UCHARAT(locinput);
1360 break;
1361 }
1362
bbce6d69 1363 /* Inline the first character, for speed. */
76e3520e 1364 if (UCHARAT(s) != nextchr &&
bbce6d69 1365 UCHARAT(s) != ((OP(scan) == EXACTF)
22c35a8c 1366 ? PL_fold : PL_fold_locale)[nextchr])
bbce6d69 1367 sayNO;
3280af22 1368 if (PL_regeol - locinput < ln)
bbce6d69 1369 sayNO;
5f05dabc 1370 if (ln > 1 && (OP(scan) == EXACTF
1371 ? ibcmp(s, locinput, ln)
1372 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1373 sayNO;
1374 locinput += ln;
76e3520e 1375 nextchr = UCHARAT(locinput);
a0d0e21e 1376 break;
a0ed51b3
LW
1377 case ANYOFUTF8:
1378 s = (char *) OPERAND(scan);
1379 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1380 sayNO;
1381 if (locinput >= PL_regeol)
1382 sayNO;
6f06b55f 1383 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1384 nextchr = UCHARAT(locinput);
1385 break;
a0d0e21e 1386 case ANYOF:
161b471a 1387 s = (char *) OPERAND(scan);
76e3520e
GS
1388 if (nextchr < 0)
1389 nextchr = UCHARAT(locinput);
873ef191 1390 if (!REGINCLASS(s, nextchr))
4633a7c4 1391 sayNO;
3280af22 1392 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1393 sayNO;
76e3520e 1394 nextchr = UCHARAT(++locinput);
a0d0e21e 1395 break;
bbce6d69 1396 case ALNUML:
3280af22 1397 PL_reg_flags |= RF_tainted;
bbce6d69 1398 /* FALL THROUGH */
a0d0e21e 1399 case ALNUM:
76e3520e 1400 if (!nextchr)
4633a7c4 1401 sayNO;
bbce6d69 1402 if (!(OP(scan) == ALNUM
76e3520e 1403 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1404 sayNO;
76e3520e 1405 nextchr = UCHARAT(++locinput);
a0d0e21e 1406 break;
a0ed51b3
LW
1407 case ALNUMLUTF8:
1408 PL_reg_flags |= RF_tainted;
1409 /* FALL THROUGH */
1410 case ALNUMUTF8:
1411 if (!nextchr)
1412 sayNO;
1413 if (nextchr & 0x80) {
1414 if (!(OP(scan) == ALNUMUTF8
dfe13c55
GS
1415 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1416 : isALNUM_LC_utf8((U8*)locinput)))
1417 {
a0ed51b3 1418 sayNO;
dfe13c55 1419 }
6f06b55f 1420 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1421 nextchr = UCHARAT(locinput);
1422 break;
1423 }
1424 if (!(OP(scan) == ALNUMUTF8
1425 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1426 sayNO;
1427 nextchr = UCHARAT(++locinput);
1428 break;
bbce6d69 1429 case NALNUML:
3280af22 1430 PL_reg_flags |= RF_tainted;
bbce6d69 1431 /* FALL THROUGH */
a0d0e21e 1432 case NALNUM:
3280af22 1433 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1434 sayNO;
bbce6d69 1435 if (OP(scan) == NALNUM
76e3520e 1436 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1437 sayNO;
76e3520e 1438 nextchr = UCHARAT(++locinput);
a0d0e21e 1439 break;
a0ed51b3
LW
1440 case NALNUMLUTF8:
1441 PL_reg_flags |= RF_tainted;
1442 /* FALL THROUGH */
1443 case NALNUMUTF8:
1444 if (!nextchr && locinput >= PL_regeol)
1445 sayNO;
1446 if (nextchr & 0x80) {
1447 if (OP(scan) == NALNUMUTF8
dfe13c55
GS
1448 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1449 : isALNUM_LC_utf8((U8*)locinput))
1450 {
a0ed51b3 1451 sayNO;
dfe13c55 1452 }
6f06b55f 1453 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1454 nextchr = UCHARAT(locinput);
1455 break;
1456 }
1457 if (OP(scan) == NALNUMUTF8
1458 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1459 sayNO;
1460 nextchr = UCHARAT(++locinput);
1461 break;
bbce6d69 1462 case BOUNDL:
1463 case NBOUNDL:
3280af22 1464 PL_reg_flags |= RF_tainted;
bbce6d69 1465 /* FALL THROUGH */
a0d0e21e 1466 case BOUND:
bbce6d69 1467 case NBOUND:
1468 /* was last char in word? */
3280af22 1469 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1470 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1471 ln = isALNUM(ln);
76e3520e 1472 n = isALNUM(nextchr);
bbce6d69 1473 }
1474 else {
1475 ln = isALNUM_LC(ln);
76e3520e 1476 n = isALNUM_LC(nextchr);
bbce6d69 1477 }
95bac841 1478 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1479 sayNO;
a0d0e21e 1480 break;
a0ed51b3
LW
1481 case BOUNDLUTF8:
1482 case NBOUNDLUTF8:
1483 PL_reg_flags |= RF_tainted;
1484 /* FALL THROUGH */
1485 case BOUNDUTF8:
1486 case NBOUNDUTF8:
1487 /* was last char in word? */
dfe13c55
GS
1488 ln = (locinput != PL_regbol)
1489 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
a0ed51b3
LW
1490 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1491 ln = isALNUM_uni(ln);
dfe13c55 1492 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3
LW
1493 }
1494 else {
1495 ln = isALNUM_LC_uni(ln);
dfe13c55 1496 n = isALNUM_LC_utf8((U8*)locinput);
a0ed51b3
LW
1497 }
1498 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1499 sayNO;
1500 break;
bbce6d69 1501 case SPACEL:
3280af22 1502 PL_reg_flags |= RF_tainted;
bbce6d69 1503 /* FALL THROUGH */
a0d0e21e 1504 case SPACE:
3280af22 1505 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1506 sayNO;
bbce6d69 1507 if (!(OP(scan) == SPACE
76e3520e 1508 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1509 sayNO;
76e3520e 1510 nextchr = UCHARAT(++locinput);
a0d0e21e 1511 break;
a0ed51b3
LW
1512 case SPACELUTF8:
1513 PL_reg_flags |= RF_tainted;
1514 /* FALL THROUGH */
1515 case SPACEUTF8:
1516 if (!nextchr && locinput >= PL_regeol)
1517 sayNO;
1518 if (nextchr & 0x80) {
1519 if (!(OP(scan) == SPACEUTF8
dfe13c55
GS
1520 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1521 : isSPACE_LC_utf8((U8*)locinput)))
1522 {
a0ed51b3 1523 sayNO;
dfe13c55 1524 }
6f06b55f 1525 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1526 nextchr = UCHARAT(locinput);
1527 break;
1528 }
1529 if (!(OP(scan) == SPACEUTF8
1530 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1531 sayNO;
1532 nextchr = UCHARAT(++locinput);
1533 break;
bbce6d69 1534 case NSPACEL:
3280af22 1535 PL_reg_flags |= RF_tainted;
bbce6d69 1536 /* FALL THROUGH */
a0d0e21e 1537 case NSPACE:
76e3520e 1538 if (!nextchr)
4633a7c4 1539 sayNO;
bbce6d69 1540 if (OP(scan) == SPACE
76e3520e 1541 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1542 sayNO;
76e3520e 1543 nextchr = UCHARAT(++locinput);
a0d0e21e 1544 break;
a0ed51b3
LW
1545 case NSPACELUTF8:
1546 PL_reg_flags |= RF_tainted;
1547 /* FALL THROUGH */
1548 case NSPACEUTF8:
1549 if (!nextchr)
1550 sayNO;
1551 if (nextchr & 0x80) {
1552 if (OP(scan) == NSPACEUTF8
dfe13c55
GS
1553 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1554 : isSPACE_LC_utf8((U8*)locinput))
1555 {
a0ed51b3 1556 sayNO;
dfe13c55 1557 }
6f06b55f 1558 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1559 nextchr = UCHARAT(locinput);
1560 break;
1561 }
1562 if (OP(scan) == NSPACEUTF8
1563 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1564 sayNO;
1565 nextchr = UCHARAT(++locinput);
1566 break;
a0d0e21e 1567 case DIGIT:
76e3520e 1568 if (!isDIGIT(nextchr))
4633a7c4 1569 sayNO;
76e3520e 1570 nextchr = UCHARAT(++locinput);
a0d0e21e 1571 break;
a0ed51b3
LW
1572 case DIGITUTF8:
1573 if (nextchr & 0x80) {
dfe13c55 1574 if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
a0ed51b3 1575 sayNO;
6f06b55f 1576 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1577 nextchr = UCHARAT(locinput);
1578 break;
1579 }
1580 if (!isDIGIT(nextchr))
1581 sayNO;
1582 nextchr = UCHARAT(++locinput);
1583 break;
a0d0e21e 1584 case NDIGIT:
3280af22 1585 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1586 sayNO;
76e3520e 1587 if (isDIGIT(nextchr))
4633a7c4 1588 sayNO;
76e3520e 1589 nextchr = UCHARAT(++locinput);
a0d0e21e 1590 break;
a0ed51b3
LW
1591 case NDIGITUTF8:
1592 if (!nextchr && locinput >= PL_regeol)
1593 sayNO;
1594 if (nextchr & 0x80) {
dfe13c55 1595 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1596 sayNO;
6f06b55f 1597 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1598 nextchr = UCHARAT(locinput);
1599 break;
1600 }
1601 if (isDIGIT(nextchr))
1602 sayNO;
1603 nextchr = UCHARAT(++locinput);
1604 break;
1605 case CLUMP:
dfe13c55 1606 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1607 sayNO;
6f06b55f 1608 locinput += PL_utf8skip[nextchr];
dfe13c55 1609 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
1610 locinput += UTF8SKIP(locinput);
1611 if (locinput > PL_regeol)
1612 sayNO;
1613 nextchr = UCHARAT(locinput);
1614 break;
c8756f30 1615 case REFFL:
3280af22 1616 PL_reg_flags |= RF_tainted;
c8756f30 1617 /* FALL THROUGH */
c277df42 1618 case REF:
c8756f30 1619 case REFF:
c277df42 1620 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
1621 ln = PL_regstartp[n];
1622 if (*PL_reglastparen < n || ln == -1)
af3f8c16 1623 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 1624 if (ln == PL_regendp[n])
a0d0e21e 1625 break;
a0ed51b3 1626
cf93c79d 1627 s = PL_bostr + ln;
a0ed51b3
LW
1628 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1629 char *l = locinput;
cf93c79d 1630 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
1631 /*
1632 * Note that we can't do the "other character" lookup trick as
1633 * in the 8-bit case (no pun intended) because in Unicode we
1634 * have to map both upper and title case to lower case.
1635 */
1636 if (OP(scan) == REFF) {
1637 while (s < e) {
1638 if (l >= PL_regeol)
1639 sayNO;
dfe13c55 1640 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
1641 sayNO;
1642 s += UTF8SKIP(s);
1643 l += UTF8SKIP(l);
1644 }
1645 }
1646 else {
1647 while (s < e) {
1648 if (l >= PL_regeol)
1649 sayNO;
dfe13c55 1650 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
1651 sayNO;
1652 s += UTF8SKIP(s);
1653 l += UTF8SKIP(l);
1654 }
1655 }
1656 locinput = l;
1657 nextchr = UCHARAT(locinput);
1658 break;
1659 }
1660
a0d0e21e 1661 /* Inline the first character, for speed. */
76e3520e 1662 if (UCHARAT(s) != nextchr &&
c8756f30
AK
1663 (OP(scan) == REF ||
1664 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 1665 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 1666 sayNO;
cf93c79d 1667 ln = PL_regendp[n] - ln;
3280af22 1668 if (locinput + ln > PL_regeol)
4633a7c4 1669 sayNO;
c8756f30
AK
1670 if (ln > 1 && (OP(scan) == REF
1671 ? memNE(s, locinput, ln)
1672 : (OP(scan) == REFF
1673 ? ibcmp(s, locinput, ln)
1674 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1675 sayNO;
a0d0e21e 1676 locinput += ln;
76e3520e 1677 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1678 break;
1679
1680 case NOTHING:
c277df42 1681 case TAIL:
a0d0e21e
LW
1682 break;
1683 case BACK:
1684 break;
c277df42
IZ
1685 case EVAL:
1686 {
1687 dSP;
533c011a 1688 OP_4tree *oop = PL_op;
3280af22
NIS
1689 COP *ocurcop = PL_curcop;
1690 SV **ocurpad = PL_curpad;
c277df42
IZ
1691 SV *ret;
1692
1693 n = ARG(scan);
533c011a
NIS
1694 PL_op = (OP_4tree*)PL_regdata->data[n];
1695 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 1696 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 1697 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 1698
76e3520e 1699 CALLRUNOPS(); /* Scalar context. */
c277df42
IZ
1700 SPAGAIN;
1701 ret = POPs;
1702 PUTBACK;
1703
0f5d15d6
IZ
1704 PL_op = oop;
1705 PL_curpad = ocurpad;
1706 PL_curcop = ocurcop;
c277df42 1707 if (logical) {
0f5d15d6
IZ
1708 if (logical == 2) { /* Postponed subexpression. */
1709 regexp *re;
22c35a8c 1710 MAGIC *mg = Null(MAGIC*);
0f5d15d6
IZ
1711 re_cc_state state;
1712 CURCUR cctmp;
1713 CHECKPOINT cp, lastcp;
1714
1715 if(SvROK(ret) || SvRMAGICAL(ret)) {
1716 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
1717
1718 if(SvMAGICAL(sv))
1719 mg = mg_find(sv, 'r');
1720 }
1721 if (mg) {
1722 re = (regexp *)mg->mg_obj;
df0003d4 1723 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
1724 }
1725 else {
1726 STRLEN len;
1727 char *t = SvPV(ret, len);
1728 PMOP pm;
1729 char *oprecomp = PL_regprecomp;
1730 I32 osize = PL_regsize;
1731 I32 onpar = PL_regnpar;
1732
1733 pm.op_pmflags = 0;
1734 re = CALLREGCOMP(t, t + len, &pm);
1735 if (!(SvFLAGS(ret)
1736 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
1737 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
1738 PL_regprecomp = oprecomp;
1739 PL_regsize = osize;
1740 PL_regnpar = onpar;
1741 }
1742 DEBUG_r(
1743 PerlIO_printf(Perl_debug_log,
1744 "Entering embedded `%s%.60s%s%s'\n",
1745 PL_colors[0],
1746 re->precomp,
1747 PL_colors[1],
1748 (strlen(re->precomp) > 60 ? "..." : ""))
1749 );
1750 state.node = next;
1751 state.prev = PL_reg_call_cc;
1752 state.cc = PL_regcc;
1753 state.re = PL_reg_re;
1754
1755 cctmp.cur = 0;
1756 cctmp.oldcc = 0;
1757 PL_regcc = &cctmp;
1758
1759 cp = regcppush(0); /* Save *all* the positions. */
1760 REGCP_SET;
1761 cache_re(re);
1762 state.ss = PL_savestack_ix;
1763 *PL_reglastparen = 0;
1764 PL_reg_call_cc = &state;
1765 PL_reginput = locinput;
1766 if (regmatch(re->program + 1)) {
1767 ReREFCNT_dec(re);
1768 regcpblow(cp);
1769 sayYES;
1770 }
1771 DEBUG_r(
1772 PerlIO_printf(Perl_debug_log,
1773 "%*s failed...\n",
1774 REPORT_CODE_OFF+PL_regindent*2, "")
1775 );
1776 ReREFCNT_dec(re);
1777 REGCP_UNWIND;
1778 regcppop();
1779 PL_reg_call_cc = state.prev;
1780 PL_regcc = state.cc;
1781 PL_reg_re = state.re;
d3790889 1782 cache_re(PL_reg_re);
0f5d15d6
IZ
1783 sayNO;
1784 }
c277df42 1785 sw = SvTRUE(ret);
0f5d15d6 1786 logical = 0;
a0ed51b3
LW
1787 }
1788 else
3280af22 1789 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
1790 break;
1791 }
a0d0e21e 1792 case OPEN:
c277df42 1793 n = ARG(scan); /* which paren pair */
3280af22
NIS
1794 PL_reg_start_tmp[n] = locinput;
1795 if (n > PL_regsize)
1796 PL_regsize = n;
a0d0e21e
LW
1797 break;
1798 case CLOSE:
c277df42 1799 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
1800 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
1801 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
1802 if (n > *PL_reglastparen)
1803 *PL_reglastparen = n;
a0d0e21e 1804 break;
c277df42
IZ
1805 case GROUPP:
1806 n = ARG(scan); /* which paren pair */
cf93c79d 1807 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
1808 break;
1809 case IFTHEN:
1810 if (sw)
1811 next = NEXTOPER(NEXTOPER(scan));
1812 else {
1813 next = scan + ARG(scan);
1814 if (OP(next) == IFTHEN) /* Fake one. */
1815 next = NEXTOPER(NEXTOPER(next));
1816 }
1817 break;
1818 case LOGICAL:
0f5d15d6 1819 logical = scan->flags;
c277df42 1820 break;
a0d0e21e
LW
1821 case CURLYX: {
1822 CURCUR cc;
3280af22 1823 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
1824
1825 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1826 next += ARG(next);
3280af22
NIS
1827 cc.oldcc = PL_regcc;
1828 PL_regcc = &cc;
1829 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
1830 cc.cur = -1;
1831 cc.min = ARG1(scan);
1832 cc.max = ARG2(scan);
c277df42 1833 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1834 cc.next = next;
1835 cc.minmod = minmod;
1836 cc.lastloc = 0;
3280af22 1837 PL_reginput = locinput;
a0d0e21e
LW
1838 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1839 regcpblow(cp);
3280af22 1840 PL_regcc = cc.oldcc;
4633a7c4 1841 saySAME(n);
a0d0e21e
LW
1842 }
1843 /* NOT REACHED */
1844 case WHILEM: {
1845 /*
1846 * This is really hard to understand, because after we match
1847 * what we're trying to match, we must make sure the rest of
1848 * the RE is going to match for sure, and to do that we have
1849 * to go back UP the parse tree by recursing ever deeper. And
1850 * if it fails, we have to reset our parent's current state
1851 * that we can try again after backing off.
1852 */
1853
c277df42 1854 CHECKPOINT cp, lastcp;
3280af22 1855 CURCUR* cc = PL_regcc;
c277df42
IZ
1856 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1857
4633a7c4 1858 n = cc->cur + 1; /* how many we know we matched */
3280af22 1859 PL_reginput = locinput;
a0d0e21e 1860
c277df42
IZ
1861 DEBUG_r(
1862 PerlIO_printf(Perl_debug_log,
1863 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1864 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
1865 (long)n, (long)cc->min,
1866 (long)cc->max, (long)cc)
1867 );
4633a7c4 1868
a0d0e21e
LW
1869 /* If degenerate scan matches "", assume scan done. */
1870
579cf2c3 1871 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
1872 PL_regcc = cc->oldcc;
1873 ln = PL_regcc->cur;
c277df42 1874 DEBUG_r(
c3464db5
DD
1875 PerlIO_printf(Perl_debug_log,
1876 "%*s empty match detected, try continuation...\n",
3280af22 1877 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1878 );
a0d0e21e 1879 if (regmatch(cc->next))
4633a7c4 1880 sayYES;
c277df42 1881 DEBUG_r(
c3464db5
DD
1882 PerlIO_printf(Perl_debug_log,
1883 "%*s failed...\n",
3280af22 1884 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1885 );
3280af22
NIS
1886 PL_regcc->cur = ln;
1887 PL_regcc = cc;
4633a7c4 1888 sayNO;
a0d0e21e
LW
1889 }
1890
1891 /* First just match a string of min scans. */
1892
1893 if (n < cc->min) {
1894 cc->cur = n;
1895 cc->lastloc = locinput;
4633a7c4
LW
1896 if (regmatch(cc->scan))
1897 sayYES;
1898 cc->cur = n - 1;
c277df42
IZ
1899 cc->lastloc = lastloc;
1900 DEBUG_r(
c3464db5
DD
1901 PerlIO_printf(Perl_debug_log,
1902 "%*s failed...\n",
3280af22 1903 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1904 );
4633a7c4 1905 sayNO;
a0d0e21e
LW
1906 }
1907
1908 /* Prefer next over scan for minimal matching. */
1909
1910 if (cc->minmod) {
3280af22
NIS
1911 PL_regcc = cc->oldcc;
1912 ln = PL_regcc->cur;
5f05dabc 1913 cp = regcppush(cc->parenfloor);
c277df42 1914 REGCP_SET;
5f05dabc 1915 if (regmatch(cc->next)) {
c277df42 1916 regcpblow(cp);
4633a7c4 1917 sayYES; /* All done. */
5f05dabc 1918 }
c277df42 1919 REGCP_UNWIND;
5f05dabc 1920 regcppop();
3280af22
NIS
1921 PL_regcc->cur = ln;
1922 PL_regcc = cc;
a0d0e21e 1923
c277df42 1924 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 1925 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
1926 && !(PL_reg_flags & RF_warned)) {
1927 PL_reg_flags |= RF_warned;
599cee73 1928 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
1929 "Complex regular subexpression recursion",
1930 REG_INFTY - 1);
c277df42 1931 }
4633a7c4 1932 sayNO;
c277df42 1933 }
a687059c 1934
c277df42 1935 DEBUG_r(
c3464db5
DD
1936 PerlIO_printf(Perl_debug_log,
1937 "%*s trying longer...\n",
3280af22 1938 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1939 );
a0d0e21e 1940 /* Try scanning more and see if it helps. */
3280af22 1941 PL_reginput = locinput;
a0d0e21e
LW
1942 cc->cur = n;
1943 cc->lastloc = locinput;
5f05dabc 1944 cp = regcppush(cc->parenfloor);
c277df42 1945 REGCP_SET;
5f05dabc 1946 if (regmatch(cc->scan)) {
c277df42 1947 regcpblow(cp);
4633a7c4 1948 sayYES;
5f05dabc 1949 }
c277df42 1950 DEBUG_r(
c3464db5
DD
1951 PerlIO_printf(Perl_debug_log,
1952 "%*s failed...\n",
3280af22 1953 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1954 );
1955 REGCP_UNWIND;
5f05dabc 1956 regcppop();
4633a7c4 1957 cc->cur = n - 1;
c277df42 1958 cc->lastloc = lastloc;
4633a7c4 1959 sayNO;
a0d0e21e
LW
1960 }
1961
1962 /* Prefer scan over next for maximal matching. */
1963
1964 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1965 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1966 cc->cur = n;
1967 cc->lastloc = locinput;
c277df42 1968 REGCP_SET;
5f05dabc 1969 if (regmatch(cc->scan)) {
c277df42 1970 regcpblow(cp);
4633a7c4 1971 sayYES;
5f05dabc 1972 }
c277df42 1973 REGCP_UNWIND;
a0d0e21e 1974 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1975 PL_reginput = locinput;
c277df42 1976 DEBUG_r(
c3464db5
DD
1977 PerlIO_printf(Perl_debug_log,
1978 "%*s failed, try continuation...\n",
3280af22 1979 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1980 );
1981 }
599cee73
PM
1982 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
1983 && !(PL_reg_flags & RF_warned)) {
3280af22 1984 PL_reg_flags |= RF_warned;
599cee73 1985 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
1986 "Complex regular subexpression recursion",
1987 REG_INFTY - 1);
a0d0e21e
LW
1988 }
1989
1990 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
1991 PL_regcc = cc->oldcc;
1992 ln = PL_regcc->cur;
a0d0e21e 1993 if (regmatch(cc->next))
4633a7c4 1994 sayYES;
c277df42 1995 DEBUG_r(
c3464db5 1996 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1997 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1998 );
3280af22
NIS
1999 PL_regcc->cur = ln;
2000 PL_regcc = cc;
4633a7c4 2001 cc->cur = n - 1;
c277df42 2002 cc->lastloc = lastloc;
4633a7c4 2003 sayNO;
a0d0e21e
LW
2004 }
2005 /* NOT REACHED */
c277df42
IZ
2006 case BRANCHJ:
2007 next = scan + ARG(scan);
2008 if (next == scan)
2009 next = NULL;
2010 inner = NEXTOPER(NEXTOPER(scan));
2011 goto do_branch;
2012 case BRANCH:
2013 inner = NEXTOPER(scan);
2014 do_branch:
2015 {
2016 CHECKPOINT lastcp;
2017 c1 = OP(scan);
2018 if (OP(next) != c1) /* No choice. */
2019 next = inner; /* Avoid recursion. */
a0d0e21e 2020 else {
3280af22 2021 int lastparen = *PL_reglastparen;
c277df42
IZ
2022
2023 REGCP_SET;
a0d0e21e 2024 do {
3280af22 2025 PL_reginput = locinput;
c277df42 2026 if (regmatch(inner))
4633a7c4 2027 sayYES;
c277df42 2028 REGCP_UNWIND;
3280af22 2029 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2030 PL_regendp[n] = -1;
3280af22 2031 *PL_reglastparen = n;
c277df42 2032 scan = next;
a0d0e21e 2033 /*SUPPRESS 560*/
c277df42
IZ
2034 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2035 next += n;
a0d0e21e 2036 else
c277df42 2037 next = NULL;
c277df42
IZ
2038 inner = NEXTOPER(scan);
2039 if (c1 == BRANCHJ) {
2040 inner = NEXTOPER(inner);
2041 }
2042 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2043 sayNO;
a0d0e21e 2044 /* NOTREACHED */
a687059c 2045 }
a0d0e21e
LW
2046 }
2047 break;
2048 case MINMOD:
2049 minmod = 1;
2050 break;
c277df42
IZ
2051 case CURLYM:
2052 {
00db4c45 2053 I32 l = 0;
c277df42
IZ
2054 CHECKPOINT lastcp;
2055
2056 /* We suppose that the next guy does not need
2057 backtracking: in particular, it is of constant length,
2058 and has no parenths to influence future backrefs. */
2059 ln = ARG1(scan); /* min to match */
2060 n = ARG2(scan); /* max to match */
c277df42
IZ
2061 paren = scan->flags;
2062 if (paren) {
3280af22
NIS
2063 if (paren > PL_regsize)
2064 PL_regsize = paren;
2065 if (paren > *PL_reglastparen)
2066 *PL_reglastparen = paren;
c277df42 2067 }
dc45a647 2068 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2069 if (paren)
2070 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2071 PL_reginput = locinput;
c277df42
IZ
2072 if (minmod) {
2073 minmod = 0;
2074 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2075 sayNO;
5f4b28b2 2076 if (ln && l == 0 && n >= ln
c277df42
IZ
2077 /* In fact, this is tricky. If paren, then the
2078 fact that we did/didnot match may influence
2079 future execution. */
2080 && !(paren && ln == 0))
2081 ln = n;
3280af22 2082 locinput = PL_reginput;
22c35a8c 2083 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
2084 c1 = UCHARAT(OPERAND(next) + 1);
2085 if (OP(next) == EXACTF)
22c35a8c 2086 c2 = PL_fold[c1];
c277df42 2087 else if (OP(next) == EXACTFL)
22c35a8c 2088 c2 = PL_fold_locale[c1];
c277df42
IZ
2089 else
2090 c2 = c1;
a0ed51b3
LW
2091 }
2092 else
c277df42
IZ
2093 c1 = c2 = -1000;
2094 REGCP_SET;
5f4b28b2 2095 /* This may be improved if l == 0. */
c277df42
IZ
2096 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2097 /* If it could work, try it. */
2098 if (c1 == -1000 ||
3280af22
NIS
2099 UCHARAT(PL_reginput) == c1 ||
2100 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2101 {
2102 if (paren) {
2103 if (n) {
cf93c79d
IZ
2104 PL_regstartp[paren] =
2105 HOPc(PL_reginput, -l) - PL_bostr;
2106 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2107 }
2108 else
cf93c79d 2109 PL_regendp[paren] = -1;
c277df42
IZ
2110 }
2111 if (regmatch(next))
2112 sayYES;
2113 REGCP_UNWIND;
2114 }
2115 /* Couldn't or didn't -- move forward. */
3280af22 2116 PL_reginput = locinput;
c277df42
IZ
2117 if (regrepeat_hard(scan, 1, &l)) {
2118 ln++;
3280af22 2119 locinput = PL_reginput;
c277df42
IZ
2120 }
2121 else
2122 sayNO;
2123 }
a0ed51b3
LW
2124 }
2125 else {
c277df42
IZ
2126 n = regrepeat_hard(scan, n, &l);
2127 if (n != 0 && l == 0
2128 /* In fact, this is tricky. If paren, then the
2129 fact that we did/didnot match may influence
2130 future execution. */
2131 && !(paren && ln == 0))
2132 ln = n;
3280af22 2133 locinput = PL_reginput;
c277df42 2134 DEBUG_r(
5c0ca799
GS
2135 PerlIO_printf(Perl_debug_log,
2136 "%*s matched %ld times, len=%ld...\n",
3280af22 2137 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2138 );
2139 if (n >= ln) {
22c35a8c 2140 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
2141 c1 = UCHARAT(OPERAND(next) + 1);
2142 if (OP(next) == EXACTF)
22c35a8c 2143 c2 = PL_fold[c1];
c277df42 2144 else if (OP(next) == EXACTFL)
22c35a8c 2145 c2 = PL_fold_locale[c1];
c277df42
IZ
2146 else
2147 c2 = c1;
a0ed51b3
LW
2148 }
2149 else
c277df42
IZ
2150 c1 = c2 = -1000;
2151 }
2152 REGCP_SET;
2153 while (n >= ln) {
2154 /* If it could work, try it. */
2155 if (c1 == -1000 ||
3280af22
NIS
2156 UCHARAT(PL_reginput) == c1 ||
2157 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2158 {
2159 DEBUG_r(
c3464db5
DD
2160 PerlIO_printf(Perl_debug_log,
2161 "%*s trying tail with n=%ld...\n",
3280af22 2162 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2163 );
2164 if (paren) {
2165 if (n) {
cf93c79d
IZ
2166 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2167 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2168 }
a0ed51b3 2169 else
cf93c79d 2170 PL_regendp[paren] = -1;
c277df42 2171 }
a0ed51b3
LW
2172 if (regmatch(next))
2173 sayYES;
2174 REGCP_UNWIND;
2175 }
c277df42
IZ
2176 /* Couldn't or didn't -- back up. */
2177 n--;
dfe13c55 2178 locinput = HOPc(locinput, -l);
3280af22 2179 PL_reginput = locinput;
c277df42
IZ
2180 }
2181 }
2182 sayNO;
2183 break;
2184 }
2185 case CURLYN:
2186 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2187 if (paren > PL_regsize)
2188 PL_regsize = paren;
2189 if (paren > *PL_reglastparen)
2190 *PL_reglastparen = paren;
c277df42
IZ
2191 ln = ARG1(scan); /* min to match */
2192 n = ARG2(scan); /* max to match */
dc45a647 2193 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2194 goto repeat;
a0d0e21e 2195 case CURLY:
c277df42 2196 paren = 0;
a0d0e21e
LW
2197 ln = ARG1(scan); /* min to match */
2198 n = ARG2(scan); /* max to match */
dc45a647 2199 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2200 goto repeat;
2201 case STAR:
2202 ln = 0;
c277df42 2203 n = REG_INFTY;
a0d0e21e 2204 scan = NEXTOPER(scan);
c277df42 2205 paren = 0;
a0d0e21e
LW
2206 goto repeat;
2207 case PLUS:
c277df42
IZ
2208 ln = 1;
2209 n = REG_INFTY;
2210 scan = NEXTOPER(scan);
2211 paren = 0;
2212 repeat:
a0d0e21e
LW
2213 /*
2214 * Lookahead to avoid useless match attempts
2215 * when we know what character comes next.
2216 */
22c35a8c 2217 if (PL_regkind[(U8)OP(next)] == EXACT) {
bbce6d69 2218 c1 = UCHARAT(OPERAND(next) + 1);
2219 if (OP(next) == EXACTF)
22c35a8c 2220 c2 = PL_fold[c1];
bbce6d69 2221 else if (OP(next) == EXACTFL)
22c35a8c 2222 c2 = PL_fold_locale[c1];
bbce6d69 2223 else
2224 c2 = c1;
2225 }
a0d0e21e 2226 else
bbce6d69 2227 c1 = c2 = -1000;
3280af22 2228 PL_reginput = locinput;
a0d0e21e 2229 if (minmod) {
c277df42 2230 CHECKPOINT lastcp;
a0d0e21e
LW
2231 minmod = 0;
2232 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2233 sayNO;
a0ed51b3 2234 locinput = PL_reginput;
c277df42 2235 REGCP_SET;
0fe9bf95
IZ
2236 if (c1 != -1000) {
2237 char *e = locinput + n - ln; /* Should not check after this */
2238 char *old = locinput;
2239
2240 if (e >= PL_regeol || (n == REG_INFTY))
2241 e = PL_regeol - 1;
2242 while (1) {
2243 /* Find place 'next' could work */
2244 if (c1 == c2) {
2245 while (locinput <= e && *locinput != c1)
2246 locinput++;
2247 } else {
2248 while (locinput <= e
2249 && *locinput != c1
2250 && *locinput != c2)
2251 locinput++;
2252 }
2253 if (locinput > e)
2254 sayNO;
2255 /* PL_reginput == old now */
2256 if (locinput != old) {
2257 ln = 1; /* Did some */
2258 if (regrepeat(scan, locinput - old) <
2259 locinput - old)
2260 sayNO;
2261 }
2262 /* PL_reginput == locinput now */
2263 if (paren) {
2264 if (ln) {
cf93c79d
IZ
2265 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2266 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2267 }
2268 else
cf93c79d 2269 PL_regendp[paren] = -1;
0fe9bf95
IZ
2270 }
2271 if (regmatch(next))
2272 sayYES;
2273 PL_reginput = locinput; /* Could be reset... */
2274 REGCP_UNWIND;
2275 /* Couldn't or didn't -- move forward. */
2276 old = locinput++;
2277 }
2278 }
2279 else
c277df42 2280 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2281 /* If it could work, try it. */
bbce6d69 2282 if (c1 == -1000 ||
3280af22
NIS
2283 UCHARAT(PL_reginput) == c1 ||
2284 UCHARAT(PL_reginput) == c2)
bbce6d69 2285 {
c277df42
IZ
2286 if (paren) {
2287 if (n) {
cf93c79d
IZ
2288 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2289 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2290 }
2291 else
cf93c79d 2292 PL_regendp[paren] = -1;
c277df42 2293 }
a0d0e21e 2294 if (regmatch(next))
4633a7c4 2295 sayYES;
c277df42 2296 REGCP_UNWIND;
bbce6d69 2297 }
c277df42 2298 /* Couldn't or didn't -- move forward. */
a0ed51b3 2299 PL_reginput = locinput;
a0d0e21e
LW
2300 if (regrepeat(scan, 1)) {
2301 ln++;
a0ed51b3
LW
2302 locinput = PL_reginput;
2303 }
2304 else
4633a7c4 2305 sayNO;
a0d0e21e
LW
2306 }
2307 }
2308 else {
c277df42 2309 CHECKPOINT lastcp;
a0d0e21e 2310 n = regrepeat(scan, n);
a0ed51b3 2311 locinput = PL_reginput;
22c35a8c 2312 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2313 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2314 ln = n; /* why back off? */
c277df42
IZ
2315 REGCP_SET;
2316 if (paren) {
2317 while (n >= ln) {
2318 /* If it could work, try it. */
2319 if (c1 == -1000 ||
3280af22
NIS
2320 UCHARAT(PL_reginput) == c1 ||
2321 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2322 {
2323 if (paren && n) {
2324 if (n) {
cf93c79d
IZ
2325 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2326 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2327 }
2328 else
cf93c79d 2329 PL_regendp[paren] = -1;
c277df42
IZ
2330 }
2331 if (regmatch(next))
2332 sayYES;
2333 REGCP_UNWIND;
2334 }
2335 /* Couldn't or didn't -- back up. */
2336 n--;
dfe13c55 2337 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2338 }
a0ed51b3
LW
2339 }
2340 else {
c277df42
IZ
2341 while (n >= ln) {
2342 /* If it could work, try it. */
2343 if (c1 == -1000 ||
3280af22
NIS
2344 UCHARAT(PL_reginput) == c1 ||
2345 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2346 {
2347 if (regmatch(next))
2348 sayYES;
2349 REGCP_UNWIND;
2350 }
2351 /* Couldn't or didn't -- back up. */
2352 n--;
dfe13c55 2353 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2354 }
a0d0e21e
LW
2355 }
2356 }
4633a7c4 2357 sayNO;
c277df42 2358 break;
a0d0e21e 2359 case END:
0f5d15d6
IZ
2360 if (PL_reg_call_cc) {
2361 re_cc_state *cur_call_cc = PL_reg_call_cc;
2362 CURCUR *cctmp = PL_regcc;
2363 regexp *re = PL_reg_re;
2364 CHECKPOINT cp, lastcp;
2365
2366 cp = regcppush(0); /* Save *all* the positions. */
2367 REGCP_SET;
2368 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2369 the caller. */
2370 PL_reginput = locinput; /* Make position available to
2371 the callcc. */
2372 cache_re(PL_reg_call_cc->re);
2373 PL_regcc = PL_reg_call_cc->cc;
2374 PL_reg_call_cc = PL_reg_call_cc->prev;
2375 if (regmatch(cur_call_cc->node)) {
2376 PL_reg_call_cc = cur_call_cc;
2377 regcpblow(cp);
2378 sayYES;
2379 }
2380 REGCP_UNWIND;
2381 regcppop();
2382 PL_reg_call_cc = cur_call_cc;
2383 PL_regcc = cctmp;
2384 PL_reg_re = re;
2385 cache_re(re);
2386
2387 DEBUG_r(
2388 PerlIO_printf(Perl_debug_log,
2389 "%*s continuation failed...\n",
2390 REPORT_CODE_OFF+PL_regindent*2, "")
2391 );
2392 sayNO;
2393 }
3280af22 2394 if (locinput < PL_regtill)
7e5428c5
IZ
2395 sayNO; /* Cannot match: too short. */
2396 /* Fall through */
2397 case SUCCEED:
3280af22 2398 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2399 sayYES; /* Success! */
c277df42
IZ
2400 case SUSPEND:
2401 n = 1;
9fe1d20c 2402 PL_reginput = locinput;
c277df42 2403 goto do_ifmatch;
a0d0e21e 2404 case UNLESSM:
c277df42 2405 n = 0;
a0ed51b3 2406 if (scan->flags) {
0fe9bf95
IZ
2407 if (UTF) { /* XXXX This is absolutely
2408 broken, we read before
2409 start of string. */
2410 s = HOPMAYBEc(locinput, -scan->flags);
2411 if (!s)
2412 goto say_yes;
2413 PL_reginput = s;
2414 }
2415 else {
2416 if (locinput < PL_bostr + scan->flags)
2417 goto say_yes;
2418 PL_reginput = locinput - scan->flags;
2419 goto do_ifmatch;
2420 }
a0ed51b3
LW
2421 }
2422 else
2423 PL_reginput = locinput;
c277df42
IZ
2424 goto do_ifmatch;
2425 case IFMATCH:
2426 n = 1;
a0ed51b3 2427 if (scan->flags) {
0fe9bf95
IZ
2428 if (UTF) { /* XXXX This is absolutely
2429 broken, we read before
2430 start of string. */
2431 s = HOPMAYBEc(locinput, -scan->flags);
2432 if (!s || s < PL_bostr)
2433 goto say_no;
2434 PL_reginput = s;
2435 }
2436 else {
2437 if (locinput < PL_bostr + scan->flags)
2438 goto say_no;
2439 PL_reginput = locinput - scan->flags;
2440 goto do_ifmatch;
2441 }
a0ed51b3
LW
2442 }
2443 else
2444 PL_reginput = locinput;
2445
c277df42 2446 do_ifmatch:
c277df42
IZ
2447 inner = NEXTOPER(NEXTOPER(scan));
2448 if (regmatch(inner) != n) {
2449 say_no:
2450 if (logical) {
2451 logical = 0;
2452 sw = 0;
2453 goto do_longjump;
a0ed51b3
LW
2454 }
2455 else
c277df42
IZ
2456 sayNO;
2457 }
2458 say_yes:
2459 if (logical) {
2460 logical = 0;
2461 sw = 1;
2462 }
fe44a5e8 2463 if (OP(scan) == SUSPEND) {
3280af22 2464 locinput = PL_reginput;
565764a8 2465 nextchr = UCHARAT(locinput);
fe44a5e8 2466 }
c277df42
IZ
2467 /* FALL THROUGH. */
2468 case LONGJMP:
2469 do_longjump:
2470 next = scan + ARG(scan);
2471 if (next == scan)
2472 next = NULL;
a0d0e21e
LW
2473 break;
2474 default:
c030ccd9 2475 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2476 (unsigned long)scan, OP(scan));
4327152a 2477 croak("regexp memory corruption");
a687059c 2478 }
a0d0e21e
LW
2479 scan = next;
2480 }
a687059c 2481
a0d0e21e
LW
2482 /*
2483 * We get here only if there's trouble -- normally "case END" is
2484 * the terminating point.
2485 */
4327152a 2486 croak("corrupted regexp pointers");
a0d0e21e 2487 /*NOTREACHED*/
4633a7c4
LW
2488 sayNO;
2489
2490yes:
2491#ifdef DEBUGGING
3280af22 2492 PL_regindent--;
4633a7c4
LW
2493#endif
2494 return 1;
2495
2496no:
2497#ifdef DEBUGGING
3280af22 2498 PL_regindent--;
4633a7c4 2499#endif
a0d0e21e 2500 return 0;
a687059c
LW
2501}
2502
2503/*
2504 - regrepeat - repeatedly match something simple, report how many
2505 */
2506/*
2507 * [This routine now assumes that it will only match on things of length 1.
2508 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2509 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2510 */
76e3520e 2511STATIC I32
c277df42 2512regrepeat(regnode *p, I32 max)
a687059c 2513{
5c0ca799 2514 dTHR;
a0d0e21e
LW
2515 register char *scan;
2516 register char *opnd;
2517 register I32 c;
3280af22 2518 register char *loceol = PL_regeol;
a0ed51b3 2519 register I32 hardcount = 0;
a0d0e21e 2520
3280af22 2521 scan = PL_reginput;
c277df42 2522 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2523 loceol = scan + max;
161b471a 2524 opnd = (char *) OPERAND(p);
a0d0e21e 2525 switch (OP(p)) {
22c35a8c 2526 case REG_ANY:
a0d0e21e
LW
2527 while (scan < loceol && *scan != '\n')
2528 scan++;
2529 break;
2530 case SANY:
2531 scan = loceol;
2532 break;
a0ed51b3
LW
2533 case ANYUTF8:
2534 loceol = PL_regeol;
2535 while (scan < loceol && *scan != '\n') {
2536 scan += UTF8SKIP(scan);
2537 hardcount++;
2538 }
2539 break;
2540 case SANYUTF8:
2541 loceol = PL_regeol;
2542 while (scan < loceol) {
2543 scan += UTF8SKIP(scan);
2544 hardcount++;
2545 }
2546 break;
bbce6d69 2547 case EXACT: /* length of string is 1 */
2548 c = UCHARAT(++opnd);
2549 while (scan < loceol && UCHARAT(scan) == c)
2550 scan++;
2551 break;
2552 case EXACTF: /* length of string is 1 */
2553 c = UCHARAT(++opnd);
2554 while (scan < loceol &&
22c35a8c 2555 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 2556 scan++;
2557 break;
2558 case EXACTFL: /* length of string is 1 */
3280af22 2559 PL_reg_flags |= RF_tainted;
bbce6d69 2560 c = UCHARAT(++opnd);
2561 while (scan < loceol &&
22c35a8c 2562 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
2563 scan++;
2564 break;
a0ed51b3
LW
2565 case ANYOFUTF8:
2566 loceol = PL_regeol;
2567 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2568 scan += UTF8SKIP(scan);
2569 hardcount++;
2570 }
2571 break;
a0d0e21e 2572 case ANYOF:
ae5c130c 2573 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2574 scan++;
a0d0e21e
LW
2575 break;
2576 case ALNUM:
2577 while (scan < loceol && isALNUM(*scan))
2578 scan++;
2579 break;
a0ed51b3
LW
2580 case ALNUMUTF8:
2581 loceol = PL_regeol;
dfe13c55 2582 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2583 scan += UTF8SKIP(scan);
2584 hardcount++;
2585 }
2586 break;
bbce6d69 2587 case ALNUML:
3280af22 2588 PL_reg_flags |= RF_tainted;
bbce6d69 2589 while (scan < loceol && isALNUM_LC(*scan))
2590 scan++;
2591 break;
a0ed51b3
LW
2592 case ALNUMLUTF8:
2593 PL_reg_flags |= RF_tainted;
2594 loceol = PL_regeol;
dfe13c55 2595 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2596 scan += UTF8SKIP(scan);
2597 hardcount++;
2598 }
2599 break;
2600 break;
a0d0e21e
LW
2601 case NALNUM:
2602 while (scan < loceol && !isALNUM(*scan))
2603 scan++;
2604 break;
a0ed51b3
LW
2605 case NALNUMUTF8:
2606 loceol = PL_regeol;
dfe13c55 2607 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2608 scan += UTF8SKIP(scan);
2609 hardcount++;
2610 }
2611 break;
bbce6d69 2612 case NALNUML:
3280af22 2613 PL_reg_flags |= RF_tainted;
bbce6d69 2614 while (scan < loceol && !isALNUM_LC(*scan))
2615 scan++;
2616 break;
a0ed51b3
LW
2617 case NALNUMLUTF8:
2618 PL_reg_flags |= RF_tainted;
2619 loceol = PL_regeol;
dfe13c55 2620 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2621 scan += UTF8SKIP(scan);
2622 hardcount++;
2623 }
2624 break;
a0d0e21e
LW
2625 case SPACE:
2626 while (scan < loceol && isSPACE(*scan))
2627 scan++;
2628 break;
a0ed51b3
LW
2629 case SPACEUTF8:
2630 loceol = PL_regeol;
dfe13c55 2631 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2632 scan += UTF8SKIP(scan);
2633 hardcount++;
2634 }
2635 break;
bbce6d69 2636 case SPACEL:
3280af22 2637 PL_reg_flags |= RF_tainted;
bbce6d69 2638 while (scan < loceol && isSPACE_LC(*scan))
2639 scan++;
2640 break;
a0ed51b3
LW
2641 case SPACELUTF8:
2642 PL_reg_flags |= RF_tainted;
2643 loceol = PL_regeol;
dfe13c55 2644 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2645 scan += UTF8SKIP(scan);
2646 hardcount++;
2647 }
2648 break;
a0d0e21e
LW
2649 case NSPACE:
2650 while (scan < loceol && !isSPACE(*scan))
2651 scan++;
2652 break;
a0ed51b3
LW
2653 case NSPACEUTF8:
2654 loceol = PL_regeol;
dfe13c55 2655 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2656 scan += UTF8SKIP(scan);
2657 hardcount++;
2658 }
2659 break;
bbce6d69 2660 case NSPACEL:
3280af22 2661 PL_reg_flags |= RF_tainted;
bbce6d69 2662 while (scan < loceol && !isSPACE_LC(*scan))
2663 scan++;
2664 break;
a0ed51b3
LW
2665 case NSPACELUTF8:
2666 PL_reg_flags |= RF_tainted;
2667 loceol = PL_regeol;
dfe13c55 2668 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2669 scan += UTF8SKIP(scan);
2670 hardcount++;
2671 }
2672 break;
a0d0e21e
LW
2673 case DIGIT:
2674 while (scan < loceol && isDIGIT(*scan))
2675 scan++;
2676 break;
a0ed51b3
LW
2677 case DIGITUTF8:
2678 loceol = PL_regeol;
dfe13c55 2679 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2680 scan += UTF8SKIP(scan);
2681 hardcount++;
2682 }
2683 break;
2684 break;
a0d0e21e
LW
2685 case NDIGIT:
2686 while (scan < loceol && !isDIGIT(*scan))
2687 scan++;
2688 break;
a0ed51b3
LW
2689 case NDIGITUTF8:
2690 loceol = PL_regeol;
dfe13c55 2691 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2692 scan += UTF8SKIP(scan);
2693 hardcount++;
2694 }
2695 break;
a0d0e21e
LW
2696 default: /* Called on something of 0 width. */
2697 break; /* So match right here or not at all. */
2698 }
a687059c 2699
a0ed51b3
LW
2700 if (hardcount)
2701 c = hardcount;
2702 else
2703 c = scan - PL_reginput;
3280af22 2704 PL_reginput = scan;
a687059c 2705
c277df42
IZ
2706 DEBUG_r(
2707 {
2708 SV *prop = sv_newmortal();
2709
2710 regprop(prop, p);
2711 PerlIO_printf(Perl_debug_log,
2712 "%*s %s can match %ld times out of %ld...\n",
2713 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2714 });
2715
a0d0e21e 2716 return(c);
a687059c
LW
2717}
2718
2719/*
c277df42
IZ
2720 - regrepeat_hard - repeatedly match something, report total lenth and length
2721 *
2722 * The repeater is supposed to have constant length.
2723 */
2724
76e3520e 2725STATIC I32
c277df42
IZ
2726regrepeat_hard(regnode *p, I32 max, I32 *lp)
2727{
5c0ca799 2728 dTHR;
c277df42
IZ
2729 register char *scan;
2730 register char *start;
3280af22 2731 register char *loceol = PL_regeol;
a0ed51b3 2732 I32 l = 0;
708e3b05 2733 I32 count = 0, res = 1;
a0ed51b3
LW
2734
2735 if (!max)
2736 return 0;
c277df42 2737
3280af22 2738 start = PL_reginput;
a0ed51b3 2739 if (UTF) {
708e3b05 2740 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2741 if (!count++) {
2742 l = 0;
2743 while (start < PL_reginput) {
2744 l++;
2745 start += UTF8SKIP(start);
2746 }
2747 *lp = l;
2748 if (l == 0)
2749 return max;
2750 }
2751 if (count == max)
2752 return count;
2753 }
2754 }
2755 else {
708e3b05 2756 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2757 if (!count++) {
2758 *lp = l = PL_reginput - start;
2759 if (max != REG_INFTY && l*max < loceol - scan)
2760 loceol = scan + l*max;
2761 if (l == 0)
2762 return max;
c277df42
IZ
2763 }
2764 }
2765 }
708e3b05 2766 if (!res)
3280af22 2767 PL_reginput = scan;
c277df42 2768
a0ed51b3 2769 return count;
c277df42
IZ
2770}
2771
2772/*
cb8d8820 2773 - reginclass - determine if a character falls into a character class
bbce6d69 2774 */
2775
76e3520e 2776STATIC bool
8ac85365 2777reginclass(register char *p, register I32 c)
bbce6d69 2778{
5c0ca799 2779 dTHR;
bbce6d69 2780 char flags = *p;
2781 bool match = FALSE;
2782
2783 c &= 0xFF;
ae5c130c 2784 if (ANYOF_TEST(p, c))
bbce6d69 2785 match = TRUE;
2786 else if (flags & ANYOF_FOLD) {
2787 I32 cf;
2788 if (flags & ANYOF_LOCALE) {
3280af22 2789 PL_reg_flags |= RF_tainted;
22c35a8c 2790 cf = PL_fold_locale[c];
bbce6d69 2791 }
2792 else
22c35a8c 2793 cf = PL_fold[c];
ae5c130c 2794 if (ANYOF_TEST(p, cf))
bbce6d69 2795 match = TRUE;
2796 }
2797
2798 if (!match && (flags & ANYOF_ISA)) {
3280af22 2799 PL_reg_flags |= RF_tainted;
bbce6d69 2800
2801 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2802 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2803 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2804 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2805 {
2806 match = TRUE;
2807 }
2808 }
2809
ae5c130c 2810 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2811}
2812
a0ed51b3
LW
2813STATIC bool
2814reginclassutf8(regnode *f, U8 *p)
c485e607
NIS
2815{
2816 dTHR;
a0ed51b3
LW
2817 char flags = ARG1(f);
2818 bool match = FALSE;
2819 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2820
2821 if (swash_fetch(sv, p))
2822 match = TRUE;
2823 else if (flags & ANYOF_FOLD) {
2824 I32 cf;
dfe13c55 2825 U8 tmpbuf[10];
a0ed51b3
LW
2826 if (flags & ANYOF_LOCALE) {
2827 PL_reg_flags |= RF_tainted;
2828 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2829 }
2830 else
2831 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2832 if (swash_fetch(sv, tmpbuf))
2833 match = TRUE;
2834 }
2835
2836 if (!match && (flags & ANYOF_ISA)) {
2837 PL_reg_flags |= RF_tainted;
2838
2839 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2840 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2841 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2842 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2843 {
2844 match = TRUE;
2845 }
2846 }
2847
2848 return (flags & ANYOF_INVERT) ? !match : match;
2849}
161b471a 2850
dfe13c55
GS
2851STATIC U8 *
2852reghop(U8 *s, I32 off)
c485e607
NIS
2853{
2854 dTHR;
a0ed51b3
LW
2855 if (off >= 0) {
2856 while (off-- && s < (U8*)PL_regeol)
2857 s += UTF8SKIP(s);
2858 }
2859 else {
2860 while (off++) {
2861 if (s > (U8*)PL_bostr) {
2862 s--;
2863 if (*s & 0x80) {
2864 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2865 s--;
2866 } /* XXX could check well-formedness here */
2867 }
2868 }
2869 }
2870 return s;
2871}
161b471a 2872
dfe13c55
GS
2873STATIC U8 *
2874reghopmaybe(U8* s, I32 off)
a0ed51b3 2875{
c485e607 2876 dTHR;
a0ed51b3
LW
2877 if (off >= 0) {
2878 while (off-- && s < (U8*)PL_regeol)
2879 s += UTF8SKIP(s);
2880 if (off >= 0)
2881 return 0;
2882 }
2883 else {
2884 while (off++) {
2885 if (s > (U8*)PL_bostr) {
2886 s--;
2887 if (*s & 0x80) {
2888 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2889 s--;
2890 } /* XXX could check well-formedness here */
2891 }
2892 else
2893 break;
2894 }
2895 if (off <= 0)
2896 return 0;
2897 }
2898 return s;
2899}