This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate with Sarathy.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5
DD
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
273/* If SCREAM, then sv should be compatible with strpos and strend.
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
278char *
279Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
280 char *strend, U32 flags, re_scream_pos_data *data)
281{
282 I32 start_shift;
283 /* Should be nonnegative! */
284 I32 end_shift;
285 char *s;
286 char *t;
287 I32 ml_anch;
288
289 DEBUG_r( if (!PL_colorset) reginitcolors() );
290 DEBUG_r(PerlIO_printf(Perl_debug_log,
291 "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
292 PL_colors[4],PL_colors[5],PL_colors[0],
293 prog->precomp,
294 PL_colors[1],
295 (strlen(prog->precomp) > 60 ? "..." : ""),
296 PL_colors[0],
297 (strend - strpos > 60 ? 60 : strend - strpos),
298 strpos, PL_colors[1],
299 (strend - strpos > 60 ? "..." : ""))
300 );
301
302 if (prog->minlen > strend - strpos)
303 goto fail;
304
305 /* XXXX Move further down? */
306 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
307 /* Should be nonnegative! */
308 end_shift = prog->minlen - start_shift -
309 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
310
311 if (prog->reganch & ROPT_ANCH) {
312 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
313 || ( (prog->reganch & ROPT_ANCH_BOL)
314 && !PL_multiline ) );
315
316 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
317 /* Anchored... */
318 I32 slen;
319
320 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
321 && (sv && (strpos + SvCUR(sv) != strend)) )
322 goto fail;
323
324 s = (char*)HOP((U8*)strpos, prog->check_offset_min);
325 if (SvTAIL(prog->check_substr)) {
326 slen = SvCUR(prog->check_substr); /* >= 1 */
327
328 if ( strend - s > slen || strend - s < slen - 1 ) {
329 s = Nullch;
330 goto finish;
331 }
332 if ( strend - s == slen && strend[-1] != '\n') {
333 s = Nullch;
334 goto finish;
335 }
336 /* Now should match s[0..slen-2] */
337 slen--;
338 if (slen && (*SvPVX(prog->check_substr) != *s
339 || (slen > 1
340 && memNE(SvPVX(prog->check_substr), s, slen))))
341 s = Nullch;
342 }
343 else if (*SvPVX(prog->check_substr) != *s
344 || ((slen = SvCUR(prog->check_substr)) > 1
345 && memNE(SvPVX(prog->check_substr), s, slen)))
346 s = Nullch;
347 else
348 s = strpos;
349 goto finish;
350 }
351 s = strpos;
352 if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
353 end_shift += strend - s - prog->minlen - prog->check_offset_max;
354 }
355 else {
356 ml_anch = 0;
357 s = strpos;
358 }
359
360 restart:
361 if (flags & REXEC_SCREAM) {
362 SV *c = prog->check_substr;
363 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
364 I32 p = -1; /* Internal iterator of scream. */
365 I32 *pp = data ? data->scream_pos : &p;
366
367 if (PL_screamfirst[BmRARE(c)] >= 0
368 || ( BmRARE(c) == '\n'
369 && (BmPREVIOUS(c) == SvCUR(c) - 1)
370 && SvTAIL(c) ))
371 s = screaminstr(sv, prog->check_substr,
372 start_shift + (strpos - strbeg), end_shift, pp, 0);
373 else
374 s = Nullch;
375 if (data)
376 *data->scream_olds = s;
377 }
378 else
379 s = fbm_instr((unsigned char*)s + start_shift,
380 (unsigned char*)strend - end_shift,
381 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
382
383 /* Update the count-of-usability, remove useless subpatterns,
384 unshift s. */
385 finish:
386 if (!s) {
387 ++BmUSEFUL(prog->check_substr); /* hooray */
388 goto fail; /* not present */
389 }
390 else if (s - strpos > prog->check_offset_max &&
391 ((prog->reganch & ROPT_UTF8)
392 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
393 && t >= strpos)
394 : (t = s - prog->check_offset_max) != 0) ) {
395 if (ml_anch && t[-1] != '\n') {
396 find_anchor:
397 while (t < strend - end_shift - prog->minlen) {
398 if (*t == '\n') {
399 if (t < s - prog->check_offset_min) {
400 s = t + 1;
401 goto set_useful;
402 }
403 s = t + 1;
404 goto restart;
405 }
406 t++;
407 }
408 s = Nullch;
409 goto finish;
410 }
411 s = t;
412 set_useful:
413 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
414 }
415 else {
416 if (ml_anch && sv
417 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
418 t = strpos;
419 goto find_anchor;
420 }
421 if (!(prog->reganch & ROPT_NAUGHTY)
422 && --BmUSEFUL(prog->check_substr) < 0
423 && prog->check_substr == prog->float_substr) { /* boo */
424 /* If flags & SOMETHING - do not do it many times on the same match */
425 SvREFCNT_dec(prog->check_substr);
426 prog->check_substr = Nullsv; /* disable */
427 prog->float_substr = Nullsv; /* clear */
428 s = strpos;
429 prog->reganch &= ~RE_USE_INTUIT;
430 }
431 else
432 s = strpos;
433 }
434
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
436 PL_colors[4],PL_colors[5], (long)(s - strpos)) );
437 return s;
438 fail:
439 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
440 PL_colors[4],PL_colors[5]));
441 return Nullch;
442}
9661b544 443
a687059c 444/*
c277df42 445 - regexec_flags - match a regexp against a string
a687059c 446 */
79072805 447I32
864dbfa3 448Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 449 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
450/* strend: pointer to null at end of string */
451/* strbeg: real beginning of string */
452/* minend: end of match must be >=minend after stringarg. */
453/* data: May be used for some additional optimizations. */
454/* nosave: For optimizations. */
a687059c 455{
5c0ca799 456 dTHR;
a0d0e21e 457 register char *s;
c277df42 458 register regnode *c;
a0d0e21e
LW
459 register char *startpos = stringarg;
460 register I32 tmp;
c277df42 461 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
462 I32 dontbother = 0; /* how many characters not to try at end */
463 CURCUR cc;
c277df42 464 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
465 constant substr. */ /* CC */
466 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
467 I32 scream_pos = -1; /* Internal iterator of scream. */
468 char *scream_olds;
3280af22 469 SV* oreplsv = GvSV(PL_replgv);
a687059c 470
a0d0e21e 471 cc.cur = 0;
4633a7c4 472 cc.oldcc = 0;
3280af22 473 PL_regcc = &cc;
a0d0e21e 474
0f5d15d6 475 cache_re(prog);
a0d0e21e 476#ifdef DEBUGGING
3280af22 477 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
478#endif
479
480 /* Be paranoid... */
481 if (prog == NULL || startpos == NULL) {
cea2e8a9 482 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
483 return 0;
484 }
485
c277df42
IZ
486 minlen = prog->minlen;
487 if (strend - startpos < minlen) goto phooey;
488
a0d0e21e 489 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 490 PL_regprev = '\n';
a0d0e21e 491 else {
a0ed51b3 492 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
493 if (!PL_multiline && PL_regprev == '\n')
494 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 495 }
bbce6d69 496
a0d0e21e 497 /* Check validity of program. */
22c35a8c 498 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 499 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
500 }
501
3280af22
NIS
502 PL_reg_flags = 0;
503 PL_reg_eval_set = 0;
a0d0e21e 504
a0ed51b3
LW
505 if (prog->reganch & ROPT_UTF8)
506 PL_reg_flags |= RF_utf8;
507
508 /* Mark beginning of line for ^ and lookbehind. */
509 PL_regbol = startpos;
510 PL_bostr = strbeg;
9661b544 511 PL_reg_sv = sv;
a0ed51b3
LW
512
513 /* Mark end of line for $ (and such) */
514 PL_regeol = strend;
515
516 /* see how far we have to get to not match where we matched before */
517 PL_regtill = startpos+minend;
518
0f5d15d6
IZ
519 /* We start without call_cc context. */
520 PL_reg_call_cc = 0;
521
a0d0e21e
LW
522 /* If there is a "must appear" string, look for it. */
523 s = startpos;
cad2e5aa
JH
524
525 if (prog->reganch & ROPT_GPOS_SEEN) {
526 MAGIC *mg;
527
528 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
529 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
530 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 531 else
cad2e5aa
JH
532 PL_reg_ganch = startpos;
533 if (prog->reganch & ROPT_ANCH_GPOS) {
534 if (s > PL_reg_ganch)
535 goto phooey;
536 s = PL_reg_ganch;
a0ed51b3 537 }
a0d0e21e 538 }
a687059c 539
cad2e5aa
JH
540 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
541 re_scream_pos_data d;
542
543 d.scream_olds = &scream_olds;
544 d.scream_pos = &scream_pos;
545 s = re_intuit_start(prog, sv, s, strend, flags, &d);
546 if (!s)
547 goto phooey; /* not present */
548 }
549
550 DEBUG_r( if (!PL_colorset) reginitcolors() );
551 DEBUG_r(PerlIO_printf(Perl_debug_log,
8d300b32
GS
552 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
553 PL_colors[4],PL_colors[5],PL_colors[0],
554 prog->precomp,
555 PL_colors[1],
c277df42 556 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 557 PL_colors[0],
c277df42 558 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 559 startpos, PL_colors[1],
c277df42
IZ
560 (strend - startpos > 60 ? "..." : ""))
561 );
562
a0d0e21e 563 /* Simplest case: anchored match need be tried only once. */
774d564b 564 /* [unless only anchor is BOL and multiline is set] */
22e551b9 565 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 566 if (s == startpos && regtry(prog, startpos))
a0d0e21e 567 goto got_it;
22e551b9
IZ
568 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
569 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 570 {
cad2e5aa
JH
571 char *end;
572
a0d0e21e
LW
573 if (minlen)
574 dontbother = minlen - 1;
cad2e5aa 575 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 576 /* for multiline we only have to try after newlines */
cad2e5aa
JH
577 if (prog->check_substr) {
578 while (1) {
579 if (regtry(prog, s))
a0d0e21e 580 goto got_it;
cad2e5aa
JH
581 if (s >= end)
582 goto phooey;
583 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
584 if (!s)
585 goto phooey;
586 }
587 } else {
588 if (s > startpos)
589 s--;
590 while (s < end) {
591 if (*s++ == '\n') { /* don't need PL_utf8skip here */
592 if (regtry(prog, s))
593 goto got_it;
594 }
595 }
35c8bce7 596 }
35c8bce7 597 }
a0d0e21e 598 goto phooey;
22e551b9
IZ
599 } else if (prog->reganch & ROPT_ANCH_GPOS) {
600 if (regtry(prog, PL_reg_ganch))
601 goto got_it;
602 goto phooey;
a0d0e21e 603 }
35c8bce7 604
a0d0e21e 605 /* Messy cases: unanchored match. */
c277df42
IZ
606 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
607 /* we have /x+whatever/ */
cad2e5aa 608 /* it must be a one character string (XXXX Except UTF?) */
c277df42 609 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
610 if (UTF) {
611 while (s < strend) {
612 if (*s == ch) {
613 if (regtry(prog, s)) goto got_it;
614 s += UTF8SKIP(s);
615 while (s < strend && *s == ch)
616 s += UTF8SKIP(s);
617 }
618 s += UTF8SKIP(s);
619 }
620 }
621 else {
622 while (s < strend) {
623 if (*s == ch) {
624 if (regtry(prog, s)) goto got_it;
c277df42 625 s++;
a0ed51b3
LW
626 while (s < strend && *s == ch)
627 s++;
628 }
629 s++;
a0d0e21e 630 }
a687059c 631 }
c277df42
IZ
632 }
633 /*SUPPRESS 560*/
634 else if (prog->anchored_substr != Nullsv
635 || (prog->float_substr != Nullsv
636 && prog->float_max_offset < strend - s)) {
637 SV *must = prog->anchored_substr
638 ? prog->anchored_substr : prog->float_substr;
639 I32 back_max =
640 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
641 I32 back_min =
642 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
643 I32 delta = back_max - back_min;
cf93c79d 644 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
645 -(I32)(CHR_SVLEN(must)
646 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
647 char *last1; /* Last position checked before */
648
649 if (s > PL_bostr)
dfe13c55 650 last1 = HOPc(s, -1);
a0ed51b3
LW
651 else
652 last1 = s - 1; /* bogus */
c277df42
IZ
653
654 /* XXXX check_substr already used to find `s', can optimize if
655 check_substr==must. */
656 scream_pos = -1;
657 dontbother = end_shift;
dfe13c55 658 strend = HOPc(strend, -dontbother);
c277df42 659 while ( (s <= last) &&
22e551b9
IZ
660 ((flags & REXEC_SCREAM)
661 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 662 end_shift, &scream_pos, 0))
a0ed51b3 663 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
664 (unsigned char*)strend, must,
665 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
666 if (HOPc(s, -back_max) > last1) {
667 last1 = HOPc(s, -back_min);
668 s = HOPc(s, -back_max);
a0ed51b3
LW
669 }
670 else {
dfe13c55 671 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 672
dfe13c55 673 last1 = HOPc(s, -back_min);
c277df42 674 s = t;
a0d0e21e 675 }
a0ed51b3
LW
676 if (UTF) {
677 while (s <= last1) {
678 if (regtry(prog, s))
679 goto got_it;
680 s += UTF8SKIP(s);
681 }
682 }
683 else {
684 while (s <= last1) {
685 if (regtry(prog, s))
686 goto got_it;
687 s++;
688 }
a0d0e21e
LW
689 }
690 }
691 goto phooey;
a0ed51b3
LW
692 }
693 else if (c = prog->regstclass) {
a0d0e21e 694 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 695 char *cc;
a687059c 696
a0d0e21e
LW
697 if (minlen)
698 dontbother = minlen - 1;
dfe13c55 699 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
700 tmp = 1;
701 /* We know what class it must start with. */
702 switch (OP(c)) {
a0ed51b3
LW
703 case ANYOFUTF8:
704 cc = (char *) OPERAND(c);
705 while (s < strend) {
706 if (REGINCLASSUTF8(c, (U8*)s)) {
707 if (tmp && regtry(prog, s))
708 goto got_it;
709 else
710 tmp = doevery;
711 }
712 else
713 tmp = 1;
714 s += UTF8SKIP(s);
715 }
716 break;
a0d0e21e 717 case ANYOF:
a0ed51b3 718 cc = (char *) OPERAND(c);
a0d0e21e 719 while (s < strend) {
a0ed51b3 720 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
721 if (tmp && regtry(prog, s))
722 goto got_it;
723 else
724 tmp = doevery;
a687059c 725 }
a0d0e21e
LW
726 else
727 tmp = 1;
728 s++;
729 }
730 break;
bbce6d69 731 case BOUNDL:
3280af22 732 PL_reg_flags |= RF_tainted;
bbce6d69 733 /* FALL THROUGH */
a0d0e21e 734 case BOUND:
a0ed51b3
LW
735 if (minlen) {
736 dontbother++;
737 strend -= 1;
738 }
3280af22 739 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 740 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 741 while (s < strend) {
95bac841 742 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
743 tmp = !tmp;
744 if (regtry(prog, s))
745 goto got_it;
a687059c 746 }
a0d0e21e
LW
747 s++;
748 }
749 if ((minlen || tmp) && regtry(prog,s))
750 goto got_it;
751 break;
a0ed51b3
LW
752 case BOUNDLUTF8:
753 PL_reg_flags |= RF_tainted;
754 /* FALL THROUGH */
755 case BOUNDUTF8:
756 if (minlen) {
757 dontbother++;
dfe13c55 758 strend = reghop_c(strend, -1);
a0ed51b3 759 }
dfe13c55 760 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
761 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
762 while (s < strend) {
dfe13c55
GS
763 if (tmp == !(OP(c) == BOUND ?
764 swash_fetch(PL_utf8_alnum, (U8*)s) :
765 isALNUM_LC_utf8((U8*)s)))
766 {
a0ed51b3
LW
767 tmp = !tmp;
768 if (regtry(prog, s))
769 goto got_it;
770 }
771 s += UTF8SKIP(s);
772 }
773 if ((minlen || tmp) && regtry(prog,s))
774 goto got_it;
775 break;
bbce6d69 776 case NBOUNDL:
3280af22 777 PL_reg_flags |= RF_tainted;
bbce6d69 778 /* FALL THROUGH */
a0d0e21e 779 case NBOUND:
a0ed51b3
LW
780 if (minlen) {
781 dontbother++;
782 strend -= 1;
783 }
3280af22 784 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 785 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 786 while (s < strend) {
95bac841 787 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
788 tmp = !tmp;
789 else if (regtry(prog, s))
790 goto got_it;
791 s++;
792 }
793 if ((minlen || !tmp) && regtry(prog,s))
794 goto got_it;
795 break;
a0ed51b3
LW
796 case NBOUNDLUTF8:
797 PL_reg_flags |= RF_tainted;
798 /* FALL THROUGH */
799 case NBOUNDUTF8:
800 if (minlen) {
801 dontbother++;
dfe13c55 802 strend = reghop_c(strend, -1);
a0ed51b3 803 }
dfe13c55 804 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
805 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
806 while (s < strend) {
dfe13c55
GS
807 if (tmp == !(OP(c) == NBOUND ?
808 swash_fetch(PL_utf8_alnum, (U8*)s) :
809 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
810 tmp = !tmp;
811 else if (regtry(prog, s))
812 goto got_it;
813 s += UTF8SKIP(s);
814 }
815 if ((minlen || !tmp) && regtry(prog,s))
816 goto got_it;
817 break;
a0d0e21e
LW
818 case ALNUM:
819 while (s < strend) {
bbce6d69 820 if (isALNUM(*s)) {
821 if (tmp && regtry(prog, s))
822 goto got_it;
823 else
824 tmp = doevery;
825 }
826 else
827 tmp = 1;
828 s++;
829 }
830 break;
a0ed51b3
LW
831 case ALNUMUTF8:
832 while (s < strend) {
dfe13c55 833 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
834 if (tmp && regtry(prog, s))
835 goto got_it;
836 else
837 tmp = doevery;
838 }
839 else
840 tmp = 1;
841 s += UTF8SKIP(s);
842 }
843 break;
bbce6d69 844 case ALNUML:
3280af22 845 PL_reg_flags |= RF_tainted;
bbce6d69 846 while (s < strend) {
847 if (isALNUM_LC(*s)) {
a0d0e21e
LW
848 if (tmp && regtry(prog, s))
849 goto got_it;
a687059c 850 else
a0d0e21e
LW
851 tmp = doevery;
852 }
853 else
854 tmp = 1;
855 s++;
856 }
857 break;
a0ed51b3
LW
858 case ALNUMLUTF8:
859 PL_reg_flags |= RF_tainted;
860 while (s < strend) {
dfe13c55 861 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
862 if (tmp && regtry(prog, s))
863 goto got_it;
864 else
865 tmp = doevery;
866 }
867 else
868 tmp = 1;
869 s += UTF8SKIP(s);
870 }
871 break;
a0d0e21e
LW
872 case NALNUM:
873 while (s < strend) {
bbce6d69 874 if (!isALNUM(*s)) {
875 if (tmp && regtry(prog, s))
876 goto got_it;
877 else
878 tmp = doevery;
879 }
880 else
881 tmp = 1;
882 s++;
883 }
884 break;
a0ed51b3
LW
885 case NALNUMUTF8:
886 while (s < strend) {
dfe13c55 887 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
888 if (tmp && regtry(prog, s))
889 goto got_it;
890 else
891 tmp = doevery;
892 }
893 else
894 tmp = 1;
895 s += UTF8SKIP(s);
896 }
897 break;
bbce6d69 898 case NALNUML:
3280af22 899 PL_reg_flags |= RF_tainted;
bbce6d69 900 while (s < strend) {
901 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
902 if (tmp && regtry(prog, s))
903 goto got_it;
a687059c 904 else
a0d0e21e 905 tmp = doevery;
a687059c 906 }
a0d0e21e
LW
907 else
908 tmp = 1;
909 s++;
910 }
911 break;
a0ed51b3
LW
912 case NALNUMLUTF8:
913 PL_reg_flags |= RF_tainted;
914 while (s < strend) {
dfe13c55 915 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
916 if (tmp && regtry(prog, s))
917 goto got_it;
918 else
919 tmp = doevery;
920 }
921 else
922 tmp = 1;
923 s += UTF8SKIP(s);
924 }
925 break;
a0d0e21e
LW
926 case SPACE:
927 while (s < strend) {
928 if (isSPACE(*s)) {
929 if (tmp && regtry(prog, s))
930 goto got_it;
931 else
932 tmp = doevery;
2304df62 933 }
a0d0e21e
LW
934 else
935 tmp = 1;
936 s++;
937 }
938 break;
a0ed51b3
LW
939 case SPACEUTF8:
940 while (s < strend) {
dfe13c55 941 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
942 if (tmp && regtry(prog, s))
943 goto got_it;
944 else
945 tmp = doevery;
946 }
947 else
948 tmp = 1;
949 s += UTF8SKIP(s);
950 }
951 break;
bbce6d69 952 case SPACEL:
3280af22 953 PL_reg_flags |= RF_tainted;
bbce6d69 954 while (s < strend) {
955 if (isSPACE_LC(*s)) {
956 if (tmp && regtry(prog, s))
957 goto got_it;
958 else
959 tmp = doevery;
960 }
961 else
962 tmp = 1;
963 s++;
964 }
965 break;
a0ed51b3
LW
966 case SPACELUTF8:
967 PL_reg_flags |= RF_tainted;
968 while (s < strend) {
dfe13c55 969 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
970 if (tmp && regtry(prog, s))
971 goto got_it;
972 else
973 tmp = doevery;
974 }
975 else
976 tmp = 1;
977 s += UTF8SKIP(s);
978 }
979 break;
a0d0e21e
LW
980 case NSPACE:
981 while (s < strend) {
982 if (!isSPACE(*s)) {
983 if (tmp && regtry(prog, s))
984 goto got_it;
985 else
986 tmp = doevery;
a687059c 987 }
a0d0e21e
LW
988 else
989 tmp = 1;
990 s++;
991 }
992 break;
a0ed51b3
LW
993 case NSPACEUTF8:
994 while (s < strend) {
dfe13c55 995 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
996 if (tmp && regtry(prog, s))
997 goto got_it;
998 else
999 tmp = doevery;
1000 }
1001 else
1002 tmp = 1;
1003 s += UTF8SKIP(s);
1004 }
1005 break;
bbce6d69 1006 case NSPACEL:
3280af22 1007 PL_reg_flags |= RF_tainted;
bbce6d69 1008 while (s < strend) {
1009 if (!isSPACE_LC(*s)) {
1010 if (tmp && regtry(prog, s))
1011 goto got_it;
1012 else
1013 tmp = doevery;
1014 }
1015 else
1016 tmp = 1;
1017 s++;
1018 }
1019 break;
a0ed51b3
LW
1020 case NSPACELUTF8:
1021 PL_reg_flags |= RF_tainted;
1022 while (s < strend) {
dfe13c55 1023 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1024 if (tmp && regtry(prog, s))
1025 goto got_it;
1026 else
1027 tmp = doevery;
1028 }
1029 else
1030 tmp = 1;
1031 s += UTF8SKIP(s);
1032 }
1033 break;
a0d0e21e
LW
1034 case DIGIT:
1035 while (s < strend) {
1036 if (isDIGIT(*s)) {
1037 if (tmp && regtry(prog, s))
1038 goto got_it;
1039 else
1040 tmp = doevery;
2b69d0c2 1041 }
a0d0e21e
LW
1042 else
1043 tmp = 1;
1044 s++;
1045 }
1046 break;
a0ed51b3
LW
1047 case DIGITUTF8:
1048 while (s < strend) {
dfe13c55 1049 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1050 if (tmp && regtry(prog, s))
1051 goto got_it;
1052 else
1053 tmp = doevery;
1054 }
1055 else
1056 tmp = 1;
1057 s += UTF8SKIP(s);
1058 }
1059 break;
b8c5462f
JH
1060 case DIGITL:
1061 PL_reg_flags |= RF_tainted;
1062 while (s < strend) {
1063 if (isDIGIT_LC(*s)) {
1064 if (tmp && regtry(prog, s))
1065 goto got_it;
1066 else
1067 tmp = doevery;
1068 }
1069 else
1070 tmp = 1;
1071 s++;
1072 }
1073 break;
1074 case DIGITLUTF8:
1075 PL_reg_flags |= RF_tainted;
1076 while (s < strend) {
1077 if (isDIGIT_LC_utf8((U8*)s)) {
1078 if (tmp && regtry(prog, s))
1079 goto got_it;
1080 else
1081 tmp = doevery;
1082 }
1083 else
1084 tmp = 1;
1085 s += UTF8SKIP(s);
1086 }
1087 break;
a0d0e21e
LW
1088 case NDIGIT:
1089 while (s < strend) {
1090 if (!isDIGIT(*s)) {
1091 if (tmp && regtry(prog, s))
1092 goto got_it;
1093 else
1094 tmp = doevery;
a687059c 1095 }
a0d0e21e
LW
1096 else
1097 tmp = 1;
1098 s++;
1099 }
1100 break;
a0ed51b3
LW
1101 case NDIGITUTF8:
1102 while (s < strend) {
dfe13c55 1103 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1104 if (tmp && regtry(prog, s))
1105 goto got_it;
1106 else
1107 tmp = doevery;
1108 }
1109 else
1110 tmp = 1;
1111 s += UTF8SKIP(s);
1112 }
1113 break;
b8c5462f
JH
1114 case NDIGITL:
1115 PL_reg_flags |= RF_tainted;
1116 while (s < strend) {
1117 if (!isDIGIT_LC(*s)) {
1118 if (tmp && regtry(prog, s))
1119 goto got_it;
1120 else
1121 tmp = doevery;
1122 }
1123 else
1124 tmp = 1;
1125 s++;
a0ed51b3 1126 }
b8c5462f
JH
1127 break;
1128 case NDIGITLUTF8:
1129 PL_reg_flags |= RF_tainted;
1130 while (s < strend) {
1131 if (!isDIGIT_LC_utf8((U8*)s)) {
1132 if (tmp && regtry(prog, s))
1133 goto got_it;
cf93c79d 1134 else
b8c5462f
JH
1135 tmp = doevery;
1136 }
1137 else
1138 tmp = 1;
1139 s += UTF8SKIP(s);
1140 }
1141 break;
1142 case ALNUMC:
1143 while (s < strend) {
1144 if (isALNUMC(*s)) {
1145 if (tmp && regtry(prog, s))
1146 goto got_it;
cf93c79d 1147 else
b8c5462f 1148 tmp = doevery;
cf93c79d 1149 }
b8c5462f
JH
1150 else
1151 tmp = 1;
1152 s++;
c277df42 1153 }
b8c5462f
JH
1154 break;
1155 case ALNUMCUTF8:
1156 while (s < strend) {
1157 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1158 if (tmp && regtry(prog, s))
1159 goto got_it;
1160 else
1161 tmp = doevery;
1162 }
1163 else
1164 tmp = 1;
a0ed51b3 1165 s += UTF8SKIP(s);
9661b544 1166 }
b8c5462f
JH
1167 break;
1168 case ALNUMCL:
1169 PL_reg_flags |= RF_tainted;
1170 while (s < strend) {
1171 if (isALNUMC_LC(*s)) {
1172 if (tmp && regtry(prog, s))
1173 goto got_it;
1174 else
1175 tmp = doevery;
1176 }
1177 else
1178 tmp = 1;
1179 s++;
9661b544 1180 }
b8c5462f
JH
1181 break;
1182 case ALNUMCLUTF8:
1183 PL_reg_flags |= RF_tainted;
1184 while (s < strend) {
1185 if (isALNUMC_LC_utf8((U8*)s)) {
1186 if (tmp && regtry(prog, s))
1187 goto got_it;
1188 else
1189 tmp = doevery;
1190 }
1191 else
1192 tmp = 1;
1193 s += UTF8SKIP(s);
1194 }
1195 break;
1196 case NALNUMC:
1197 while (s < strend) {
1198 if (!isALNUMC(*s)) {
1199 if (tmp && regtry(prog, s))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
1204 else
1205 tmp = 1;
1206 s++;
1207 }
1208 break;
1209 case NALNUMCUTF8:
1210 while (s < strend) {
1211 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1212 if (tmp && regtry(prog, s))
1213 goto got_it;
1214 else
1215 tmp = doevery;
1216 }
1217 else
1218 tmp = 1;
1219 s += UTF8SKIP(s);
1220 }
1221 break;
1222 case NALNUMCL:
1223 PL_reg_flags |= RF_tainted;
1224 while (s < strend) {
1225 if (!isALNUMC_LC(*s)) {
1226 if (tmp && regtry(prog, s))
1227 goto got_it;
1228 else
1229 tmp = doevery;
1230 }
1231 else
1232 tmp = 1;
1233 s++;
1234 }
1235 break;
1236 case NALNUMCLUTF8:
1237 PL_reg_flags |= RF_tainted;
1238 while (s < strend) {
1239 if (!isALNUMC_LC_utf8((U8*)s)) {
1240 if (tmp && regtry(prog, s))
1241 goto got_it;
1242 else
1243 tmp = doevery;
1244 }
1245 else
1246 tmp = 1;
1247 s += UTF8SKIP(s);
1248 }
1249 break;
1250 case ASCII:
1251 while (s < strend) {
f248d071 1252 if (isASCII(*(U8*)s)) {
b8c5462f
JH
1253 if (tmp && regtry(prog, s))
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
1258 else
1259 tmp = 1;
1260 s++;
1261 }
1262 break;
1263 case NASCII:
1264 while (s < strend) {
f248d071 1265 if (!isASCII(*(U8*)s)) {
b8c5462f
JH
1266 if (tmp && regtry(prog, s))
1267 goto got_it;
1268 else
1269 tmp = doevery;
1270 }
1271 else
1272 tmp = 1;
1273 s++;
1274 }
1275 break;
1276 case CNTRL:
1277 while (s < strend) {
1278 if (isCNTRL(*s)) {
1279 if (tmp && regtry(prog, s))
1280 goto got_it;
1281 else
1282 tmp = doevery;
1283 }
1284 else
1285 tmp = 1;
1286 s++;
1287 }
1288 break;
1289 case CNTRLUTF8:
1290 while (s < strend) {
1291 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1292 if (tmp && regtry(prog, s))
1293 goto got_it;
1294 else
1295 tmp = doevery;
1296 }
1297 else
1298 tmp = 1;
1299 s += UTF8SKIP(s);
1300 }
1301 break;
1302 case CNTRLL:
1303 PL_reg_flags |= RF_tainted;
1304 while (s < strend) {
1305 if (isCNTRL_LC(*s)) {
1306 if (tmp && regtry(prog, s))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
1311 else
1312 tmp = 1;
1313 s++;
1314 }
1315 break;
1316 case CNTRLLUTF8:
1317 PL_reg_flags |= RF_tainted;
1318 while (s < strend) {
1319 if (*s == ' ' || isCNTRL_LC_utf8((U8*)s)) {
1320 if (tmp && regtry(prog, s))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
1325 else
1326 tmp = 1;
1327 s += UTF8SKIP(s);
1328 }
1329 break;
1330 case NCNTRL:
1331 while (s < strend) {
1332 if (!isCNTRL(*s)) {
1333 if (tmp && regtry(prog, s))
1334 goto got_it;
1335 else
1336 tmp = doevery;
1337 }
1338 else
1339 tmp = 1;
1340 s++;
1341 }
1342 break;
1343 case NCNTRLUTF8:
1344 while (s < strend) {
1345 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1346 if (tmp && regtry(prog, s))
1347 goto got_it;
1348 else
1349 tmp = doevery;
1350 }
1351 else
1352 tmp = 1;
1353 s += UTF8SKIP(s);
1354 }
1355 break;
1356 case NCNTRLL:
1357 PL_reg_flags |= RF_tainted;
1358 while (s < strend) {
1359 if (!isCNTRL_LC(*s)) {
1360 if (tmp && regtry(prog, s))
1361 goto got_it;
1362 else
1363 tmp = doevery;
1364 }
1365 else
1366 tmp = 1;
1367 s++;
1368 }
1369 break;
1370 case NCNTRLLUTF8:
1371 PL_reg_flags |= RF_tainted;
1372 while (s < strend) {
1373 if (!isCNTRL_LC_utf8((U8*)s)) {
1374 if (tmp && regtry(prog, s))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
1379 else
1380 tmp = 1;
1381 s += UTF8SKIP(s);
1382 }
1383 break;
1384 case GRAPH:
1385 while (s < strend) {
1386 if (isGRAPH(*s)) {
1387 if (tmp && regtry(prog, s))
1388 goto got_it;
1389 else
1390 tmp = doevery;
1391 }
1392 else
1393 tmp = 1;
1394 s++;
1395 }
1396 break;
1397 case GRAPHUTF8:
1398 while (s < strend) {
1399 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1400 if (tmp && regtry(prog, s))
1401 goto got_it;
1402 else
1403 tmp = doevery;
1404 }
1405 else
1406 tmp = 1;
1407 s += UTF8SKIP(s);
1408 }
1409 break;
1410 case GRAPHL:
1411 PL_reg_flags |= RF_tainted;
1412 while (s < strend) {
1413 if (isGRAPH_LC(*s)) {
1414 if (tmp && regtry(prog, s))
1415 goto got_it;
1416 else
1417 tmp = doevery;
1418 }
1419 else
1420 tmp = 1;
1421 s++;
1422 }
1423 break;
1424 case GRAPHLUTF8:
1425 PL_reg_flags |= RF_tainted;
1426 while (s < strend) {
1427 if (*s == ' ' || isGRAPH_LC_utf8((U8*)s)) {
1428 if (tmp && regtry(prog, s))
1429 goto got_it;
1430 else
1431 tmp = doevery;
1432 }
1433 else
1434 tmp = 1;
1435 s += UTF8SKIP(s);
1436 }
1437 break;
1438 case NGRAPH:
1439 while (s < strend) {
1440 if (!isGRAPH(*s)) {
1441 if (tmp && regtry(prog, s))
1442 goto got_it;
1443 else
1444 tmp = doevery;
1445 }
1446 else
1447 tmp = 1;
1448 s++;
1449 }
1450 break;
1451 case NGRAPHUTF8:
1452 while (s < strend) {
1453 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1454 if (tmp && regtry(prog, s))
1455 goto got_it;
1456 else
1457 tmp = doevery;
1458 }
1459 else
1460 tmp = 1;
1461 s += UTF8SKIP(s);
1462 }
1463 break;
1464 case NGRAPHL:
1465 PL_reg_flags |= RF_tainted;
1466 while (s < strend) {
1467 if (!isGRAPH_LC(*s)) {
1468 if (tmp && regtry(prog, s))
1469 goto got_it;
1470 else
1471 tmp = doevery;
1472 }
1473 else
1474 tmp = 1;
1475 s++;
1476 }
1477 break;
1478 case NGRAPHLUTF8:
1479 PL_reg_flags |= RF_tainted;
1480 while (s < strend) {
1481 if (!isGRAPH_LC_utf8((U8*)s)) {
1482 if (tmp && regtry(prog, s))
1483 goto got_it;
1484 else
1485 tmp = doevery;
1486 }
1487 else
1488 tmp = 1;
1489 s += UTF8SKIP(s);
1490 }
1491 break;
1492 case LOWER:
1493 while (s < strend) {
1494 if (isLOWER(*s)) {
1495 if (tmp && regtry(prog, s))
1496 goto got_it;
1497 else
1498 tmp = doevery;
1499 }
1500 else
1501 tmp = 1;
1502 s++;
1503 }
1504 break;
1505 case LOWERUTF8:
1506 while (s < strend) {
1507 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1508 if (tmp && regtry(prog, s))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
1513 else
1514 tmp = 1;
1515 s += UTF8SKIP(s);
1516 }
1517 break;
1518 case LOWERL:
1519 PL_reg_flags |= RF_tainted;
1520 while (s < strend) {
1521 if (isLOWER_LC(*s)) {
1522 if (tmp && regtry(prog, s))
1523 goto got_it;
1524 else
1525 tmp = doevery;
1526 }
1527 else
1528 tmp = 1;
1529 s++;
1530 }
1531 break;
1532 case LOWERLUTF8:
1533 PL_reg_flags |= RF_tainted;
1534 while (s < strend) {
1535 if (*s == ' ' || isLOWER_LC_utf8((U8*)s)) {
1536 if (tmp && regtry(prog, s))
1537 goto got_it;
1538 else
1539 tmp = doevery;
1540 }
1541 else
1542 tmp = 1;
1543 s += UTF8SKIP(s);
1544 }
1545 break;
1546 case NLOWER:
1547 while (s < strend) {
1548 if (!isLOWER(*s)) {
1549 if (tmp && regtry(prog, s))
1550 goto got_it;
1551 else
1552 tmp = doevery;
1553 }
1554 else
1555 tmp = 1;
1556 s++;
1557 }
1558 break;
1559 case NLOWERUTF8:
1560 while (s < strend) {
1561 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1562 if (tmp && regtry(prog, s))
1563 goto got_it;
1564 else
1565 tmp = doevery;
1566 }
1567 else
1568 tmp = 1;
1569 s += UTF8SKIP(s);
1570 }
1571 break;
1572 case NLOWERL:
1573 PL_reg_flags |= RF_tainted;
1574 while (s < strend) {
1575 if (!isLOWER_LC(*s)) {
1576 if (tmp && regtry(prog, s))
1577 goto got_it;
1578 else
1579 tmp = doevery;
1580 }
1581 else
1582 tmp = 1;
1583 s++;
1584 }
1585 break;
1586 case NLOWERLUTF8:
1587 PL_reg_flags |= RF_tainted;
1588 while (s < strend) {
1589 if (!isLOWER_LC_utf8((U8*)s)) {
1590 if (tmp && regtry(prog, s))
1591 goto got_it;
1592 else
1593 tmp = doevery;
1594 }
1595 else
1596 tmp = 1;
1597 s += UTF8SKIP(s);
1598 }
1599 break;
1600 case PRINT:
1601 while (s < strend) {
1602 if (isPRINT(*s)) {
1603 if (tmp && regtry(prog, s))
1604 goto got_it;
1605 else
1606 tmp = doevery;
1607 }
1608 else
1609 tmp = 1;
1610 s++;
1611 }
1612 break;
1613 case PRINTUTF8:
1614 while (s < strend) {
1615 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1616 if (tmp && regtry(prog, s))
1617 goto got_it;
1618 else
1619 tmp = doevery;
1620 }
1621 else
1622 tmp = 1;
1623 s += UTF8SKIP(s);
1624 }
1625 break;
1626 case PRINTL:
1627 PL_reg_flags |= RF_tainted;
1628 while (s < strend) {
1629 if (isPRINT_LC(*s)) {
1630 if (tmp && regtry(prog, s))
1631 goto got_it;
1632 else
1633 tmp = doevery;
1634 }
1635 else
1636 tmp = 1;
1637 s++;
1638 }
1639 break;
1640 case PRINTLUTF8:
1641 PL_reg_flags |= RF_tainted;
1642 while (s < strend) {
1643 if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
1644 if (tmp && regtry(prog, s))
1645 goto got_it;
1646 else
1647 tmp = doevery;
1648 }
1649 else
1650 tmp = 1;
1651 s += UTF8SKIP(s);
1652 }
1653 break;
1654 case NPRINT:
1655 while (s < strend) {
1656 if (!isPRINT(*s)) {
1657 if (tmp && regtry(prog, s))
1658 goto got_it;
1659 else
1660 tmp = doevery;
1661 }
1662 else
1663 tmp = 1;
1664 s++;
1665 }
1666 break;
1667 case NPRINTUTF8:
1668 while (s < strend) {
1669 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1670 if (tmp && regtry(prog, s))
1671 goto got_it;
1672 else
1673 tmp = doevery;
1674 }
1675 else
1676 tmp = 1;
1677 s += UTF8SKIP(s);
1678 }
1679 break;
1680 case NPRINTL:
1681 PL_reg_flags |= RF_tainted;
1682 while (s < strend) {
1683 if (!isPRINT_LC(*s)) {
1684 if (tmp && regtry(prog, s))
1685 goto got_it;
1686 else
1687 tmp = doevery;
1688 }
1689 else
1690 tmp = 1;
1691 s++;
1692 }
1693 break;
1694 case NPRINTLUTF8:
1695 PL_reg_flags |= RF_tainted;
1696 while (s < strend) {
1697 if (!isPRINT_LC_utf8((U8*)s)) {
1698 if (tmp && regtry(prog, s))
1699 goto got_it;
1700 else
1701 tmp = doevery;
1702 }
1703 else
1704 tmp = 1;
1705 s += UTF8SKIP(s);
1706 }
1707 break;
1708 case PUNCT:
1709 while (s < strend) {
1710 if (isPUNCT(*s)) {
1711 if (tmp && regtry(prog, s))
1712 goto got_it;
1713 else
1714 tmp = doevery;
1715 }
1716 else
1717 tmp = 1;
1718 s++;
1719 }
1720 break;
1721 case PUNCTUTF8:
1722 while (s < strend) {
1723 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1724 if (tmp && regtry(prog, s))
1725 goto got_it;
1726 else
1727 tmp = doevery;
1728 }
1729 else
1730 tmp = 1;
1731 s += UTF8SKIP(s);
1732 }
1733 break;
1734 case PUNCTL:
1735 PL_reg_flags |= RF_tainted;
1736 while (s < strend) {
1737 if (isPUNCT_LC(*s)) {
1738 if (tmp && regtry(prog, s))
1739 goto got_it;
1740 else
1741 tmp = doevery;
1742 }
1743 else
1744 tmp = 1;
1745 s++;
1746 }
1747 break;
1748 case PUNCTLUTF8:
1749 PL_reg_flags |= RF_tainted;
1750 while (s < strend) {
1751 if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
1752 if (tmp && regtry(prog, s))
1753 goto got_it;
1754 else
1755 tmp = doevery;
1756 }
1757 else
1758 tmp = 1;
1759 s += UTF8SKIP(s);
1760 }
1761 break;
1762 case NPUNCT:
1763 while (s < strend) {
1764 if (!isPUNCT(*s)) {
1765 if (tmp && regtry(prog, s))
1766 goto got_it;
1767 else
1768 tmp = doevery;
1769 }
1770 else
1771 tmp = 1;
1772 s++;
1773 }
1774 break;
1775 case NPUNCTUTF8:
1776 while (s < strend) {
1777 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1778 if (tmp && regtry(prog, s))
1779 goto got_it;
1780 else
1781 tmp = doevery;
1782 }
1783 else
1784 tmp = 1;
1785 s += UTF8SKIP(s);
1786 }
1787 break;
1788 case NPUNCTL:
1789 PL_reg_flags |= RF_tainted;
1790 while (s < strend) {
1791 if (!isPUNCT_LC(*s)) {
1792 if (tmp && regtry(prog, s))
1793 goto got_it;
1794 else
1795 tmp = doevery;
1796 }
1797 else
1798 tmp = 1;
1799 s++;
1800 }
1801 break;
1802 case NPUNCTLUTF8:
1803 PL_reg_flags |= RF_tainted;
1804 while (s < strend) {
1805 if (!isPUNCT_LC_utf8((U8*)s)) {
1806 if (tmp && regtry(prog, s))
1807 goto got_it;
1808 else
1809 tmp = doevery;
1810 }
1811 else
1812 tmp = 1;
1813 s += UTF8SKIP(s);
1814 }
1815 break;
1816 case UPPER:
1817 while (s < strend) {
1818 if (isUPPER(*s)) {
1819 if (tmp && regtry(prog, s))
1820 goto got_it;
1821 else
1822 tmp = doevery;
1823 }
1824 else
1825 tmp = 1;
1826 s++;
1827 }
1828 break;
1829 case UPPERUTF8:
1830 while (s < strend) {
1831 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1832 if (tmp && regtry(prog, s))
1833 goto got_it;
1834 else
1835 tmp = doevery;
1836 }
1837 else
1838 tmp = 1;
1839 s += UTF8SKIP(s);
1840 }
1841 break;
1842 case UPPERL:
1843 PL_reg_flags |= RF_tainted;
1844 while (s < strend) {
1845 if (isUPPER_LC(*s)) {
1846 if (tmp && regtry(prog, s))
1847 goto got_it;
1848 else
1849 tmp = doevery;
1850 }
1851 else
1852 tmp = 1;
1853 s++;
1854 }
1855 break;
1856 case UPPERLUTF8:
1857 PL_reg_flags |= RF_tainted;
1858 while (s < strend) {
1859 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1860 if (tmp && regtry(prog, s))
1861 goto got_it;
1862 else
1863 tmp = doevery;
1864 }
1865 else
1866 tmp = 1;
1867 s += UTF8SKIP(s);
1868 }
1869 break;
1870 case NUPPER:
1871 while (s < strend) {
1872 if (!isUPPER(*s)) {
1873 if (tmp && regtry(prog, s))
1874 goto got_it;
1875 else
1876 tmp = doevery;
1877 }
1878 else
1879 tmp = 1;
1880 s++;
1881 }
1882 break;
1883 case NUPPERUTF8:
1884 while (s < strend) {
1885 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1886 if (tmp && regtry(prog, s))
1887 goto got_it;
1888 else
1889 tmp = doevery;
1890 }
1891 else
1892 tmp = 1;
1893 s += UTF8SKIP(s);
1894 }
1895 break;
1896 case NUPPERL:
1897 PL_reg_flags |= RF_tainted;
1898 while (s < strend) {
1899 if (!isUPPER_LC(*s)) {
1900 if (tmp && regtry(prog, s))
1901 goto got_it;
1902 else
1903 tmp = doevery;
1904 }
1905 else
1906 tmp = 1;
1907 s++;
1908 }
1909 break;
1910 case NUPPERLUTF8:
1911 PL_reg_flags |= RF_tainted;
1912 while (s < strend) {
1913 if (!isUPPER_LC_utf8((U8*)s)) {
1914 if (tmp && regtry(prog, s))
1915 goto got_it;
1916 else
1917 tmp = doevery;
1918 }
1919 else
1920 tmp = 1;
1921 s += UTF8SKIP(s);
1922 }
1923 break;
1924 case XDIGIT:
1925 while (s < strend) {
1926 if (isXDIGIT(*s)) {
1927 if (tmp && regtry(prog, s))
1928 goto got_it;
1929 else
1930 tmp = doevery;
1931 }
1932 else
1933 tmp = 1;
1934 s++;
1935 }
1936 break;
1937 case NXDIGIT:
1938 while (s < strend) {
1939 if (!isXDIGIT(*s)) {
1940 if (tmp && regtry(prog, s))
1941 goto got_it;
1942 else
1943 tmp = doevery;
1944 }
1945 else
1946 tmp = 1;
1947 s++;
1948 }
1949 break;
1950 }
1951 }
1952 else {
1953 dontbother = 0;
1954 if (prog->float_substr != Nullsv) { /* Trim the end. */
1955 char *last;
1956 I32 oldpos = scream_pos;
1957
1958 if (flags & REXEC_SCREAM) {
1959 last = screaminstr(sv, prog->float_substr, s - strbeg,
1960 end_shift, &scream_pos, 1); /* last one */
1961 if (!last)
1962 last = scream_olds; /* Only one occurence. */
1963 }
1964 else {
1965 STRLEN len;
1966 char *little = SvPV(prog->float_substr, len);
1967
1968 if (SvTAIL(prog->float_substr)) {
1969 if (memEQ(strend - len + 1, little, len - 1))
1970 last = strend - len + 1;
1971 else if (!PL_multiline)
1972 last = memEQ(strend - len, little, len)
1973 ? strend - len : Nullch;
1974 else
1975 goto find_last;
1976 } else {
1977 find_last:
1978 if (len)
1979 last = rninstr(s, strend, little, little + len);
1980 else
1981 last = strend; /* matching `$' */
1982 }
1983 }
1984 if (last == NULL) goto phooey; /* Should not happen! */
1985 dontbother = strend - last + prog->float_min_offset;
1986 }
1987 if (minlen && (dontbother < minlen))
1988 dontbother = minlen - 1;
1989 strend -= dontbother; /* this one's always in bytes! */
1990 /* We don't know much -- general case. */
1991 if (UTF) {
1992 for (;;) {
1993 if (regtry(prog, s))
1994 goto got_it;
1995 if (s >= strend)
1996 break;
1997 s += UTF8SKIP(s);
1998 };
1999 }
2000 else {
2001 do {
2002 if (regtry(prog, s))
2003 goto got_it;
2004 } while (s++ < strend);
2005 }
2006 }
2007
2008 /* Failure. */
2009 goto phooey;
2010
2011got_it:
2012 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2013
2014 if (PL_reg_eval_set) {
2015 /* Preserve the current value of $^R */
2016 if (oreplsv != GvSV(PL_replgv))
2017 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2018 restored, the value remains
2019 the same. */
51371543 2020 restore_pos(aTHXo_ 0);
b8c5462f
JH
2021 }
2022
2023 /* make sure $`, $&, $', and $digit will work later */
2024 if ( !(flags & REXEC_NOT_FIRST) ) {
2025 if (RX_MATCH_COPIED(prog)) {
2026 Safefree(prog->subbeg);
2027 RX_MATCH_COPIED_off(prog);
2028 }
2029 if (flags & REXEC_COPY_STR) {
2030 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2031
2032 s = savepvn(strbeg, i);
2033 prog->subbeg = s;
2034 prog->sublen = i;
2035 RX_MATCH_COPIED_on(prog);
2036 }
2037 else {
2038 prog->subbeg = strbeg;
2039 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2040 }
2041 }
2042
2043 return 1;
2044
2045phooey:
2046 if (PL_reg_eval_set)
51371543 2047 restore_pos(aTHXo_ 0);
b8c5462f
JH
2048 return 0;
2049}
2050
2051/*
2052 - regtry - try match at specific point
2053 */
2054STATIC I32 /* 0 failure, 1 success */
2055S_regtry(pTHX_ regexp *prog, char *startpos)
2056{
2057 dTHR;
2058 register I32 i;
2059 register I32 *sp;
2060 register I32 *ep;
2061 CHECKPOINT lastcp;
2062
2063 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2064 MAGIC *mg;
2065
2066 PL_reg_eval_set = RS_init;
2067 DEBUG_r(DEBUG_s(
2068 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
2069 PL_stack_sp - PL_stack_base);
2070 ));
2071 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2072 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2073 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2074 SAVETMPS;
2075 /* Apparently this is not needed, judging by wantarray. */
2076 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2077 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2078
2079 if (PL_reg_sv) {
2080 /* Make $_ available to executed code. */
2081 if (PL_reg_sv != DEFSV) {
2082 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2083 SAVESPTR(DEFSV);
2084 DEFSV = PL_reg_sv;
2085 }
2086
2087 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2088 && (mg = mg_find(PL_reg_sv, 'g')))) {
2089 /* prepare for quick setting of pos */
2090 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2091 mg = mg_find(PL_reg_sv, 'g');
2092 mg->mg_len = -1;
2093 }
2094 PL_reg_magic = mg;
2095 PL_reg_oldpos = mg->mg_len;
51371543 2096 SAVEDESTRUCTOR(restore_pos, 0);
b8c5462f
JH
2097 }
2098 if (!PL_reg_curpm)
2099 New(22,PL_reg_curpm, 1, PMOP);
2100 PL_reg_curpm->op_pmregexp = prog;
2101 PL_reg_oldcurpm = PL_curpm;
2102 PL_curpm = PL_reg_curpm;
2103 if (RX_MATCH_COPIED(prog)) {
2104 /* Here is a serious problem: we cannot rewrite subbeg,
2105 since it may be needed if this match fails. Thus
2106 $` inside (?{}) could fail... */
2107 PL_reg_oldsaved = prog->subbeg;
2108 PL_reg_oldsavedlen = prog->sublen;
2109 RX_MATCH_COPIED_off(prog);
2110 }
2111 else
2112 PL_reg_oldsaved = Nullch;
2113 prog->subbeg = PL_bostr;
2114 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2115 }
2116 prog->startp[0] = startpos - PL_bostr;
2117 PL_reginput = startpos;
2118 PL_regstartp = prog->startp;
2119 PL_regendp = prog->endp;
2120 PL_reglastparen = &prog->lastparen;
2121 prog->lastparen = 0;
2122 PL_regsize = 0;
2123 DEBUG_r(PL_reg_starttry = startpos);
2124 if (PL_reg_start_tmpl <= prog->nparens) {
2125 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2126 if(PL_reg_start_tmp)
2127 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2128 else
2129 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2130 }
2131
2132 /* XXXX What this code is doing here?!!! There should be no need
2133 to do this again and again, PL_reglastparen should take care of
2134 this! */
2135 sp = prog->startp;
2136 ep = prog->endp;
2137 if (prog->nparens) {
2138 for (i = prog->nparens; i >= 1; i--) {
2139 *++sp = -1;
2140 *++ep = -1;
a687059c 2141 }
a0d0e21e 2142 }
c277df42 2143 REGCP_SET;
7e5428c5 2144 if (regmatch(prog->program + 1)) {
cf93c79d 2145 prog->endp[0] = PL_reginput - PL_bostr;
a0d0e21e
LW
2146 return 1;
2147 }
c277df42
IZ
2148 REGCP_UNWIND;
2149 return 0;
a687059c
LW
2150}
2151
2152/*
2153 - regmatch - main matching routine
2154 *
2155 * Conceptually the strategy is simple: check to see whether the current
2156 * node matches, call self recursively to see whether the rest matches,
2157 * and then act accordingly. In practice we make some effort to avoid
2158 * recursion, in particular by going through "ordinary" nodes (that don't
2159 * need to know whether the rest of the match failed) by a loop instead of
2160 * by recursion.
2161 */
2162/* [lwall] I've hoisted the register declarations to the outer block in order to
2163 * maybe save a little bit of pushing and popping on the stack. It also takes
2164 * advantage of machines that use a register save mask on subroutine entry.
2165 */
76e3520e 2166STATIC I32 /* 0 failure, 1 success */
cea2e8a9 2167S_regmatch(pTHX_ regnode *prog)
a687059c 2168{
c277df42
IZ
2169 dTHR;
2170 register regnode *scan; /* Current node. */
2171 regnode *next; /* Next node. */
2172 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
2173 register I32 nextchr; /* renamed nextchr - nextchar colides with
2174 function of same name */
a0d0e21e
LW
2175 register I32 n; /* no or next */
2176 register I32 ln; /* len or last */
2177 register char *s; /* operand or save */
3280af22 2178 register char *locinput = PL_reginput;
c277df42
IZ
2179 register I32 c1, c2, paren; /* case fold search, parenth */
2180 int minmod = 0, sw = 0, logical = 0;
4633a7c4 2181#ifdef DEBUGGING
3280af22 2182 PL_regindent++;
4633a7c4 2183#endif
a0d0e21e 2184
a0ed51b3 2185 /* Note that nextchr is a byte even in UTF */
76e3520e 2186 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2187 scan = prog;
2188 while (scan != NULL) {
c277df42 2189#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 2190#ifdef DEBUGGING
c277df42
IZ
2191# define sayYES goto yes
2192# define sayNO goto no
2193# define saySAME(x) if (x) goto yes; else goto no
2194# define REPORT_CODE_OFF 24
4633a7c4 2195#else
c277df42
IZ
2196# define sayYES return 1
2197# define sayNO return 0
2198# define saySAME(x) return x
a687059c 2199#endif
c277df42
IZ
2200 DEBUG_r( {
2201 SV *prop = sv_newmortal();
3280af22 2202 int docolor = *PL_colors[0];
c277df42 2203 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 2204 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
beed8111
IZ
2205 /* The part of the string before starttry has one color
2206 (pref0_len chars), between starttry and current
2207 position another one (pref_len - pref0_len chars),
2208 after the current position the third one.
2209 We assume that pref0_len <= pref_len, otherwise we
2210 decrease pref0_len. */
3280af22
NIS
2211 int pref_len = (locinput - PL_bostr > (5 + taill) - l
2212 ? (5 + taill) - l : locinput - PL_bostr);
364723c2 2213 int pref0_len = pref_len - (locinput - PL_reg_starttry);
c277df42 2214
3280af22
NIS
2215 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2216 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2217 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32
GS
2218 if (pref0_len < 0)
2219 pref0_len = 0;
beed8111
IZ
2220 if (pref0_len > pref_len)
2221 pref0_len = pref_len;
c277df42
IZ
2222 regprop(prop, scan);
2223 PerlIO_printf(Perl_debug_log,
8d300b32 2224 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 2225 locinput - PL_bostr,
8d300b32
GS
2226 PL_colors[4], pref0_len,
2227 locinput - pref_len, PL_colors[5],
2228 PL_colors[2], pref_len - pref0_len,
2229 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 2230 (docolor ? "" : "> <"),
3280af22 2231 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
2232 15 - l - pref_len + 1,
2233 "",
3280af22 2234 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
2235 SvPVX(prop));
2236 } );
a687059c 2237
c277df42 2238 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
2239 if (next == scan)
2240 next = NULL;
a687059c 2241
a0d0e21e
LW
2242 switch (OP(scan)) {
2243 case BOL:
3280af22
NIS
2244 if (locinput == PL_bostr
2245 ? PL_regprev == '\n'
2246 : (PL_multiline &&
2247 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 2248 {
a0ed51b3 2249 /* regtill = regbol; */
a0d0e21e
LW
2250 break;
2251 }
b8c5462f
JH
2252 sayNO;
2253 case MBOL:
2254 if (locinput == PL_bostr
2255 ? PL_regprev == '\n'
2256 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2257 {
2258 break;
2259 }
2260 sayNO;
2261 case SBOL:
2262 if (locinput == PL_regbol && PL_regprev == '\n')
2263 break;
2264 sayNO;
2265 case GPOS:
2266 if (locinput == PL_reg_ganch)
2267 break;
2268 sayNO;
2269 case EOL:
2270 if (PL_multiline)
2271 goto meol;
2272 else
2273 goto seol;
2274 case MEOL:
2275 meol:
2276 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2277 sayNO;
2278 break;
2279 case SEOL:
2280 seol:
2281 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2282 sayNO;
2283 if (PL_regeol - locinput > 1)
2284 sayNO;
2285 break;
2286 case EOS:
2287 if (PL_regeol != locinput)
2288 sayNO;
2289 break;
2290 case SANYUTF8:
2291 if (nextchr & 0x80) {
2292 locinput += PL_utf8skip[nextchr];
2293 if (locinput > PL_regeol)
2294 sayNO;
2295 nextchr = UCHARAT(locinput);
2296 break;
2297 }
2298 if (!nextchr && locinput >= PL_regeol)
2299 sayNO;
2300 nextchr = UCHARAT(++locinput);
2301 break;
2302 case SANY:
2303 if (!nextchr && locinput >= PL_regeol)
2304 sayNO;
2305 nextchr = UCHARAT(++locinput);
2306 break;
2307 case ANYUTF8:
2308 if (nextchr & 0x80) {
2309 locinput += PL_utf8skip[nextchr];
2310 if (locinput > PL_regeol)
2311 sayNO;
2312 nextchr = UCHARAT(locinput);
2313 break;
2314 }
2315 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2316 sayNO;
2317 nextchr = UCHARAT(++locinput);
2318 break;
2319 case REG_ANY:
2320 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2321 sayNO;
2322 nextchr = UCHARAT(++locinput);
2323 break;
2324 case EXACT:
2325 s = (char *) OPERAND(scan);
2326 ln = UCHARAT(s++);
2327 /* Inline the first character, for speed. */
2328 if (UCHARAT(s) != nextchr)
2329 sayNO;
2330 if (PL_regeol - locinput < ln)
2331 sayNO;
2332 if (ln > 1 && memNE(s, locinput, ln))
2333 sayNO;
2334 locinput += ln;
2335 nextchr = UCHARAT(locinput);
2336 break;
2337 case EXACTFL:
2338 PL_reg_flags |= RF_tainted;
2339 /* FALL THROUGH */
2340 case EXACTF:
2341 s = (char *) OPERAND(scan);
2342 ln = UCHARAT(s++);
2343
2344 if (UTF) {
2345 char *l = locinput;
2346 char *e = s + ln;
2347 c1 = OP(scan) == EXACTF;
2348 while (s < e) {
2349 if (l >= PL_regeol)
2350 sayNO;
2351 if (utf8_to_uv((U8*)s, 0) != (c1 ?
2352 toLOWER_utf8((U8*)l) :
2353 toLOWER_LC_utf8((U8*)l)))
2354 {
2355 sayNO;
2356 }
2357 s += UTF8SKIP(s);
2358 l += UTF8SKIP(l);
2359 }
2360 locinput = l;
2361 nextchr = UCHARAT(locinput);
2362 break;
2363 }
2364
2365 /* Inline the first character, for speed. */
2366 if (UCHARAT(s) != nextchr &&
2367 UCHARAT(s) != ((OP(scan) == EXACTF)
2368 ? PL_fold : PL_fold_locale)[nextchr])
2369 sayNO;
2370 if (PL_regeol - locinput < ln)
2371 sayNO;
2372 if (ln > 1 && (OP(scan) == EXACTF
2373 ? ibcmp(s, locinput, ln)
2374 : ibcmp_locale(s, locinput, ln)))
2375 sayNO;
2376 locinput += ln;
2377 nextchr = UCHARAT(locinput);
2378 break;
2379 case ANYOFUTF8:
2380 s = (char *) OPERAND(scan);
2381 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2382 sayNO;
2383 if (locinput >= PL_regeol)
2384 sayNO;
2385 locinput += PL_utf8skip[nextchr];
2386 nextchr = UCHARAT(locinput);
2387 break;
2388 case ANYOF:
2389 s = (char *) OPERAND(scan);
2390 if (nextchr < 0)
2391 nextchr = UCHARAT(locinput);
2392 if (!REGINCLASS(s, nextchr))
2393 sayNO;
2394 if (!nextchr && locinput >= PL_regeol)
2395 sayNO;
2396 nextchr = UCHARAT(++locinput);
2397 break;
2398 case ALNUML:
2399 PL_reg_flags |= RF_tainted;
2400 /* FALL THROUGH */
2401 case ALNUM:
2402 if (!nextchr)
2403 sayNO;
2404 if (!(OP(scan) == ALNUM
2405 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2406 sayNO;
2407 nextchr = UCHARAT(++locinput);
2408 break;
2409 case ALNUMLUTF8:
2410 PL_reg_flags |= RF_tainted;
2411 /* FALL THROUGH */
2412 case ALNUMUTF8:
2413 if (!nextchr)
2414 sayNO;
2415 if (nextchr & 0x80) {
2416 if (!(OP(scan) == ALNUMUTF8
2417 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2418 : isALNUM_LC_utf8((U8*)locinput)))
2419 {
2420 sayNO;
2421 }
2422 locinput += PL_utf8skip[nextchr];
2423 nextchr = UCHARAT(locinput);
2424 break;
2425 }
2426 if (!(OP(scan) == ALNUMUTF8
2427 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2428 sayNO;
2429 nextchr = UCHARAT(++locinput);
2430 break;
2431 case NALNUML:
2432 PL_reg_flags |= RF_tainted;
2433 /* FALL THROUGH */
2434 case NALNUM:
2435 if (!nextchr && locinput >= PL_regeol)
2436 sayNO;
2437 if (OP(scan) == NALNUM
2438 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2439 sayNO;
2440 nextchr = UCHARAT(++locinput);
2441 break;
2442 case NALNUMLUTF8:
2443 PL_reg_flags |= RF_tainted;
2444 /* FALL THROUGH */
2445 case NALNUMUTF8:
2446 if (!nextchr && locinput >= PL_regeol)
2447 sayNO;
2448 if (nextchr & 0x80) {
2449 if (OP(scan) == NALNUMUTF8
2450 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2451 : isALNUM_LC_utf8((U8*)locinput))
2452 {
2453 sayNO;
2454 }
2455 locinput += PL_utf8skip[nextchr];
2456 nextchr = UCHARAT(locinput);
2457 break;
2458 }
2459 if (OP(scan) == NALNUMUTF8
2460 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2461 sayNO;
2462 nextchr = UCHARAT(++locinput);
2463 break;
2464 case BOUNDL:
2465 case NBOUNDL:
2466 PL_reg_flags |= RF_tainted;
2467 /* FALL THROUGH */
2468 case BOUND:
2469 case NBOUND:
2470 /* was last char in word? */
2471 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2472 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2473 ln = isALNUM(ln);
2474 n = isALNUM(nextchr);
2475 }
2476 else {
2477 ln = isALNUM_LC(ln);
2478 n = isALNUM_LC(nextchr);
2479 }
2480 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2481 sayNO;
2482 break;
2483 case BOUNDLUTF8:
2484 case NBOUNDLUTF8:
2485 PL_reg_flags |= RF_tainted;
2486 /* FALL THROUGH */
2487 case BOUNDUTF8:
2488 case NBOUNDUTF8:
2489 /* was last char in word? */
2490 ln = (locinput != PL_regbol)
2491 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2492 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2493 ln = isALNUM_uni(ln);
2494 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2495 }
2496 else {
2497 ln = isALNUM_LC_uni(ln);
2498 n = isALNUM_LC_utf8((U8*)locinput);
2499 }
2500 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2501 sayNO;
2502 break;
2503 case SPACEL:
2504 PL_reg_flags |= RF_tainted;
2505 /* FALL THROUGH */
2506 case SPACE:
2507 if (!nextchr && locinput >= PL_regeol)
2508 sayNO;
2509 if (!(OP(scan) == SPACE
2510 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2511 sayNO;
2512 nextchr = UCHARAT(++locinput);
2513 break;
2514 case SPACELUTF8:
2515 PL_reg_flags |= RF_tainted;
2516 /* FALL THROUGH */
2517 case SPACEUTF8:
2518 if (!nextchr && locinput >= PL_regeol)
2519 sayNO;
2520 if (nextchr & 0x80) {
2521 if (!(OP(scan) == SPACEUTF8
2522 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2523 : isSPACE_LC_utf8((U8*)locinput)))
2524 {
2525 sayNO;
2526 }
2527 locinput += PL_utf8skip[nextchr];
2528 nextchr = UCHARAT(locinput);
2529 break;
2530 }
2531 if (!(OP(scan) == SPACEUTF8
2532 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2533 sayNO;
2534 nextchr = UCHARAT(++locinput);
2535 break;
2536 case NSPACEL:
2537 PL_reg_flags |= RF_tainted;
2538 /* FALL THROUGH */
2539 case NSPACE:
2540 if (!nextchr)
2541 sayNO;
2542 if (OP(scan) == SPACE
2543 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2544 sayNO;
2545 nextchr = UCHARAT(++locinput);
2546 break;
2547 case NSPACELUTF8:
2548 PL_reg_flags |= RF_tainted;
2549 /* FALL THROUGH */
2550 case NSPACEUTF8:
2551 if (!nextchr)
2552 sayNO;
2553 if (nextchr & 0x80) {
2554 if (OP(scan) == NSPACEUTF8
2555 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2556 : isSPACE_LC_utf8((U8*)locinput))
2557 {
2558 sayNO;
2559 }
2560 locinput += PL_utf8skip[nextchr];
2561 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2562 break;
2563 }
b8c5462f
JH
2564 if (OP(scan) == NSPACEUTF8
2565 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2566 sayNO;
2567 nextchr = UCHARAT(++locinput);
2568 break;
2569 case DIGITL:
2570 PL_reg_flags |= RF_tainted;
2571 /* FALL THROUGH */
2572 case DIGIT:
2573 if (!nextchr && locinput >= PL_regeol)
2574 sayNO;
2575 if (!(OP(scan) == DIGIT
2576 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2577 sayNO;
2578 nextchr = UCHARAT(++locinput);
2579 break;
2580 case DIGITLUTF8:
2581 PL_reg_flags |= RF_tainted;
2582 /* FALL THROUGH */
2583 case DIGITUTF8:
2584 if (!nextchr)
2585 sayNO;
2586 if (nextchr & 0x80) {
2587 if (OP(scan) == NDIGITUTF8
2588 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2589 : isDIGIT_LC_utf8((U8*)locinput))
2590 {
2591 sayNO;
2592 }
2593 locinput += PL_utf8skip[nextchr];
2594 nextchr = UCHARAT(locinput);
a0d0e21e 2595 break;
b8c5462f
JH
2596 }
2597 if (!isDIGIT(nextchr))
2598 sayNO;
2599 nextchr = UCHARAT(++locinput);
2600 break;
2601 case NDIGITL:
2602 PL_reg_flags |= RF_tainted;
2603 /* FALL THROUGH */
2604 case NDIGIT:
2605 if (!nextchr)
2606 sayNO;
2607 if (OP(scan) == DIGIT
2608 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2609 sayNO;
2610 nextchr = UCHARAT(++locinput);
2611 break;
2612 case NDIGITLUTF8:
2613 PL_reg_flags |= RF_tainted;
2614 /* FALL THROUGH */
2615 case NDIGITUTF8:
2616 if (!nextchr && locinput >= PL_regeol)
2617 sayNO;
2618 if (nextchr & 0x80) {
2619 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2620 sayNO;
2621 locinput += PL_utf8skip[nextchr];
2622 nextchr = UCHARAT(locinput);
a0d0e21e 2623 break;
b8c5462f
JH
2624 }
2625 if (isDIGIT(nextchr))
4633a7c4 2626 sayNO;
b8c5462f 2627 nextchr = UCHARAT(++locinput);
a0d0e21e 2628 break;
b8c5462f
JH
2629 case ALNUMCL:
2630 PL_reg_flags |= RF_tainted;
2631 /* FALL THROUGH */
2632 case ALNUMC:
2633 if (!nextchr)
4633a7c4 2634 sayNO;
b8c5462f
JH
2635 if (!(OP(scan) == ALNUMC
2636 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2637 sayNO;
2638 nextchr = UCHARAT(++locinput);
2639 break;
2640 case ALNUMCLUTF8:
2641 PL_reg_flags |= RF_tainted;
2642 /* FALL THROUGH */
2643 case ALNUMCUTF8:
2644 if (!nextchr)
2645 sayNO;
2646 if (nextchr & 0x80) {
2647 if (!(OP(scan) == ALNUMCUTF8
2648 ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2649 : isALNUMC_LC_utf8((U8*)locinput)))
2650 {
2651 sayNO;
2652 }
2653 locinput += PL_utf8skip[nextchr];
2654 nextchr = UCHARAT(locinput);
2655 break;
2656 }
2657 if (!(OP(scan) == ALNUMCUTF8
2658 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2659 sayNO;
2660 nextchr = UCHARAT(++locinput);
2661 break;
2662 case NALNUMCL:
2663 PL_reg_flags |= RF_tainted;
2664 /* FALL THROUGH */
2665 case NALNUMC:
2666 if (!nextchr)
2667 sayNO;
2668 if (OP(scan) == ALNUMC
2669 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2670 sayNO;
2671 nextchr = UCHARAT(++locinput);
2672 break;
2673 case NALNUMCLUTF8:
2674 PL_reg_flags |= RF_tainted;
2675 /* FALL THROUGH */
2676 case NALNUMCUTF8:
2677 if (!nextchr && locinput >= PL_regeol)
2678 sayNO;
2679 if (nextchr & 0x80) {
2680 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2681 sayNO;
2682 locinput += PL_utf8skip[nextchr];
2683 nextchr = UCHARAT(locinput);
2684 break;
2685 }
2686 if (isALNUMC(nextchr))
2687 sayNO;
2688 nextchr = UCHARAT(++locinput);
2689 break;
2690 case ALPHAL:
2691 PL_reg_flags |= RF_tainted;
2692 /* FALL THROUGH */
2693 case ALPHA:
2694 if (!nextchr)
2695 sayNO;
2696 if (!(OP(scan) == ALPHA
2697 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2698 sayNO;
2699 nextchr = UCHARAT(++locinput);
2700 break;
2701 case ALPHALUTF8:
2702 PL_reg_flags |= RF_tainted;
2703 /* FALL THROUGH */
2704 case ALPHAUTF8:
2705 if (!nextchr)
2706 sayNO;
2707 if (nextchr & 0x80) {
2708 if (!(OP(scan) == ALPHAUTF8
2709 ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2710 : isALPHA_LC_utf8((U8*)locinput)))
2711 {
2712 sayNO;
2713 }
2714 locinput += PL_utf8skip[nextchr];
2715 nextchr = UCHARAT(locinput);
2716 break;
2717 }
2718 if (!(OP(scan) == ALPHAUTF8
2719 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2720 sayNO;
2721 nextchr = UCHARAT(++locinput);
2722 break;
2723 case NALPHAL:
2724 PL_reg_flags |= RF_tainted;
2725 /* FALL THROUGH */
2726 case NALPHA:
2727 if (!nextchr)
2728 sayNO;
2729 if (OP(scan) == ALPHA
2730 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2731 sayNO;
2732 nextchr = UCHARAT(++locinput);
2733 break;
2734 case NALPHALUTF8:
2735 PL_reg_flags |= RF_tainted;
2736 /* FALL THROUGH */
2737 case NALPHAUTF8:
2738 if (!nextchr && locinput >= PL_regeol)
2739 sayNO;
2740 if (nextchr & 0x80) {
2741 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2742 sayNO;
2743 locinput += PL_utf8skip[nextchr];
2744 nextchr = UCHARAT(locinput);
2745 break;
2746 }
2747 if (isALPHA(nextchr))
2748 sayNO;
2749 nextchr = UCHARAT(++locinput);
2750 break;
2751 case ASCII:
2752 if (!nextchr && locinput >= PL_regeol)
2753 sayNO;
2754 if (!isASCII(nextchr))
2755 sayNO;
2756 nextchr = UCHARAT(++locinput);
2757 break;
2758 case NASCII:
2759 if (!nextchr && locinput >= PL_regeol)
2760 sayNO;
2761 if (isASCII(nextchr))
2762 sayNO;
2763 nextchr = UCHARAT(++locinput);
2764 break;
2765 case CNTRLL:
2766 PL_reg_flags |= RF_tainted;
2767 /* FALL THROUGH */
2768 case CNTRL:
2769 if (!nextchr)
2770 sayNO;
2771 if (!(OP(scan) == CNTRL
2772 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2773 sayNO;
2774 nextchr = UCHARAT(++locinput);
2775 break;
2776 case CNTRLLUTF8:
2777 PL_reg_flags |= RF_tainted;
2778 /* FALL THROUGH */
2779 case CNTRLUTF8:
2780 if (!nextchr)
2781 sayNO;
2782 if (nextchr & 0x80) {
2783 if (!(OP(scan) == CNTRLUTF8
2784 ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2785 : isCNTRL_LC_utf8((U8*)locinput)))
2786 {
2787 sayNO;
2788 }
2789 locinput += PL_utf8skip[nextchr];
2790 nextchr = UCHARAT(locinput);
2791 break;
2792 }
2793 if (!(OP(scan) == CNTRLUTF8
2794 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
4633a7c4 2795 sayNO;
b8c5462f 2796 nextchr = UCHARAT(++locinput);
a0d0e21e 2797 break;
b8c5462f
JH
2798 case NCNTRLL:
2799 PL_reg_flags |= RF_tainted;
2800 /* FALL THROUGH */
2801 case NCNTRL:
2802 if (!nextchr)
b85d18e9 2803 sayNO;
b8c5462f
JH
2804 if (OP(scan) == CNTRL
2805 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2806 sayNO;
2807 nextchr = UCHARAT(++locinput);
b85d18e9 2808 break;
b8c5462f
JH
2809 case NCNTRLLUTF8:
2810 PL_reg_flags |= RF_tainted;
2811 /* FALL THROUGH */
2812 case NCNTRLUTF8:
2813 if (!nextchr && locinput >= PL_regeol)
2814 sayNO;
a0ed51b3 2815 if (nextchr & 0x80) {
b8c5462f 2816 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
a0ed51b3 2817 sayNO;
b8c5462f 2818 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2819 nextchr = UCHARAT(locinput);
2820 break;
2821 }
b8c5462f 2822 if (isCNTRL(nextchr))
a0ed51b3
LW
2823 sayNO;
2824 nextchr = UCHARAT(++locinput);
2825 break;
b8c5462f
JH
2826 case GRAPHL:
2827 PL_reg_flags |= RF_tainted;
2828 /* FALL THROUGH */
2829 case GRAPH:
2830 if (!nextchr)
2831 sayNO;
2832 if (!(OP(scan) == GRAPH
2833 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
4633a7c4 2834 sayNO;
76e3520e 2835 nextchr = UCHARAT(++locinput);
a0d0e21e 2836 break;
b8c5462f
JH
2837 case GRAPHLUTF8:
2838 PL_reg_flags |= RF_tainted;
2839 /* FALL THROUGH */
2840 case GRAPHUTF8:
2841 if (!nextchr)
2842 sayNO;
a0ed51b3 2843 if (nextchr & 0x80) {
b8c5462f
JH
2844 if (!(OP(scan) == GRAPHUTF8
2845 ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2846 : isGRAPH_LC_utf8((U8*)locinput)))
2847 {
a0ed51b3 2848 sayNO;
b8c5462f
JH
2849 }
2850 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2851 nextchr = UCHARAT(locinput);
2852 break;
2853 }
b8c5462f
JH
2854 if (!(OP(scan) == GRAPHUTF8
2855 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
a0ed51b3
LW
2856 sayNO;
2857 nextchr = UCHARAT(++locinput);
2858 break;
b8c5462f
JH
2859 case NGRAPHL:
2860 PL_reg_flags |= RF_tainted;
2861 /* FALL THROUGH */
2862 case NGRAPH:
2863 if (!nextchr)
2864 sayNO;
2865 if (OP(scan) == GRAPH
2866 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
4633a7c4 2867 sayNO;
76e3520e 2868 nextchr = UCHARAT(++locinput);
a0d0e21e 2869 break;
b8c5462f
JH
2870 case NGRAPHLUTF8:
2871 PL_reg_flags |= RF_tainted;
2872 /* FALL THROUGH */
2873 case NGRAPHUTF8:
2874 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2875 sayNO;
b8c5462f
JH
2876 if (nextchr & 0x80) {
2877 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2878 sayNO;
2879 locinput += PL_utf8skip[nextchr];
2880 nextchr = UCHARAT(locinput);
2881 break;
2882 }
2883 if (isGRAPH(nextchr))
4633a7c4 2884 sayNO;
b8c5462f
JH
2885 nextchr = UCHARAT(++locinput);
2886 break;
2887 case LOWERL:
2888 PL_reg_flags |= RF_tainted;
2889 /* FALL THROUGH */
2890 case LOWER:
2891 if (!nextchr)
4633a7c4 2892 sayNO;
b8c5462f
JH
2893 if (!(OP(scan) == LOWER
2894 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2895 sayNO;
2896 nextchr = UCHARAT(++locinput);
bbce6d69 2897 break;
b8c5462f 2898 case LOWERLUTF8:
3280af22 2899 PL_reg_flags |= RF_tainted;
bbce6d69 2900 /* FALL THROUGH */
b8c5462f
JH
2901 case LOWERUTF8:
2902 if (!nextchr)
2903 sayNO;
2904 if (nextchr & 0x80) {
2905 if (!(OP(scan) == LOWERUTF8
2906 ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2907 : isLOWER_LC_utf8((U8*)locinput)))
2908 {
2909 sayNO;
a0ed51b3 2910 }
b8c5462f 2911 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2912 nextchr = UCHARAT(locinput);
2913 break;
2914 }
b8c5462f
JH
2915 if (!(OP(scan) == LOWERUTF8
2916 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
bbce6d69 2917 sayNO;
b8c5462f 2918 nextchr = UCHARAT(++locinput);
a0d0e21e 2919 break;
b8c5462f
JH
2920 case NLOWERL:
2921 PL_reg_flags |= RF_tainted;
2922 /* FALL THROUGH */
2923 case NLOWER:
2924 if (!nextchr)
a0ed51b3 2925 sayNO;
b8c5462f
JH
2926 if (OP(scan) == LOWER
2927 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
a0ed51b3 2928 sayNO;
b8c5462f 2929 nextchr = UCHARAT(++locinput);
a0ed51b3 2930 break;
b8c5462f
JH
2931 case NLOWERLUTF8:
2932 PL_reg_flags |= RF_tainted;
2933 /* FALL THROUGH */
2934 case NLOWERUTF8:
3280af22 2935 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2936 sayNO;
b8c5462f
JH
2937 if (nextchr & 0x80) {
2938 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2939 sayNO;
2940 locinput += PL_utf8skip[nextchr];
2941 nextchr = UCHARAT(locinput);
2942 break;
2943 }
2944 if (isLOWER(nextchr))
2945 sayNO;
76e3520e 2946 nextchr = UCHARAT(++locinput);
a0d0e21e 2947 break;
b8c5462f 2948 case PRINTL:
3280af22 2949 PL_reg_flags |= RF_tainted;
bbce6d69 2950 /* FALL THROUGH */
b8c5462f 2951 case PRINT:
76e3520e 2952 if (!nextchr)
4633a7c4 2953 sayNO;
b8c5462f
JH
2954 if (!(OP(scan) == PRINT
2955 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
4633a7c4 2956 sayNO;
76e3520e 2957 nextchr = UCHARAT(++locinput);
a0d0e21e 2958 break;
b8c5462f 2959 case PRINTLUTF8:
a0ed51b3
LW
2960 PL_reg_flags |= RF_tainted;
2961 /* FALL THROUGH */
b8c5462f 2962 case PRINTUTF8:
a0ed51b3
LW
2963 if (!nextchr)
2964 sayNO;
2965 if (nextchr & 0x80) {
b8c5462f
JH
2966 if (!(OP(scan) == PRINTUTF8
2967 ? swash_fetch(PL_utf8_print, (U8*)locinput)
2968 : isPRINT_LC_utf8((U8*)locinput)))
dfe13c55 2969 {
a0ed51b3 2970 sayNO;
dfe13c55 2971 }
6f06b55f 2972 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2973 nextchr = UCHARAT(locinput);
2974 break;
2975 }
b8c5462f
JH
2976 if (!(OP(scan) == PRINTUTF8
2977 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
a0ed51b3
LW
2978 sayNO;
2979 nextchr = UCHARAT(++locinput);
2980 break;
b8c5462f 2981 case NPRINTL:
3280af22 2982 PL_reg_flags |= RF_tainted;
bbce6d69 2983 /* FALL THROUGH */
b8c5462f
JH
2984 case NPRINT:
2985 if (!nextchr)
4633a7c4 2986 sayNO;
b8c5462f
JH
2987 if (OP(scan) == PRINT
2988 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
4633a7c4 2989 sayNO;
76e3520e 2990 nextchr = UCHARAT(++locinput);
a0d0e21e 2991 break;
b8c5462f 2992 case NPRINTLUTF8:
a0ed51b3
LW
2993 PL_reg_flags |= RF_tainted;
2994 /* FALL THROUGH */
b8c5462f 2995 case NPRINTUTF8:
a0ed51b3
LW
2996 if (!nextchr && locinput >= PL_regeol)
2997 sayNO;
2998 if (nextchr & 0x80) {
b8c5462f 2999 if (swash_fetch(PL_utf8_print,(U8*)locinput))
a0ed51b3 3000 sayNO;
6f06b55f 3001 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3002 nextchr = UCHARAT(locinput);
3003 break;
3004 }
b8c5462f 3005 if (isPRINT(nextchr))
a0ed51b3
LW
3006 sayNO;
3007 nextchr = UCHARAT(++locinput);
3008 break;
b8c5462f 3009 case PUNCTL:
3280af22 3010 PL_reg_flags |= RF_tainted;
bbce6d69 3011 /* FALL THROUGH */
b8c5462f
JH
3012 case PUNCT:
3013 if (!nextchr)
3014 sayNO;
3015 if (!(OP(scan) == PUNCT
3016 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
4633a7c4 3017 sayNO;
b8c5462f 3018 nextchr = UCHARAT(++locinput);
a0d0e21e 3019 break;
b8c5462f 3020 case PUNCTLUTF8:
a0ed51b3
LW
3021 PL_reg_flags |= RF_tainted;
3022 /* FALL THROUGH */
b8c5462f
JH
3023 case PUNCTUTF8:
3024 if (!nextchr)
3025 sayNO;
3026 if (nextchr & 0x80) {
3027 if (!(OP(scan) == PUNCTUTF8
3028 ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3029 : isPUNCT_LC_utf8((U8*)locinput)))
3030 {
3031 sayNO;
3032 }
3033 locinput += PL_utf8skip[nextchr];
3034 nextchr = UCHARAT(locinput);
3035 break;
a0ed51b3 3036 }
b8c5462f
JH
3037 if (!(OP(scan) == PUNCTUTF8
3038 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
a0ed51b3 3039 sayNO;
b8c5462f 3040 nextchr = UCHARAT(++locinput);
a0ed51b3 3041 break;
b8c5462f 3042 case NPUNCTL:
3280af22 3043 PL_reg_flags |= RF_tainted;
bbce6d69 3044 /* FALL THROUGH */
b8c5462f
JH
3045 case NPUNCT:
3046 if (!nextchr)
4633a7c4 3047 sayNO;
b8c5462f
JH
3048 if (OP(scan) == PUNCT
3049 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
4633a7c4 3050 sayNO;
76e3520e 3051 nextchr = UCHARAT(++locinput);
a0d0e21e 3052 break;
b8c5462f 3053 case NPUNCTLUTF8:
a0ed51b3
LW
3054 PL_reg_flags |= RF_tainted;
3055 /* FALL THROUGH */
b8c5462f 3056 case NPUNCTUTF8:
a0ed51b3
LW
3057 if (!nextchr && locinput >= PL_regeol)
3058 sayNO;
3059 if (nextchr & 0x80) {
b8c5462f 3060 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
a0ed51b3 3061 sayNO;
6f06b55f 3062 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3063 nextchr = UCHARAT(locinput);
3064 break;
3065 }
b8c5462f 3066 if (isPUNCT(nextchr))
a0ed51b3
LW
3067 sayNO;
3068 nextchr = UCHARAT(++locinput);
3069 break;
b8c5462f 3070 case UPPERL:
3280af22 3071 PL_reg_flags |= RF_tainted;
bbce6d69 3072 /* FALL THROUGH */
b8c5462f 3073 case UPPER:
76e3520e 3074 if (!nextchr)
4633a7c4 3075 sayNO;
b8c5462f
JH
3076 if (!(OP(scan) == UPPER
3077 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
4633a7c4 3078 sayNO;
76e3520e 3079 nextchr = UCHARAT(++locinput);
a0d0e21e 3080 break;
b8c5462f 3081 case UPPERLUTF8:
a0ed51b3
LW
3082 PL_reg_flags |= RF_tainted;
3083 /* FALL THROUGH */
b8c5462f 3084 case UPPERUTF8:
a0ed51b3
LW
3085 if (!nextchr)
3086 sayNO;
3087 if (nextchr & 0x80) {
b8c5462f
JH
3088 if (!(OP(scan) == UPPERUTF8
3089 ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3090 : isUPPER_LC_utf8((U8*)locinput)))
dfe13c55 3091 {
a0ed51b3 3092 sayNO;
dfe13c55 3093 }
6f06b55f 3094 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3095 nextchr = UCHARAT(locinput);
3096 break;
3097 }
b8c5462f
JH
3098 if (!(OP(scan) == UPPERUTF8
3099 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
a0ed51b3
LW
3100 sayNO;
3101 nextchr = UCHARAT(++locinput);
3102 break;
b8c5462f
JH
3103 case NUPPERL:
3104 PL_reg_flags |= RF_tainted;
3105 /* FALL THROUGH */
3106 case NUPPER:
3107 if (!nextchr)
3108 sayNO;
3109 if (OP(scan) == UPPER
3110 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
4633a7c4 3111 sayNO;
76e3520e 3112 nextchr = UCHARAT(++locinput);
a0d0e21e 3113 break;
b8c5462f
JH
3114 case NUPPERLUTF8:
3115 PL_reg_flags |= RF_tainted;
3116 /* FALL THROUGH */
3117 case NUPPERUTF8:
3118 if (!nextchr && locinput >= PL_regeol)
3119 sayNO;
a0ed51b3 3120 if (nextchr & 0x80) {
b8c5462f 3121 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
a0ed51b3 3122 sayNO;
6f06b55f 3123 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3124 nextchr = UCHARAT(locinput);
3125 break;
3126 }
b8c5462f 3127 if (isUPPER(nextchr))
a0ed51b3
LW
3128 sayNO;
3129 nextchr = UCHARAT(++locinput);
3130 break;
b8c5462f 3131 case XDIGIT:
3280af22 3132 if (!nextchr && locinput >= PL_regeol)
4633a7c4 3133 sayNO;
b8c5462f 3134 if (!isXDIGIT(nextchr))
4633a7c4 3135 sayNO;
76e3520e 3136 nextchr = UCHARAT(++locinput);
a0d0e21e 3137 break;
b8c5462f 3138 case NXDIGIT:
a0ed51b3
LW
3139 if (!nextchr && locinput >= PL_regeol)
3140 sayNO;
b8c5462f 3141 if (isXDIGIT(nextchr))
a0ed51b3
LW
3142 sayNO;
3143 nextchr = UCHARAT(++locinput);
3144 break;
3145 case CLUMP:
dfe13c55 3146 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 3147 sayNO;
6f06b55f 3148 locinput += PL_utf8skip[nextchr];
dfe13c55 3149 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
3150 locinput += UTF8SKIP(locinput);
3151 if (locinput > PL_regeol)
3152 sayNO;
3153 nextchr = UCHARAT(locinput);
3154 break;
c8756f30 3155 case REFFL:
3280af22 3156 PL_reg_flags |= RF_tainted;
c8756f30 3157 /* FALL THROUGH */
c277df42 3158 case REF:
c8756f30 3159 case REFF:
c277df42 3160 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3161 ln = PL_regstartp[n];
3162 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3163 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3164 if (ln == PL_regendp[n])
a0d0e21e 3165 break;
a0ed51b3 3166
cf93c79d 3167 s = PL_bostr + ln;
a0ed51b3
LW
3168 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
3169 char *l = locinput;
cf93c79d 3170 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3171 /*
3172 * Note that we can't do the "other character" lookup trick as
3173 * in the 8-bit case (no pun intended) because in Unicode we
3174 * have to map both upper and title case to lower case.
3175 */
3176 if (OP(scan) == REFF) {
3177 while (s < e) {
3178 if (l >= PL_regeol)
3179 sayNO;
dfe13c55 3180 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
3181 sayNO;
3182 s += UTF8SKIP(s);
3183 l += UTF8SKIP(l);
3184 }
3185 }
3186 else {
3187 while (s < e) {
3188 if (l >= PL_regeol)
3189 sayNO;
dfe13c55 3190 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
3191 sayNO;
3192 s += UTF8SKIP(s);
3193 l += UTF8SKIP(l);
3194 }
3195 }
3196 locinput = l;
3197 nextchr = UCHARAT(locinput);
3198 break;
3199 }
3200
a0d0e21e 3201 /* Inline the first character, for speed. */
76e3520e 3202 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3203 (OP(scan) == REF ||
3204 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3205 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3206 sayNO;
cf93c79d 3207 ln = PL_regendp[n] - ln;
3280af22 3208 if (locinput + ln > PL_regeol)
4633a7c4 3209 sayNO;
c8756f30
AK
3210 if (ln > 1 && (OP(scan) == REF
3211 ? memNE(s, locinput, ln)
3212 : (OP(scan) == REFF
3213 ? ibcmp(s, locinput, ln)
3214 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3215 sayNO;
a0d0e21e 3216 locinput += ln;
76e3520e 3217 nextchr = UCHARAT(locinput);
a0d0e21e
LW
3218 break;
3219
3220 case NOTHING:
c277df42 3221 case TAIL:
a0d0e21e
LW
3222 break;
3223 case BACK:
3224 break;
c277df42
IZ
3225 case EVAL:
3226 {
3227 dSP;
533c011a 3228 OP_4tree *oop = PL_op;
3280af22
NIS
3229 COP *ocurcop = PL_curcop;
3230 SV **ocurpad = PL_curpad;
c277df42
IZ
3231 SV *ret;
3232
3233 n = ARG(scan);
533c011a
NIS
3234 PL_op = (OP_4tree*)PL_regdata->data[n];
3235 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 3236 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 3237 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3238
cea2e8a9 3239 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
3240 SPAGAIN;
3241 ret = POPs;
3242 PUTBACK;
3243
0f5d15d6
IZ
3244 PL_op = oop;
3245 PL_curpad = ocurpad;
3246 PL_curcop = ocurcop;
c277df42 3247 if (logical) {
0f5d15d6
IZ
3248 if (logical == 2) { /* Postponed subexpression. */
3249 regexp *re;
22c35a8c 3250 MAGIC *mg = Null(MAGIC*);
0f5d15d6
IZ
3251 re_cc_state state;
3252 CURCUR cctmp;
3253 CHECKPOINT cp, lastcp;
3254
3255 if(SvROK(ret) || SvRMAGICAL(ret)) {
3256 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3257
3258 if(SvMAGICAL(sv))
3259 mg = mg_find(sv, 'r');
3260 }
3261 if (mg) {
3262 re = (regexp *)mg->mg_obj;
df0003d4 3263 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3264 }
3265 else {
3266 STRLEN len;
3267 char *t = SvPV(ret, len);
3268 PMOP pm;
3269 char *oprecomp = PL_regprecomp;
3270 I32 osize = PL_regsize;
3271 I32 onpar = PL_regnpar;
3272
3273 pm.op_pmflags = 0;
cea2e8a9 3274 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
3275 if (!(SvFLAGS(ret)
3276 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3277 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3278 PL_regprecomp = oprecomp;
3279 PL_regsize = osize;
3280 PL_regnpar = onpar;
3281 }
3282 DEBUG_r(
3283 PerlIO_printf(Perl_debug_log,
3284 "Entering embedded `%s%.60s%s%s'\n",
3285 PL_colors[0],
3286 re->precomp,
3287 PL_colors[1],
3288 (strlen(re->precomp) > 60 ? "..." : ""))
3289 );
3290 state.node = next;
3291 state.prev = PL_reg_call_cc;
3292 state.cc = PL_regcc;
3293 state.re = PL_reg_re;
3294
3295 cctmp.cur = 0;
3296 cctmp.oldcc = 0;
3297 PL_regcc = &cctmp;
3298
3299 cp = regcppush(0); /* Save *all* the positions. */
3300 REGCP_SET;
3301 cache_re(re);
3302 state.ss = PL_savestack_ix;
3303 *PL_reglastparen = 0;
3304 PL_reg_call_cc = &state;
3305 PL_reginput = locinput;
3306 if (regmatch(re->program + 1)) {
3307 ReREFCNT_dec(re);
3308 regcpblow(cp);
3309 sayYES;
3310 }
3311 DEBUG_r(
3312 PerlIO_printf(Perl_debug_log,
3313 "%*s failed...\n",
3314 REPORT_CODE_OFF+PL_regindent*2, "")
3315 );
3316 ReREFCNT_dec(re);
3317 REGCP_UNWIND;
3318 regcppop();
3319 PL_reg_call_cc = state.prev;
3320 PL_regcc = state.cc;
3321 PL_reg_re = state.re;
d3790889 3322 cache_re(PL_reg_re);
0f5d15d6
IZ
3323 sayNO;
3324 }
c277df42 3325 sw = SvTRUE(ret);
0f5d15d6 3326 logical = 0;
a0ed51b3
LW
3327 }
3328 else
3280af22 3329 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
3330 break;
3331 }
a0d0e21e 3332 case OPEN:
c277df42 3333 n = ARG(scan); /* which paren pair */
3280af22
NIS
3334 PL_reg_start_tmp[n] = locinput;
3335 if (n > PL_regsize)
3336 PL_regsize = n;
a0d0e21e
LW
3337 break;
3338 case CLOSE:
c277df42 3339 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3340 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3341 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
3342 if (n > *PL_reglastparen)
3343 *PL_reglastparen = n;
a0d0e21e 3344 break;
c277df42
IZ
3345 case GROUPP:
3346 n = ARG(scan); /* which paren pair */
cf93c79d 3347 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3348 break;
3349 case IFTHEN:
3350 if (sw)
3351 next = NEXTOPER(NEXTOPER(scan));
3352 else {
3353 next = scan + ARG(scan);
3354 if (OP(next) == IFTHEN) /* Fake one. */
3355 next = NEXTOPER(NEXTOPER(next));
3356 }
3357 break;
3358 case LOGICAL:
0f5d15d6 3359 logical = scan->flags;
c277df42 3360 break;
a0d0e21e
LW
3361 case CURLYX: {
3362 CURCUR cc;
3280af22 3363 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
3364
3365 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3366 next += ARG(next);
3280af22
NIS
3367 cc.oldcc = PL_regcc;
3368 PL_regcc = &cc;
3369 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
3370 cc.cur = -1;
3371 cc.min = ARG1(scan);
3372 cc.max = ARG2(scan);
c277df42 3373 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3374 cc.next = next;
3375 cc.minmod = minmod;
3376 cc.lastloc = 0;
3280af22 3377 PL_reginput = locinput;
a0d0e21e
LW
3378 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3379 regcpblow(cp);
3280af22 3380 PL_regcc = cc.oldcc;
4633a7c4 3381 saySAME(n);
a0d0e21e
LW
3382 }
3383 /* NOT REACHED */
3384 case WHILEM: {
3385 /*
3386 * This is really hard to understand, because after we match
3387 * what we're trying to match, we must make sure the rest of
3388 * the RE is going to match for sure, and to do that we have
3389 * to go back UP the parse tree by recursing ever deeper. And
3390 * if it fails, we have to reset our parent's current state
3391 * that we can try again after backing off.
3392 */
3393
c277df42 3394 CHECKPOINT cp, lastcp;
3280af22 3395 CURCUR* cc = PL_regcc;
c277df42
IZ
3396 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3397
4633a7c4 3398 n = cc->cur + 1; /* how many we know we matched */
3280af22 3399 PL_reginput = locinput;
a0d0e21e 3400
c277df42
IZ
3401 DEBUG_r(
3402 PerlIO_printf(Perl_debug_log,
3403 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3404 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
3405 (long)n, (long)cc->min,
3406 (long)cc->max, (long)cc)
3407 );
4633a7c4 3408
a0d0e21e
LW
3409 /* If degenerate scan matches "", assume scan done. */
3410
579cf2c3 3411 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
3412 PL_regcc = cc->oldcc;
3413 ln = PL_regcc->cur;
c277df42 3414 DEBUG_r(
c3464db5
DD
3415 PerlIO_printf(Perl_debug_log,
3416 "%*s empty match detected, try continuation...\n",
3280af22 3417 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3418 );
a0d0e21e 3419 if (regmatch(cc->next))
4633a7c4 3420 sayYES;
c277df42 3421 DEBUG_r(
c3464db5
DD
3422 PerlIO_printf(Perl_debug_log,
3423 "%*s failed...\n",
3280af22 3424 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3425 );
3280af22
NIS
3426 PL_regcc->cur = ln;
3427 PL_regcc = cc;
4633a7c4 3428 sayNO;
a0d0e21e
LW
3429 }
3430
3431 /* First just match a string of min scans. */
3432
3433 if (n < cc->min) {
3434 cc->cur = n;
3435 cc->lastloc = locinput;
4633a7c4
LW
3436 if (regmatch(cc->scan))
3437 sayYES;
3438 cc->cur = n - 1;
c277df42
IZ
3439 cc->lastloc = lastloc;
3440 DEBUG_r(
c3464db5
DD
3441 PerlIO_printf(Perl_debug_log,
3442 "%*s failed...\n",
3280af22 3443 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3444 );
4633a7c4 3445 sayNO;
a0d0e21e
LW
3446 }
3447
3448 /* Prefer next over scan for minimal matching. */
3449
3450 if (cc->minmod) {
3280af22
NIS
3451 PL_regcc = cc->oldcc;
3452 ln = PL_regcc->cur;
5f05dabc 3453 cp = regcppush(cc->parenfloor);
c277df42 3454 REGCP_SET;
5f05dabc 3455 if (regmatch(cc->next)) {
c277df42 3456 regcpblow(cp);
4633a7c4 3457 sayYES; /* All done. */
5f05dabc 3458 }
c277df42 3459 REGCP_UNWIND;
5f05dabc 3460 regcppop();
3280af22
NIS
3461 PL_regcc->cur = ln;
3462 PL_regcc = cc;
a0d0e21e 3463
c277df42 3464 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 3465 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
3466 && !(PL_reg_flags & RF_warned)) {
3467 PL_reg_flags |= RF_warned;
cea2e8a9 3468 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
3469 "Complex regular subexpression recursion",
3470 REG_INFTY - 1);
c277df42 3471 }
4633a7c4 3472 sayNO;
c277df42 3473 }
a687059c 3474
c277df42 3475 DEBUG_r(
c3464db5
DD
3476 PerlIO_printf(Perl_debug_log,
3477 "%*s trying longer...\n",
3280af22 3478 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3479 );
a0d0e21e 3480 /* Try scanning more and see if it helps. */
3280af22 3481 PL_reginput = locinput;
a0d0e21e
LW
3482 cc->cur = n;
3483 cc->lastloc = locinput;
5f05dabc 3484 cp = regcppush(cc->parenfloor);
c277df42 3485 REGCP_SET;
5f05dabc 3486 if (regmatch(cc->scan)) {
c277df42 3487 regcpblow(cp);
4633a7c4 3488 sayYES;
5f05dabc 3489 }
c277df42 3490 DEBUG_r(
c3464db5
DD
3491 PerlIO_printf(Perl_debug_log,
3492 "%*s failed...\n",
3280af22 3493 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3494 );
3495 REGCP_UNWIND;
5f05dabc 3496 regcppop();
4633a7c4 3497 cc->cur = n - 1;
c277df42 3498 cc->lastloc = lastloc;
4633a7c4 3499 sayNO;
a0d0e21e
LW
3500 }
3501
3502 /* Prefer scan over next for maximal matching. */
3503
3504 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3505 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3506 cc->cur = n;
3507 cc->lastloc = locinput;
c277df42 3508 REGCP_SET;
5f05dabc 3509 if (regmatch(cc->scan)) {
c277df42 3510 regcpblow(cp);
4633a7c4 3511 sayYES;
5f05dabc 3512 }
c277df42 3513 REGCP_UNWIND;
a0d0e21e 3514 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3515 PL_reginput = locinput;
c277df42 3516 DEBUG_r(
c3464db5
DD
3517 PerlIO_printf(Perl_debug_log,
3518 "%*s failed, try continuation...\n",
3280af22 3519 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3520 );
3521 }
599cee73
PM
3522 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3523 && !(PL_reg_flags & RF_warned)) {
3280af22 3524 PL_reg_flags |= RF_warned;
cea2e8a9 3525 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
3526 "Complex regular subexpression recursion",
3527 REG_INFTY - 1);
a0d0e21e
LW
3528 }
3529
3530 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
3531 PL_regcc = cc->oldcc;
3532 ln = PL_regcc->cur;
a0d0e21e 3533 if (regmatch(cc->next))
4633a7c4 3534 sayYES;
c277df42 3535 DEBUG_r(
c3464db5 3536 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 3537 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3538 );
3280af22
NIS
3539 PL_regcc->cur = ln;
3540 PL_regcc = cc;
4633a7c4 3541 cc->cur = n - 1;
c277df42 3542 cc->lastloc = lastloc;
4633a7c4 3543 sayNO;
a0d0e21e
LW
3544 }
3545 /* NOT REACHED */
c277df42
IZ
3546 case BRANCHJ:
3547 next = scan + ARG(scan);
3548 if (next == scan)
3549 next = NULL;
3550 inner = NEXTOPER(NEXTOPER(scan));
3551 goto do_branch;
3552 case BRANCH:
3553 inner = NEXTOPER(scan);
3554 do_branch:
3555 {
3556 CHECKPOINT lastcp;
3557 c1 = OP(scan);
3558 if (OP(next) != c1) /* No choice. */
3559 next = inner; /* Avoid recursion. */
a0d0e21e 3560 else {
3280af22 3561 int lastparen = *PL_reglastparen;
c277df42
IZ
3562
3563 REGCP_SET;
a0d0e21e 3564 do {
3280af22 3565 PL_reginput = locinput;
c277df42 3566 if (regmatch(inner))
4633a7c4 3567 sayYES;
c277df42 3568 REGCP_UNWIND;
3280af22 3569 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 3570 PL_regendp[n] = -1;
3280af22 3571 *PL_reglastparen = n;
c277df42 3572 scan = next;
a0d0e21e 3573 /*SUPPRESS 560*/
c277df42
IZ
3574 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3575 next += n;
a0d0e21e 3576 else
c277df42 3577 next = NULL;
c277df42
IZ
3578 inner = NEXTOPER(scan);
3579 if (c1 == BRANCHJ) {
3580 inner = NEXTOPER(inner);
3581 }
3582 } while (scan != NULL && OP(scan) == c1);
4633a7c4 3583 sayNO;
a0d0e21e 3584 /* NOTREACHED */
a687059c 3585 }
a0d0e21e
LW
3586 }
3587 break;
3588 case MINMOD:
3589 minmod = 1;
3590 break;
c277df42
IZ
3591 case CURLYM:
3592 {
00db4c45 3593 I32 l = 0;
c277df42
IZ
3594 CHECKPOINT lastcp;
3595
3596 /* We suppose that the next guy does not need
3597 backtracking: in particular, it is of constant length,
3598 and has no parenths to influence future backrefs. */
3599 ln = ARG1(scan); /* min to match */
3600 n = ARG2(scan); /* max to match */
c277df42
IZ
3601 paren = scan->flags;
3602 if (paren) {
3280af22
NIS
3603 if (paren > PL_regsize)
3604 PL_regsize = paren;
3605 if (paren > *PL_reglastparen)
3606 *PL_reglastparen = paren;
c277df42 3607 }
dc45a647 3608 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3609 if (paren)
3610 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3611 PL_reginput = locinput;
c277df42
IZ
3612 if (minmod) {
3613 minmod = 0;
3614 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3615 sayNO;
5f4b28b2 3616 if (ln && l == 0 && n >= ln
c277df42
IZ
3617 /* In fact, this is tricky. If paren, then the
3618 fact that we did/didnot match may influence
3619 future execution. */
3620 && !(paren && ln == 0))
3621 ln = n;
3280af22 3622 locinput = PL_reginput;
22c35a8c 3623 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
3624 c1 = UCHARAT(OPERAND(next) + 1);
3625 if (OP(next) == EXACTF)
22c35a8c 3626 c2 = PL_fold[c1];
c277df42 3627 else if (OP(next) == EXACTFL)
22c35a8c 3628 c2 = PL_fold_locale[c1];
c277df42
IZ
3629 else
3630 c2 = c1;
a0ed51b3
LW
3631 }
3632 else
c277df42
IZ
3633 c1 = c2 = -1000;
3634 REGCP_SET;
5f4b28b2 3635 /* This may be improved if l == 0. */
c277df42
IZ
3636 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3637 /* If it could work, try it. */
3638 if (c1 == -1000 ||
3280af22
NIS
3639 UCHARAT(PL_reginput) == c1 ||
3640 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3641 {
3642 if (paren) {
3643 if (n) {
cf93c79d
IZ
3644 PL_regstartp[paren] =
3645 HOPc(PL_reginput, -l) - PL_bostr;
3646 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3647 }
3648 else
cf93c79d 3649 PL_regendp[paren] = -1;
c277df42
IZ
3650 }
3651 if (regmatch(next))
3652 sayYES;
3653 REGCP_UNWIND;
3654 }
3655 /* Couldn't or didn't -- move forward. */
3280af22 3656 PL_reginput = locinput;
c277df42
IZ
3657 if (regrepeat_hard(scan, 1, &l)) {
3658 ln++;
3280af22 3659 locinput = PL_reginput;
c277df42
IZ
3660 }
3661 else
3662 sayNO;
3663 }
a0ed51b3
LW
3664 }
3665 else {
c277df42
IZ
3666 n = regrepeat_hard(scan, n, &l);
3667 if (n != 0 && l == 0
3668 /* In fact, this is tricky. If paren, then the
3669 fact that we did/didnot match may influence
3670 future execution. */
3671 && !(paren && ln == 0))
3672 ln = n;
3280af22 3673 locinput = PL_reginput;
c277df42 3674 DEBUG_r(
5c0ca799
GS
3675 PerlIO_printf(Perl_debug_log,
3676 "%*s matched %ld times, len=%ld...\n",
3280af22 3677 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
3678 );
3679 if (n >= ln) {
22c35a8c 3680 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
3681 c1 = UCHARAT(OPERAND(next) + 1);
3682 if (OP(next) == EXACTF)
22c35a8c 3683 c2 = PL_fold[c1];
c277df42 3684 else if (OP(next) == EXACTFL)
22c35a8c 3685 c2 = PL_fold_locale[c1];
c277df42
IZ
3686 else
3687 c2 = c1;
a0ed51b3
LW
3688 }
3689 else
c277df42
IZ
3690 c1 = c2 = -1000;
3691 }
3692 REGCP_SET;
3693 while (n >= ln) {
3694 /* If it could work, try it. */
3695 if (c1 == -1000 ||
3280af22
NIS
3696 UCHARAT(PL_reginput) == c1 ||
3697 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3698 {
3699 DEBUG_r(
c3464db5
DD
3700 PerlIO_printf(Perl_debug_log,
3701 "%*s trying tail with n=%ld...\n",
3280af22 3702 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
3703 );
3704 if (paren) {
3705 if (n) {
cf93c79d
IZ
3706 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3707 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3708 }
a0ed51b3 3709 else
cf93c79d 3710 PL_regendp[paren] = -1;
c277df42 3711 }
a0ed51b3
LW
3712 if (regmatch(next))
3713 sayYES;
3714 REGCP_UNWIND;
3715 }
c277df42
IZ
3716 /* Couldn't or didn't -- back up. */
3717 n--;
dfe13c55 3718 locinput = HOPc(locinput, -l);
3280af22 3719 PL_reginput = locinput;
c277df42
IZ
3720 }
3721 }
3722 sayNO;
3723 break;
3724 }
3725 case CURLYN:
3726 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3727 if (paren > PL_regsize)
3728 PL_regsize = paren;
3729 if (paren > *PL_reglastparen)
3730 *PL_reglastparen = paren;
c277df42
IZ
3731 ln = ARG1(scan); /* min to match */
3732 n = ARG2(scan); /* max to match */
dc45a647 3733 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3734 goto repeat;
a0d0e21e 3735 case CURLY:
c277df42 3736 paren = 0;
a0d0e21e
LW
3737 ln = ARG1(scan); /* min to match */
3738 n = ARG2(scan); /* max to match */
dc45a647 3739 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3740 goto repeat;
3741 case STAR:
3742 ln = 0;
c277df42 3743 n = REG_INFTY;
a0d0e21e 3744 scan = NEXTOPER(scan);
c277df42 3745 paren = 0;
a0d0e21e
LW
3746 goto repeat;
3747 case PLUS:
c277df42
IZ
3748 ln = 1;
3749 n = REG_INFTY;
3750 scan = NEXTOPER(scan);
3751 paren = 0;
3752 repeat:
a0d0e21e
LW
3753 /*
3754 * Lookahead to avoid useless match attempts
3755 * when we know what character comes next.
3756 */
22c35a8c 3757 if (PL_regkind[(U8)OP(next)] == EXACT) {
bbce6d69 3758 c1 = UCHARAT(OPERAND(next) + 1);
3759 if (OP(next) == EXACTF)
22c35a8c 3760 c2 = PL_fold[c1];
bbce6d69 3761 else if (OP(next) == EXACTFL)
22c35a8c 3762 c2 = PL_fold_locale[c1];
bbce6d69 3763 else
3764 c2 = c1;
3765 }
a0d0e21e 3766 else
bbce6d69 3767 c1 = c2 = -1000;
3280af22 3768 PL_reginput = locinput;
a0d0e21e 3769 if (minmod) {
c277df42 3770 CHECKPOINT lastcp;
a0d0e21e
LW
3771 minmod = 0;
3772 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3773 sayNO;
a0ed51b3 3774 locinput = PL_reginput;
c277df42 3775 REGCP_SET;
0fe9bf95
IZ
3776 if (c1 != -1000) {
3777 char *e = locinput + n - ln; /* Should not check after this */
3778 char *old = locinput;
3779
3780 if (e >= PL_regeol || (n == REG_INFTY))
3781 e = PL_regeol - 1;
3782 while (1) {
3783 /* Find place 'next' could work */
3784 if (c1 == c2) {
3785 while (locinput <= e && *locinput != c1)
3786 locinput++;
3787 } else {
3788 while (locinput <= e
3789 && *locinput != c1
3790 && *locinput != c2)
3791 locinput++;
3792 }
3793 if (locinput > e)
3794 sayNO;
3795 /* PL_reginput == old now */
3796 if (locinput != old) {
3797 ln = 1; /* Did some */
3798 if (regrepeat(scan, locinput - old) <
3799 locinput - old)
3800 sayNO;
3801 }
3802 /* PL_reginput == locinput now */
3803 if (paren) {
3804 if (ln) {
cf93c79d
IZ
3805 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3806 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3807 }
3808 else
cf93c79d 3809 PL_regendp[paren] = -1;
0fe9bf95
IZ
3810 }
3811 if (regmatch(next))
3812 sayYES;
3813 PL_reginput = locinput; /* Could be reset... */
3814 REGCP_UNWIND;
3815 /* Couldn't or didn't -- move forward. */
3816 old = locinput++;
3817 }
3818 }
3819 else
c277df42 3820 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3821 /* If it could work, try it. */
bbce6d69 3822 if (c1 == -1000 ||
3280af22
NIS
3823 UCHARAT(PL_reginput) == c1 ||
3824 UCHARAT(PL_reginput) == c2)
bbce6d69 3825 {
c277df42
IZ
3826 if (paren) {
3827 if (n) {
cf93c79d
IZ
3828 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3829 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3830 }
3831 else
cf93c79d 3832 PL_regendp[paren] = -1;
c277df42 3833 }
a0d0e21e 3834 if (regmatch(next))
4633a7c4 3835 sayYES;
c277df42 3836 REGCP_UNWIND;
bbce6d69 3837 }
c277df42 3838 /* Couldn't or didn't -- move forward. */
a0ed51b3 3839 PL_reginput = locinput;
a0d0e21e
LW
3840 if (regrepeat(scan, 1)) {
3841 ln++;
a0ed51b3
LW
3842 locinput = PL_reginput;
3843 }
3844 else
4633a7c4 3845 sayNO;
a0d0e21e
LW
3846 }
3847 }
3848 else {
c277df42 3849 CHECKPOINT lastcp;
a0d0e21e 3850 n = regrepeat(scan, n);
a0ed51b3 3851 locinput = PL_reginput;
22c35a8c 3852 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3853 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3854 ln = n; /* why back off? */
c277df42
IZ
3855 REGCP_SET;
3856 if (paren) {
3857 while (n >= ln) {
3858 /* If it could work, try it. */
3859 if (c1 == -1000 ||
3280af22
NIS
3860 UCHARAT(PL_reginput) == c1 ||
3861 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3862 {
3863 if (paren && n) {
3864 if (n) {
cf93c79d
IZ
3865 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3866 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3867 }
3868 else
cf93c79d 3869 PL_regendp[paren] = -1;
c277df42
IZ
3870 }
3871 if (regmatch(next))
3872 sayYES;
3873 REGCP_UNWIND;
3874 }
3875 /* Couldn't or didn't -- back up. */
3876 n--;
dfe13c55 3877 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3878 }
a0ed51b3
LW
3879 }
3880 else {
c277df42
IZ
3881 while (n >= ln) {
3882 /* If it could work, try it. */
3883 if (c1 == -1000 ||
3280af22
NIS
3884 UCHARAT(PL_reginput) == c1 ||
3885 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3886 {
3887 if (regmatch(next))
3888 sayYES;
3889 REGCP_UNWIND;
3890 }
3891 /* Couldn't or didn't -- back up. */
3892 n--;
dfe13c55 3893 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3894 }
a0d0e21e
LW
3895 }
3896 }
4633a7c4 3897 sayNO;
c277df42 3898 break;
a0d0e21e 3899 case END:
0f5d15d6
IZ
3900 if (PL_reg_call_cc) {
3901 re_cc_state *cur_call_cc = PL_reg_call_cc;
3902 CURCUR *cctmp = PL_regcc;
3903 regexp *re = PL_reg_re;
3904 CHECKPOINT cp, lastcp;
3905
3906 cp = regcppush(0); /* Save *all* the positions. */
3907 REGCP_SET;
3908 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3909 the caller. */
3910 PL_reginput = locinput; /* Make position available to
3911 the callcc. */
3912 cache_re(PL_reg_call_cc->re);
3913 PL_regcc = PL_reg_call_cc->cc;
3914 PL_reg_call_cc = PL_reg_call_cc->prev;
3915 if (regmatch(cur_call_cc->node)) {
3916 PL_reg_call_cc = cur_call_cc;
3917 regcpblow(cp);
3918 sayYES;
3919 }
3920 REGCP_UNWIND;
3921 regcppop();
3922 PL_reg_call_cc = cur_call_cc;
3923 PL_regcc = cctmp;
3924 PL_reg_re = re;
3925 cache_re(re);
3926
3927 DEBUG_r(
3928 PerlIO_printf(Perl_debug_log,
3929 "%*s continuation failed...\n",
3930 REPORT_CODE_OFF+PL_regindent*2, "")
3931 );
3932 sayNO;
3933 }
3280af22 3934 if (locinput < PL_regtill)
7e5428c5
IZ
3935 sayNO; /* Cannot match: too short. */
3936 /* Fall through */
3937 case SUCCEED:
3280af22 3938 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 3939 sayYES; /* Success! */
c277df42
IZ
3940 case SUSPEND:
3941 n = 1;
9fe1d20c 3942 PL_reginput = locinput;
c277df42 3943 goto do_ifmatch;
a0d0e21e 3944 case UNLESSM:
c277df42 3945 n = 0;
a0ed51b3 3946 if (scan->flags) {
0fe9bf95
IZ
3947 if (UTF) { /* XXXX This is absolutely
3948 broken, we read before
3949 start of string. */
3950 s = HOPMAYBEc(locinput, -scan->flags);
3951 if (!s)
3952 goto say_yes;
3953 PL_reginput = s;
3954 }
3955 else {
3956 if (locinput < PL_bostr + scan->flags)
3957 goto say_yes;
3958 PL_reginput = locinput - scan->flags;
3959 goto do_ifmatch;
3960 }
a0ed51b3
LW
3961 }
3962 else
3963 PL_reginput = locinput;
c277df42
IZ
3964 goto do_ifmatch;
3965 case IFMATCH:
3966 n = 1;
a0ed51b3 3967 if (scan->flags) {
0fe9bf95
IZ
3968 if (UTF) { /* XXXX This is absolutely
3969 broken, we read before
3970 start of string. */
3971 s = HOPMAYBEc(locinput, -scan->flags);
3972 if (!s || s < PL_bostr)
3973 goto say_no;
3974 PL_reginput = s;
3975 }
3976 else {
3977 if (locinput < PL_bostr + scan->flags)
3978 goto say_no;
3979 PL_reginput = locinput - scan->flags;
3980 goto do_ifmatch;
3981 }
a0ed51b3
LW
3982 }
3983 else
3984 PL_reginput = locinput;
3985
c277df42 3986 do_ifmatch:
c277df42
IZ
3987 inner = NEXTOPER(NEXTOPER(scan));
3988 if (regmatch(inner) != n) {
3989 say_no:
3990 if (logical) {
3991 logical = 0;
3992 sw = 0;
3993 goto do_longjump;
a0ed51b3
LW
3994 }
3995 else
c277df42
IZ
3996 sayNO;
3997 }
3998 say_yes:
3999 if (logical) {
4000 logical = 0;
4001 sw = 1;
4002 }
fe44a5e8 4003 if (OP(scan) == SUSPEND) {
3280af22 4004 locinput = PL_reginput;
565764a8 4005 nextchr = UCHARAT(locinput);
fe44a5e8 4006 }
c277df42
IZ
4007 /* FALL THROUGH. */
4008 case LONGJMP:
4009 do_longjump:
4010 next = scan + ARG(scan);
4011 if (next == scan)
4012 next = NULL;
a0d0e21e
LW
4013 break;
4014 default:
c030ccd9 4015 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 4016 (unsigned long)scan, OP(scan));
cea2e8a9 4017 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4018 }
a0d0e21e
LW
4019 scan = next;
4020 }
a687059c 4021
a0d0e21e
LW
4022 /*
4023 * We get here only if there's trouble -- normally "case END" is
4024 * the terminating point.
4025 */
cea2e8a9 4026 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4027 /*NOTREACHED*/
4633a7c4
LW
4028 sayNO;
4029
4030yes:
4031#ifdef DEBUGGING
3280af22 4032 PL_regindent--;
4633a7c4
LW
4033#endif
4034 return 1;
4035
4036no:
4037#ifdef DEBUGGING
3280af22 4038 PL_regindent--;
4633a7c4 4039#endif
a0d0e21e 4040 return 0;
a687059c
LW
4041}
4042
4043/*
4044 - regrepeat - repeatedly match something simple, report how many
4045 */
4046/*
4047 * [This routine now assumes that it will only match on things of length 1.
4048 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4049 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4050 */
76e3520e 4051STATIC I32
cea2e8a9 4052S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 4053{
5c0ca799 4054 dTHR;
a0d0e21e
LW
4055 register char *scan;
4056 register char *opnd;
4057 register I32 c;
3280af22 4058 register char *loceol = PL_regeol;
a0ed51b3 4059 register I32 hardcount = 0;
a0d0e21e 4060
3280af22 4061 scan = PL_reginput;
c277df42 4062 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 4063 loceol = scan + max;
161b471a 4064 opnd = (char *) OPERAND(p);
a0d0e21e 4065 switch (OP(p)) {
22c35a8c 4066 case REG_ANY:
a0d0e21e
LW
4067 while (scan < loceol && *scan != '\n')
4068 scan++;
4069 break;
4070 case SANY:
4071 scan = loceol;
4072 break;
a0ed51b3
LW
4073 case ANYUTF8:
4074 loceol = PL_regeol;
4075 while (scan < loceol && *scan != '\n') {
4076 scan += UTF8SKIP(scan);
4077 hardcount++;
4078 }
4079 break;
4080 case SANYUTF8:
4081 loceol = PL_regeol;
4082 while (scan < loceol) {
4083 scan += UTF8SKIP(scan);
4084 hardcount++;
4085 }
4086 break;
bbce6d69 4087 case EXACT: /* length of string is 1 */
4088 c = UCHARAT(++opnd);
4089 while (scan < loceol && UCHARAT(scan) == c)
4090 scan++;
4091 break;
4092 case EXACTF: /* length of string is 1 */
4093 c = UCHARAT(++opnd);
4094 while (scan < loceol &&
22c35a8c 4095 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4096 scan++;
4097 break;
4098 case EXACTFL: /* length of string is 1 */
3280af22 4099 PL_reg_flags |= RF_tainted;
bbce6d69 4100 c = UCHARAT(++opnd);
4101 while (scan < loceol &&
22c35a8c 4102 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4103 scan++;
4104 break;
a0ed51b3
LW
4105 case ANYOFUTF8:
4106 loceol = PL_regeol;
4107 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4108 scan += UTF8SKIP(scan);
4109 hardcount++;
4110 }
4111 break;
a0d0e21e 4112 case ANYOF:
ae5c130c 4113 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 4114 scan++;
a0d0e21e
LW
4115 break;
4116 case ALNUM:
4117 while (scan < loceol && isALNUM(*scan))
4118 scan++;
4119 break;
a0ed51b3
LW
4120 case ALNUMUTF8:
4121 loceol = PL_regeol;
dfe13c55 4122 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
4123 scan += UTF8SKIP(scan);
4124 hardcount++;
4125 }
4126 break;
bbce6d69 4127 case ALNUML:
3280af22 4128 PL_reg_flags |= RF_tainted;
bbce6d69 4129 while (scan < loceol && isALNUM_LC(*scan))
4130 scan++;
4131 break;
a0ed51b3
LW
4132 case ALNUMLUTF8:
4133 PL_reg_flags |= RF_tainted;
4134 loceol = PL_regeol;
dfe13c55 4135 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
4136 scan += UTF8SKIP(scan);
4137 hardcount++;
4138 }
4139 break;
4140 break;
a0d0e21e
LW
4141 case NALNUM:
4142 while (scan < loceol && !isALNUM(*scan))
4143 scan++;
4144 break;
a0ed51b3
LW
4145 case NALNUMUTF8:
4146 loceol = PL_regeol;
dfe13c55 4147 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
4148 scan += UTF8SKIP(scan);
4149 hardcount++;
4150 }
4151 break;
bbce6d69 4152 case NALNUML:
3280af22 4153 PL_reg_flags |= RF_tainted;
bbce6d69 4154 while (scan < loceol && !isALNUM_LC(*scan))
4155 scan++;
4156 break;
a0ed51b3
LW
4157 case NALNUMLUTF8:
4158 PL_reg_flags |= RF_tainted;
4159 loceol = PL_regeol;
dfe13c55 4160 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
4161 scan += UTF8SKIP(scan);
4162 hardcount++;
4163 }
4164 break;
a0d0e21e
LW
4165 case SPACE:
4166 while (scan < loceol && isSPACE(*scan))
4167 scan++;
4168 break;
a0ed51b3
LW
4169 case SPACEUTF8:
4170 loceol = PL_regeol;
dfe13c55 4171 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
4172 scan += UTF8SKIP(scan);
4173 hardcount++;
4174 }
4175 break;
bbce6d69 4176 case SPACEL:
3280af22 4177 PL_reg_flags |= RF_tainted;
bbce6d69 4178 while (scan < loceol && isSPACE_LC(*scan))
4179 scan++;
4180 break;
a0ed51b3
LW
4181 case SPACELUTF8:
4182 PL_reg_flags |= RF_tainted;
4183 loceol = PL_regeol;
dfe13c55 4184 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
4185 scan += UTF8SKIP(scan);
4186 hardcount++;
4187 }
4188 break;
a0d0e21e
LW
4189 case NSPACE:
4190 while (scan < loceol && !isSPACE(*scan))
4191 scan++;
4192 break;
a0ed51b3
LW
4193 case NSPACEUTF8:
4194 loceol = PL_regeol;
dfe13c55 4195 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
4196 scan += UTF8SKIP(scan);
4197 hardcount++;
4198 }
4199 break;
bbce6d69 4200 case NSPACEL:
3280af22 4201 PL_reg_flags |= RF_tainted;
bbce6d69 4202 while (scan < loceol && !isSPACE_LC(*scan))
4203 scan++;
4204 break;
a0ed51b3
LW
4205 case NSPACELUTF8:
4206 PL_reg_flags |= RF_tainted;
4207 loceol = PL_regeol;
dfe13c55 4208 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
4209 scan += UTF8SKIP(scan);
4210 hardcount++;
4211 }
4212 break;
a0d0e21e
LW
4213 case DIGIT:
4214 while (scan < loceol && isDIGIT(*scan))
4215 scan++;
4216 break;
a0ed51b3
LW
4217 case DIGITUTF8:
4218 loceol = PL_regeol;
dfe13c55 4219 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
4220 scan += UTF8SKIP(scan);
4221 hardcount++;
4222 }
4223 break;
4224 break;
a0d0e21e
LW
4225 case NDIGIT:
4226 while (scan < loceol && !isDIGIT(*scan))
4227 scan++;
4228 break;
a0ed51b3
LW
4229 case NDIGITUTF8:
4230 loceol = PL_regeol;
dfe13c55 4231 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
4232 scan += UTF8SKIP(scan);
4233 hardcount++;
4234 }
4235 break;
a0d0e21e
LW
4236 default: /* Called on something of 0 width. */
4237 break; /* So match right here or not at all. */
4238 }
a687059c 4239
a0ed51b3
LW
4240 if (hardcount)
4241 c = hardcount;
4242 else
4243 c = scan - PL_reginput;
3280af22 4244 PL_reginput = scan;
a687059c 4245
c277df42
IZ
4246 DEBUG_r(
4247 {
4248 SV *prop = sv_newmortal();
4249
4250 regprop(prop, p);
4251 PerlIO_printf(Perl_debug_log,
4252 "%*s %s can match %ld times out of %ld...\n",
4253 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4254 });
4255
a0d0e21e 4256 return(c);
a687059c
LW
4257}
4258
4259/*
c277df42
IZ
4260 - regrepeat_hard - repeatedly match something, report total lenth and length
4261 *
4262 * The repeater is supposed to have constant length.
4263 */
4264
76e3520e 4265STATIC I32
cea2e8a9 4266S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4267{
5c0ca799 4268 dTHR;
c277df42
IZ
4269 register char *scan;
4270 register char *start;
3280af22 4271 register char *loceol = PL_regeol;
a0ed51b3 4272 I32 l = 0;
708e3b05 4273 I32 count = 0, res = 1;
a0ed51b3
LW
4274
4275 if (!max)
4276 return 0;
c277df42 4277
3280af22 4278 start = PL_reginput;
a0ed51b3 4279 if (UTF) {
708e3b05 4280 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4281 if (!count++) {
4282 l = 0;
4283 while (start < PL_reginput) {
4284 l++;
4285 start += UTF8SKIP(start);
4286 }
4287 *lp = l;
4288 if (l == 0)
4289 return max;
4290 }
4291 if (count == max)
4292 return count;
4293 }
4294 }
4295 else {
708e3b05 4296 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4297 if (!count++) {
4298 *lp = l = PL_reginput - start;
4299 if (max != REG_INFTY && l*max < loceol - scan)
4300 loceol = scan + l*max;
4301 if (l == 0)
4302 return max;
c277df42
IZ
4303 }
4304 }
4305 }
708e3b05 4306 if (!res)
3280af22 4307 PL_reginput = scan;
c277df42 4308
a0ed51b3 4309 return count;
c277df42
IZ
4310}
4311
4312/*
cb8d8820 4313 - reginclass - determine if a character falls into a character class
bbce6d69 4314 */
4315
76e3520e 4316STATIC bool
cea2e8a9 4317S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 4318{
5c0ca799 4319 dTHR;
b8c5462f 4320 char flags = ANYOF_FLAGS(p);
bbce6d69 4321 bool match = FALSE;
4322
4323 c &= 0xFF;
b8c5462f 4324 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 4325 match = TRUE;
4326 else if (flags & ANYOF_FOLD) {
4327 I32 cf;
4328 if (flags & ANYOF_LOCALE) {
3280af22 4329 PL_reg_flags |= RF_tainted;
22c35a8c 4330 cf = PL_fold_locale[c];
bbce6d69 4331 }
4332 else
22c35a8c 4333 cf = PL_fold[c];
b8c5462f 4334 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 4335 match = TRUE;
4336 }
4337
b8c5462f 4338 if (!match && (flags & ANYOF_CLASS)) {
3280af22 4339 PL_reg_flags |= RF_tainted;
b8c5462f
JH
4340 if (
4341 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4342 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4343 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
4344 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4345 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4346 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4347 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4348 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4349 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4350 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4351 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
4352 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
4353 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4354 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4355 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4356 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4357 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
4358 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4359 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
4360 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4361 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4362 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4363 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
4364 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4365 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4366 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4367 ) /* How's that for a conditional? */
bbce6d69 4368 {
4369 match = TRUE;
4370 }
4371 }
4372
ae5c130c 4373 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 4374}
4375
a0ed51b3 4376STATIC bool
cea2e8a9 4377S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
4378{
4379 dTHR;
a0ed51b3
LW
4380 char flags = ARG1(f);
4381 bool match = FALSE;
4382 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4383
4384 if (swash_fetch(sv, p))
4385 match = TRUE;
4386 else if (flags & ANYOF_FOLD) {
4387 I32 cf;
dfe13c55 4388 U8 tmpbuf[10];
a0ed51b3
LW
4389 if (flags & ANYOF_LOCALE) {
4390 PL_reg_flags |= RF_tainted;
4391 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4392 }
4393 else
4394 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4395 if (swash_fetch(sv, tmpbuf))
4396 match = TRUE;
4397 }
4398
b8c5462f 4399 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
4400
4401 return (flags & ANYOF_INVERT) ? !match : match;
4402}
161b471a 4403
dfe13c55 4404STATIC U8 *
cea2e8a9 4405S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
4406{
4407 dTHR;
a0ed51b3
LW
4408 if (off >= 0) {
4409 while (off-- && s < (U8*)PL_regeol)
4410 s += UTF8SKIP(s);
4411 }
4412 else {
4413 while (off++) {
4414 if (s > (U8*)PL_bostr) {
4415 s--;
4416 if (*s & 0x80) {
4417 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4418 s--;
4419 } /* XXX could check well-formedness here */
4420 }
4421 }
4422 }
4423 return s;
4424}
161b471a 4425
dfe13c55 4426STATIC U8 *
cea2e8a9 4427S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 4428{
c485e607 4429 dTHR;
a0ed51b3
LW
4430 if (off >= 0) {
4431 while (off-- && s < (U8*)PL_regeol)
4432 s += UTF8SKIP(s);
4433 if (off >= 0)
4434 return 0;
4435 }
4436 else {
4437 while (off++) {
4438 if (s > (U8*)PL_bostr) {
4439 s--;
4440 if (*s & 0x80) {
4441 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4442 s--;
4443 } /* XXX could check well-formedness here */
4444 }
4445 else
4446 break;
4447 }
4448 if (off <= 0)
4449 return 0;
4450 }
4451 return s;
4452}
51371543
GS
4453
4454#ifdef PERL_OBJECT
4455#define NO_XSLOCKS
4456#include "XSUB.h"
4457#endif
4458
4459static void
4460restore_pos(pTHXo_ void *arg)
4461{
4462 dTHR;
4463 if (PL_reg_eval_set) {
4464 if (PL_reg_oldsaved) {
4465 PL_reg_re->subbeg = PL_reg_oldsaved;
4466 PL_reg_re->sublen = PL_reg_oldsavedlen;
4467 RX_MATCH_COPIED_on(PL_reg_re);
4468 }
4469 PL_reg_magic->mg_len = PL_reg_oldpos;
4470 PL_reg_eval_set = 0;
4471 PL_curpm = PL_reg_oldcurpm;
4472 }
4473}
4474