This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
sundry cleanups for clean build on windows
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
56953603
IZ
42#endif
43
f0fcb552 44/*SUPPRESS 112*/
a687059c 45/*
e50aee73 46 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
47 *
48 * Copyright (c) 1986 by University of Toronto.
49 * Written by Henry Spencer. Not derived from licensed software.
50 *
51 * Permission is granted to anyone to use this software for any
52 * purpose on any computer system, and to redistribute it freely,
53 * subject to the following restrictions:
54 *
55 * 1. The author is not responsible for the consequences of use of
56 * this software, no matter how awful, even if they arise
57 * from defects in it.
58 *
59 * 2. The origin of this software must not be misrepresented, either
60 * by explicit claim or by omission.
61 *
62 * 3. Altered versions must be plainly marked as such, and must not
63 * be misrepresented as being the original software.
64 *
65 **** Alterations to Henry's code are...
66 ****
4eb8286e 67 **** Copyright (c) 1991-1999, Larry Wall
a687059c 68 ****
9ef589d8
LW
69 **** You may distribute under the terms of either the GNU General Public
70 **** License or the Artistic License, as specified in the README file.
a687059c
LW
71 *
72 * Beware that some of this code is subtly aware of the way operator
73 * precedence is structured in regular expressions. Serious changes in
74 * regular-expression syntax might require a total rethink.
75 */
76#include "EXTERN.h"
864dbfa3 77#define PERL_IN_REGEXEC_C
a687059c 78#include "perl.h"
0f5d15d6 79
a687059c
LW
80#include "regcomp.h"
81
c277df42
IZ
82#define RF_tainted 1 /* tainted information used? */
83#define RF_warned 2 /* warned about big count? */
ce862d02 84#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
85#define RF_utf8 8 /* String contains multibyte chars? */
86
87#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
88
89#define RS_init 1 /* eval environment created */
90#define RS_set 2 /* replsv value is set */
c277df42 91
a687059c
LW
92#ifndef STATIC
93#define STATIC static
94#endif
95
c277df42
IZ
96/*
97 * Forwards.
98 */
99
b8c5462f 100#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
101#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
102
103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
108#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
76e3520e 113STATIC CHECKPOINT
cea2e8a9 114S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 115{
11343788 116 dTHR;
3280af22
NIS
117 int retval = PL_savestack_ix;
118 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
119 int p;
120
121 SSCHECK(i + 5);
3280af22 122 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
123 SSPUSHINT(PL_regendp[p]);
124 SSPUSHINT(PL_regstartp[p]);
3280af22 125 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
126 SSPUSHINT(p);
127 }
3280af22
NIS
128 SSPUSHINT(PL_regsize);
129 SSPUSHINT(*PL_reglastparen);
130 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
131 SSPUSHINT(i + 3);
132 SSPUSHINT(SAVEt_REGCONTEXT);
133 return retval;
134}
135
c277df42 136/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
137# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
138 " Setting an EVAL scope, savestack=%i\n", \
3280af22 139 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 140
3280af22 141# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
142 PerlIO_printf(Perl_debug_log, \
143 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 144 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 145
76e3520e 146STATIC char *
cea2e8a9 147S_regcppop(pTHX)
a0d0e21e 148{
11343788 149 dTHR;
a0d0e21e
LW
150 I32 i = SSPOPINT;
151 U32 paren = 0;
152 char *input;
cf93c79d 153 I32 tmps;
a0d0e21e
LW
154 assert(i == SAVEt_REGCONTEXT);
155 i = SSPOPINT;
156 input = (char *) SSPOPPTR;
3280af22
NIS
157 *PL_reglastparen = SSPOPINT;
158 PL_regsize = SSPOPINT;
c277df42 159 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 160 paren = (U32)SSPOPINT;
3280af22 161 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
162 PL_regstartp[paren] = SSPOPINT;
163 tmps = SSPOPINT;
3280af22
NIS
164 if (paren <= *PL_reglastparen)
165 PL_regendp[paren] = tmps;
c277df42 166 DEBUG_r(
c3464db5
DD
167 PerlIO_printf(Perl_debug_log,
168 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
169 paren, PL_regstartp[paren],
170 PL_reg_start_tmp[paren] - PL_bostr,
171 PL_regendp[paren],
3280af22 172 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 173 );
a0d0e21e 174 }
c277df42 175 DEBUG_r(
3280af22 176 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
177 PerlIO_printf(Perl_debug_log,
178 " restoring \\%d..\\%d to undef\n",
3280af22 179 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
180 }
181 );
3280af22
NIS
182 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
183 if (paren > PL_regsize)
cf93c79d
IZ
184 PL_regstartp[paren] = -1;
185 PL_regendp[paren] = -1;
a0d0e21e
LW
186 }
187 return input;
188}
189
0f5d15d6 190STATIC char *
cea2e8a9 191S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 192{
46124e9e 193 dTHR;
0f5d15d6
IZ
194 I32 tmp = PL_savestack_ix;
195
196 PL_savestack_ix = ss;
197 regcppop();
198 PL_savestack_ix = tmp;
942e002e 199 return Nullch;
0f5d15d6
IZ
200}
201
202typedef struct re_cc_state
203{
204 I32 ss;
205 regnode *node;
206 struct re_cc_state *prev;
207 CURCUR *cc;
208 regexp *re;
209} re_cc_state;
210
c277df42 211#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 212
a687059c 213/*
e50aee73 214 * pregexec and friends
a687059c
LW
215 */
216
217/*
c277df42 218 - pregexec - match a regexp against a string
a687059c 219 */
c277df42 220I32
864dbfa3 221Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 222 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
223/* strend: pointer to null at end of string */
224/* strbeg: real beginning of string */
225/* minend: end of match must be >=minend after stringarg. */
226/* nosave: For optimizations. */
227{
228 return
229 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
230 nosave ? 0 : REXEC_COPY_STR);
231}
0f5d15d6
IZ
232
233STATIC void
cea2e8a9 234S_cache_re(pTHX_ regexp *prog)
0f5d15d6 235{
46124e9e 236 dTHR;
0f5d15d6
IZ
237 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
238#ifdef DEBUGGING
239 PL_regprogram = prog->program;
240#endif
241 PL_regnpar = prog->nparens;
242 PL_regdata = prog->data;
243 PL_reg_re = prog;
244}
22e551b9 245
9661b544 246STATIC void
cea2e8a9 247S_restore_pos(pTHX_ void *arg)
35ef4773
GS
248{
249 dTHR;
cf93c79d
IZ
250 if (PL_reg_eval_set) {
251 if (PL_reg_oldsaved) {
252 PL_reg_re->subbeg = PL_reg_oldsaved;
253 PL_reg_re->sublen = PL_reg_oldsavedlen;
254 RX_MATCH_COPIED_on(PL_reg_re);
255 }
9661b544
IZ
256 PL_reg_magic->mg_len = PL_reg_oldpos;
257 PL_reg_eval_set = 0;
5c5e4c24 258 PL_curpm = PL_reg_oldcurpm;
9661b544
IZ
259 }
260}
261
cad2e5aa
JH
262/*
263 * Need to implement the following flags for reg_anch:
264 *
265 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
266 * USE_INTUIT_ML
267 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
268 * INTUIT_AUTORITATIVE_ML
269 * INTUIT_ONCE_NOML - Intuit can match in one location only.
270 * INTUIT_ONCE_ML
271 *
272 * Another flag for this function: SECOND_TIME (so that float substrs
273 * with giant delta may be not rechecked).
274 */
275
276/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
277
278/* If SCREAM, then sv should be compatible with strpos and strend.
279 Otherwise, only SvCUR(sv) is used to get strbeg. */
280
281/* XXXX We assume that strpos is strbeg unless sv. */
282
283char *
284Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
285 char *strend, U32 flags, re_scream_pos_data *data)
286{
287 I32 start_shift;
288 /* Should be nonnegative! */
289 I32 end_shift;
290 char *s;
291 char *t;
292 I32 ml_anch;
293
294 DEBUG_r( if (!PL_colorset) reginitcolors() );
295 DEBUG_r(PerlIO_printf(Perl_debug_log,
296 "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
297 PL_colors[4],PL_colors[5],PL_colors[0],
298 prog->precomp,
299 PL_colors[1],
300 (strlen(prog->precomp) > 60 ? "..." : ""),
301 PL_colors[0],
302 (strend - strpos > 60 ? 60 : strend - strpos),
303 strpos, PL_colors[1],
304 (strend - strpos > 60 ? "..." : ""))
305 );
306
307 if (prog->minlen > strend - strpos)
308 goto fail;
309
310 /* XXXX Move further down? */
311 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
312 /* Should be nonnegative! */
313 end_shift = prog->minlen - start_shift -
314 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
315
316 if (prog->reganch & ROPT_ANCH) {
317 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
318 || ( (prog->reganch & ROPT_ANCH_BOL)
319 && !PL_multiline ) );
320
321 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
322 /* Anchored... */
323 I32 slen;
324
325 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
326 && (sv && (strpos + SvCUR(sv) != strend)) )
327 goto fail;
328
329 s = (char*)HOP((U8*)strpos, prog->check_offset_min);
330 if (SvTAIL(prog->check_substr)) {
331 slen = SvCUR(prog->check_substr); /* >= 1 */
332
333 if ( strend - s > slen || strend - s < slen - 1 ) {
334 s = Nullch;
335 goto finish;
336 }
337 if ( strend - s == slen && strend[-1] != '\n') {
338 s = Nullch;
339 goto finish;
340 }
341 /* Now should match s[0..slen-2] */
342 slen--;
343 if (slen && (*SvPVX(prog->check_substr) != *s
344 || (slen > 1
345 && memNE(SvPVX(prog->check_substr), s, slen))))
346 s = Nullch;
347 }
348 else if (*SvPVX(prog->check_substr) != *s
349 || ((slen = SvCUR(prog->check_substr)) > 1
350 && memNE(SvPVX(prog->check_substr), s, slen)))
351 s = Nullch;
352 else
353 s = strpos;
354 goto finish;
355 }
356 s = strpos;
357 if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
358 end_shift += strend - s - prog->minlen - prog->check_offset_max;
359 }
360 else {
361 ml_anch = 0;
362 s = strpos;
363 }
364
365 restart:
366 if (flags & REXEC_SCREAM) {
367 SV *c = prog->check_substr;
368 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
369 I32 p = -1; /* Internal iterator of scream. */
370 I32 *pp = data ? data->scream_pos : &p;
371
372 if (PL_screamfirst[BmRARE(c)] >= 0
373 || ( BmRARE(c) == '\n'
374 && (BmPREVIOUS(c) == SvCUR(c) - 1)
375 && SvTAIL(c) ))
376 s = screaminstr(sv, prog->check_substr,
377 start_shift + (strpos - strbeg), end_shift, pp, 0);
378 else
379 s = Nullch;
380 if (data)
381 *data->scream_olds = s;
382 }
383 else
384 s = fbm_instr((unsigned char*)s + start_shift,
385 (unsigned char*)strend - end_shift,
386 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
387
388 /* Update the count-of-usability, remove useless subpatterns,
389 unshift s. */
390 finish:
391 if (!s) {
392 ++BmUSEFUL(prog->check_substr); /* hooray */
393 goto fail; /* not present */
394 }
395 else if (s - strpos > prog->check_offset_max &&
396 ((prog->reganch & ROPT_UTF8)
397 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
398 && t >= strpos)
399 : (t = s - prog->check_offset_max) != 0) ) {
400 if (ml_anch && t[-1] != '\n') {
401 find_anchor:
402 while (t < strend - end_shift - prog->minlen) {
403 if (*t == '\n') {
404 if (t < s - prog->check_offset_min) {
405 s = t + 1;
406 goto set_useful;
407 }
408 s = t + 1;
409 goto restart;
410 }
411 t++;
412 }
413 s = Nullch;
414 goto finish;
415 }
416 s = t;
417 set_useful:
418 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
419 }
420 else {
421 if (ml_anch && sv
422 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
423 t = strpos;
424 goto find_anchor;
425 }
426 if (!(prog->reganch & ROPT_NAUGHTY)
427 && --BmUSEFUL(prog->check_substr) < 0
428 && prog->check_substr == prog->float_substr) { /* boo */
429 /* If flags & SOMETHING - do not do it many times on the same match */
430 SvREFCNT_dec(prog->check_substr);
431 prog->check_substr = Nullsv; /* disable */
432 prog->float_substr = Nullsv; /* clear */
433 s = strpos;
434 prog->reganch &= ~RE_USE_INTUIT;
435 }
436 else
437 s = strpos;
438 }
439
440 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
441 PL_colors[4],PL_colors[5], (long)(s - strpos)) );
442 return s;
443 fail:
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
445 PL_colors[4],PL_colors[5]));
446 return Nullch;
447}
9661b544 448
a687059c 449/*
c277df42 450 - regexec_flags - match a regexp against a string
a687059c 451 */
79072805 452I32
864dbfa3 453Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 454 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
455/* strend: pointer to null at end of string */
456/* strbeg: real beginning of string */
457/* minend: end of match must be >=minend after stringarg. */
458/* data: May be used for some additional optimizations. */
459/* nosave: For optimizations. */
a687059c 460{
5c0ca799 461 dTHR;
a0d0e21e 462 register char *s;
c277df42 463 register regnode *c;
a0d0e21e
LW
464 register char *startpos = stringarg;
465 register I32 tmp;
c277df42 466 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
467 I32 dontbother = 0; /* how many characters not to try at end */
468 CURCUR cc;
c277df42 469 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
470 constant substr. */ /* CC */
471 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
472 I32 scream_pos = -1; /* Internal iterator of scream. */
473 char *scream_olds;
3280af22 474 SV* oreplsv = GvSV(PL_replgv);
a687059c 475
a0d0e21e 476 cc.cur = 0;
4633a7c4 477 cc.oldcc = 0;
3280af22 478 PL_regcc = &cc;
a0d0e21e 479
0f5d15d6 480 cache_re(prog);
a0d0e21e 481#ifdef DEBUGGING
3280af22 482 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
483#endif
484
485 /* Be paranoid... */
486 if (prog == NULL || startpos == NULL) {
cea2e8a9 487 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
488 return 0;
489 }
490
c277df42
IZ
491 minlen = prog->minlen;
492 if (strend - startpos < minlen) goto phooey;
493
a0d0e21e 494 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 495 PL_regprev = '\n';
a0d0e21e 496 else {
a0ed51b3 497 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
498 if (!PL_multiline && PL_regprev == '\n')
499 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 500 }
bbce6d69 501
a0d0e21e 502 /* Check validity of program. */
22c35a8c 503 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 504 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
505 }
506
3280af22
NIS
507 PL_reg_flags = 0;
508 PL_reg_eval_set = 0;
a0d0e21e 509
a0ed51b3
LW
510 if (prog->reganch & ROPT_UTF8)
511 PL_reg_flags |= RF_utf8;
512
513 /* Mark beginning of line for ^ and lookbehind. */
514 PL_regbol = startpos;
515 PL_bostr = strbeg;
9661b544 516 PL_reg_sv = sv;
a0ed51b3
LW
517
518 /* Mark end of line for $ (and such) */
519 PL_regeol = strend;
520
521 /* see how far we have to get to not match where we matched before */
522 PL_regtill = startpos+minend;
523
0f5d15d6
IZ
524 /* We start without call_cc context. */
525 PL_reg_call_cc = 0;
526
a0d0e21e
LW
527 /* If there is a "must appear" string, look for it. */
528 s = startpos;
cad2e5aa
JH
529
530 if (prog->reganch & ROPT_GPOS_SEEN) {
531 MAGIC *mg;
532
533 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
534 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
535 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 536 else
cad2e5aa
JH
537 PL_reg_ganch = startpos;
538 if (prog->reganch & ROPT_ANCH_GPOS) {
539 if (s > PL_reg_ganch)
540 goto phooey;
541 s = PL_reg_ganch;
a0ed51b3 542 }
a0d0e21e 543 }
a687059c 544
cad2e5aa
JH
545 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
546 re_scream_pos_data d;
547
548 d.scream_olds = &scream_olds;
549 d.scream_pos = &scream_pos;
550 s = re_intuit_start(prog, sv, s, strend, flags, &d);
551 if (!s)
552 goto phooey; /* not present */
553 }
554
555 DEBUG_r( if (!PL_colorset) reginitcolors() );
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
8d300b32
GS
557 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
558 PL_colors[4],PL_colors[5],PL_colors[0],
559 prog->precomp,
560 PL_colors[1],
c277df42 561 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 562 PL_colors[0],
c277df42 563 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 564 startpos, PL_colors[1],
c277df42
IZ
565 (strend - startpos > 60 ? "..." : ""))
566 );
567
a0d0e21e 568 /* Simplest case: anchored match need be tried only once. */
774d564b 569 /* [unless only anchor is BOL and multiline is set] */
22e551b9 570 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 571 if (s == startpos && regtry(prog, startpos))
a0d0e21e 572 goto got_it;
22e551b9
IZ
573 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
574 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 575 {
cad2e5aa
JH
576 char *end;
577
a0d0e21e
LW
578 if (minlen)
579 dontbother = minlen - 1;
cad2e5aa 580 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 581 /* for multiline we only have to try after newlines */
cad2e5aa
JH
582 if (prog->check_substr) {
583 while (1) {
584 if (regtry(prog, s))
a0d0e21e 585 goto got_it;
cad2e5aa
JH
586 if (s >= end)
587 goto phooey;
588 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
589 if (!s)
590 goto phooey;
591 }
592 } else {
593 if (s > startpos)
594 s--;
595 while (s < end) {
596 if (*s++ == '\n') { /* don't need PL_utf8skip here */
597 if (regtry(prog, s))
598 goto got_it;
599 }
600 }
35c8bce7 601 }
35c8bce7 602 }
a0d0e21e 603 goto phooey;
22e551b9
IZ
604 } else if (prog->reganch & ROPT_ANCH_GPOS) {
605 if (regtry(prog, PL_reg_ganch))
606 goto got_it;
607 goto phooey;
a0d0e21e 608 }
35c8bce7 609
a0d0e21e 610 /* Messy cases: unanchored match. */
c277df42
IZ
611 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
612 /* we have /x+whatever/ */
cad2e5aa 613 /* it must be a one character string (XXXX Except UTF?) */
c277df42 614 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
615 if (UTF) {
616 while (s < strend) {
617 if (*s == ch) {
618 if (regtry(prog, s)) goto got_it;
619 s += UTF8SKIP(s);
620 while (s < strend && *s == ch)
621 s += UTF8SKIP(s);
622 }
623 s += UTF8SKIP(s);
624 }
625 }
626 else {
627 while (s < strend) {
628 if (*s == ch) {
629 if (regtry(prog, s)) goto got_it;
c277df42 630 s++;
a0ed51b3
LW
631 while (s < strend && *s == ch)
632 s++;
633 }
634 s++;
a0d0e21e 635 }
a687059c 636 }
c277df42
IZ
637 }
638 /*SUPPRESS 560*/
639 else if (prog->anchored_substr != Nullsv
640 || (prog->float_substr != Nullsv
641 && prog->float_max_offset < strend - s)) {
642 SV *must = prog->anchored_substr
643 ? prog->anchored_substr : prog->float_substr;
644 I32 back_max =
645 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
646 I32 back_min =
647 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
648 I32 delta = back_max - back_min;
cf93c79d 649 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
650 -(I32)(CHR_SVLEN(must)
651 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
652 char *last1; /* Last position checked before */
653
654 if (s > PL_bostr)
dfe13c55 655 last1 = HOPc(s, -1);
a0ed51b3
LW
656 else
657 last1 = s - 1; /* bogus */
c277df42
IZ
658
659 /* XXXX check_substr already used to find `s', can optimize if
660 check_substr==must. */
661 scream_pos = -1;
662 dontbother = end_shift;
dfe13c55 663 strend = HOPc(strend, -dontbother);
c277df42 664 while ( (s <= last) &&
22e551b9
IZ
665 ((flags & REXEC_SCREAM)
666 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 667 end_shift, &scream_pos, 0))
a0ed51b3 668 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
669 (unsigned char*)strend, must,
670 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
671 if (HOPc(s, -back_max) > last1) {
672 last1 = HOPc(s, -back_min);
673 s = HOPc(s, -back_max);
a0ed51b3
LW
674 }
675 else {
dfe13c55 676 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 677
dfe13c55 678 last1 = HOPc(s, -back_min);
c277df42 679 s = t;
a0d0e21e 680 }
a0ed51b3
LW
681 if (UTF) {
682 while (s <= last1) {
683 if (regtry(prog, s))
684 goto got_it;
685 s += UTF8SKIP(s);
686 }
687 }
688 else {
689 while (s <= last1) {
690 if (regtry(prog, s))
691 goto got_it;
692 s++;
693 }
a0d0e21e
LW
694 }
695 }
696 goto phooey;
a0ed51b3
LW
697 }
698 else if (c = prog->regstclass) {
a0d0e21e 699 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 700 char *cc;
a687059c 701
a0d0e21e
LW
702 if (minlen)
703 dontbother = minlen - 1;
dfe13c55 704 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
705 tmp = 1;
706 /* We know what class it must start with. */
707 switch (OP(c)) {
a0ed51b3
LW
708 case ANYOFUTF8:
709 cc = (char *) OPERAND(c);
710 while (s < strend) {
711 if (REGINCLASSUTF8(c, (U8*)s)) {
712 if (tmp && regtry(prog, s))
713 goto got_it;
714 else
715 tmp = doevery;
716 }
717 else
718 tmp = 1;
719 s += UTF8SKIP(s);
720 }
721 break;
a0d0e21e 722 case ANYOF:
a0ed51b3 723 cc = (char *) OPERAND(c);
a0d0e21e 724 while (s < strend) {
a0ed51b3 725 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
726 if (tmp && regtry(prog, s))
727 goto got_it;
728 else
729 tmp = doevery;
a687059c 730 }
a0d0e21e
LW
731 else
732 tmp = 1;
733 s++;
734 }
735 break;
bbce6d69 736 case BOUNDL:
3280af22 737 PL_reg_flags |= RF_tainted;
bbce6d69 738 /* FALL THROUGH */
a0d0e21e 739 case BOUND:
a0ed51b3
LW
740 if (minlen) {
741 dontbother++;
742 strend -= 1;
743 }
3280af22 744 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 745 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 746 while (s < strend) {
95bac841 747 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
748 tmp = !tmp;
749 if (regtry(prog, s))
750 goto got_it;
a687059c 751 }
a0d0e21e
LW
752 s++;
753 }
754 if ((minlen || tmp) && regtry(prog,s))
755 goto got_it;
756 break;
a0ed51b3
LW
757 case BOUNDLUTF8:
758 PL_reg_flags |= RF_tainted;
759 /* FALL THROUGH */
760 case BOUNDUTF8:
761 if (minlen) {
762 dontbother++;
dfe13c55 763 strend = reghop_c(strend, -1);
a0ed51b3 764 }
dfe13c55 765 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
766 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
767 while (s < strend) {
dfe13c55
GS
768 if (tmp == !(OP(c) == BOUND ?
769 swash_fetch(PL_utf8_alnum, (U8*)s) :
770 isALNUM_LC_utf8((U8*)s)))
771 {
a0ed51b3
LW
772 tmp = !tmp;
773 if (regtry(prog, s))
774 goto got_it;
775 }
776 s += UTF8SKIP(s);
777 }
778 if ((minlen || tmp) && regtry(prog,s))
779 goto got_it;
780 break;
bbce6d69 781 case NBOUNDL:
3280af22 782 PL_reg_flags |= RF_tainted;
bbce6d69 783 /* FALL THROUGH */
a0d0e21e 784 case NBOUND:
a0ed51b3
LW
785 if (minlen) {
786 dontbother++;
787 strend -= 1;
788 }
3280af22 789 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 790 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 791 while (s < strend) {
95bac841 792 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
793 tmp = !tmp;
794 else if (regtry(prog, s))
795 goto got_it;
796 s++;
797 }
798 if ((minlen || !tmp) && regtry(prog,s))
799 goto got_it;
800 break;
a0ed51b3
LW
801 case NBOUNDLUTF8:
802 PL_reg_flags |= RF_tainted;
803 /* FALL THROUGH */
804 case NBOUNDUTF8:
805 if (minlen) {
806 dontbother++;
dfe13c55 807 strend = reghop_c(strend, -1);
a0ed51b3 808 }
dfe13c55 809 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
810 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
811 while (s < strend) {
dfe13c55
GS
812 if (tmp == !(OP(c) == NBOUND ?
813 swash_fetch(PL_utf8_alnum, (U8*)s) :
814 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
815 tmp = !tmp;
816 else if (regtry(prog, s))
817 goto got_it;
818 s += UTF8SKIP(s);
819 }
820 if ((minlen || !tmp) && regtry(prog,s))
821 goto got_it;
822 break;
a0d0e21e
LW
823 case ALNUM:
824 while (s < strend) {
bbce6d69 825 if (isALNUM(*s)) {
826 if (tmp && regtry(prog, s))
827 goto got_it;
828 else
829 tmp = doevery;
830 }
831 else
832 tmp = 1;
833 s++;
834 }
835 break;
a0ed51b3
LW
836 case ALNUMUTF8:
837 while (s < strend) {
dfe13c55 838 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
839 if (tmp && regtry(prog, s))
840 goto got_it;
841 else
842 tmp = doevery;
843 }
844 else
845 tmp = 1;
846 s += UTF8SKIP(s);
847 }
848 break;
bbce6d69 849 case ALNUML:
3280af22 850 PL_reg_flags |= RF_tainted;
bbce6d69 851 while (s < strend) {
852 if (isALNUM_LC(*s)) {
a0d0e21e
LW
853 if (tmp && regtry(prog, s))
854 goto got_it;
a687059c 855 else
a0d0e21e
LW
856 tmp = doevery;
857 }
858 else
859 tmp = 1;
860 s++;
861 }
862 break;
a0ed51b3
LW
863 case ALNUMLUTF8:
864 PL_reg_flags |= RF_tainted;
865 while (s < strend) {
dfe13c55 866 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
867 if (tmp && regtry(prog, s))
868 goto got_it;
869 else
870 tmp = doevery;
871 }
872 else
873 tmp = 1;
874 s += UTF8SKIP(s);
875 }
876 break;
a0d0e21e
LW
877 case NALNUM:
878 while (s < strend) {
bbce6d69 879 if (!isALNUM(*s)) {
880 if (tmp && regtry(prog, s))
881 goto got_it;
882 else
883 tmp = doevery;
884 }
885 else
886 tmp = 1;
887 s++;
888 }
889 break;
a0ed51b3
LW
890 case NALNUMUTF8:
891 while (s < strend) {
dfe13c55 892 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
893 if (tmp && regtry(prog, s))
894 goto got_it;
895 else
896 tmp = doevery;
897 }
898 else
899 tmp = 1;
900 s += UTF8SKIP(s);
901 }
902 break;
bbce6d69 903 case NALNUML:
3280af22 904 PL_reg_flags |= RF_tainted;
bbce6d69 905 while (s < strend) {
906 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
907 if (tmp && regtry(prog, s))
908 goto got_it;
a687059c 909 else
a0d0e21e 910 tmp = doevery;
a687059c 911 }
a0d0e21e
LW
912 else
913 tmp = 1;
914 s++;
915 }
916 break;
a0ed51b3
LW
917 case NALNUMLUTF8:
918 PL_reg_flags |= RF_tainted;
919 while (s < strend) {
dfe13c55 920 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
921 if (tmp && regtry(prog, s))
922 goto got_it;
923 else
924 tmp = doevery;
925 }
926 else
927 tmp = 1;
928 s += UTF8SKIP(s);
929 }
930 break;
a0d0e21e
LW
931 case SPACE:
932 while (s < strend) {
933 if (isSPACE(*s)) {
934 if (tmp && regtry(prog, s))
935 goto got_it;
936 else
937 tmp = doevery;
2304df62 938 }
a0d0e21e
LW
939 else
940 tmp = 1;
941 s++;
942 }
943 break;
a0ed51b3
LW
944 case SPACEUTF8:
945 while (s < strend) {
dfe13c55 946 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
947 if (tmp && regtry(prog, s))
948 goto got_it;
949 else
950 tmp = doevery;
951 }
952 else
953 tmp = 1;
954 s += UTF8SKIP(s);
955 }
956 break;
bbce6d69 957 case SPACEL:
3280af22 958 PL_reg_flags |= RF_tainted;
bbce6d69 959 while (s < strend) {
960 if (isSPACE_LC(*s)) {
961 if (tmp && regtry(prog, s))
962 goto got_it;
963 else
964 tmp = doevery;
965 }
966 else
967 tmp = 1;
968 s++;
969 }
970 break;
a0ed51b3
LW
971 case SPACELUTF8:
972 PL_reg_flags |= RF_tainted;
973 while (s < strend) {
dfe13c55 974 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
975 if (tmp && regtry(prog, s))
976 goto got_it;
977 else
978 tmp = doevery;
979 }
980 else
981 tmp = 1;
982 s += UTF8SKIP(s);
983 }
984 break;
a0d0e21e
LW
985 case NSPACE:
986 while (s < strend) {
987 if (!isSPACE(*s)) {
988 if (tmp && regtry(prog, s))
989 goto got_it;
990 else
991 tmp = doevery;
a687059c 992 }
a0d0e21e
LW
993 else
994 tmp = 1;
995 s++;
996 }
997 break;
a0ed51b3
LW
998 case NSPACEUTF8:
999 while (s < strend) {
dfe13c55 1000 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1001 if (tmp && regtry(prog, s))
1002 goto got_it;
1003 else
1004 tmp = doevery;
1005 }
1006 else
1007 tmp = 1;
1008 s += UTF8SKIP(s);
1009 }
1010 break;
bbce6d69 1011 case NSPACEL:
3280af22 1012 PL_reg_flags |= RF_tainted;
bbce6d69 1013 while (s < strend) {
1014 if (!isSPACE_LC(*s)) {
1015 if (tmp && regtry(prog, s))
1016 goto got_it;
1017 else
1018 tmp = doevery;
1019 }
1020 else
1021 tmp = 1;
1022 s++;
1023 }
1024 break;
a0ed51b3
LW
1025 case NSPACELUTF8:
1026 PL_reg_flags |= RF_tainted;
1027 while (s < strend) {
dfe13c55 1028 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1029 if (tmp && regtry(prog, s))
1030 goto got_it;
1031 else
1032 tmp = doevery;
1033 }
1034 else
1035 tmp = 1;
1036 s += UTF8SKIP(s);
1037 }
1038 break;
a0d0e21e
LW
1039 case DIGIT:
1040 while (s < strend) {
1041 if (isDIGIT(*s)) {
1042 if (tmp && regtry(prog, s))
1043 goto got_it;
1044 else
1045 tmp = doevery;
2b69d0c2 1046 }
a0d0e21e
LW
1047 else
1048 tmp = 1;
1049 s++;
1050 }
1051 break;
a0ed51b3
LW
1052 case DIGITUTF8:
1053 while (s < strend) {
dfe13c55 1054 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1055 if (tmp && regtry(prog, s))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
1060 else
1061 tmp = 1;
1062 s += UTF8SKIP(s);
1063 }
1064 break;
b8c5462f
JH
1065 case DIGITL:
1066 PL_reg_flags |= RF_tainted;
1067 while (s < strend) {
1068 if (isDIGIT_LC(*s)) {
1069 if (tmp && regtry(prog, s))
1070 goto got_it;
1071 else
1072 tmp = doevery;
1073 }
1074 else
1075 tmp = 1;
1076 s++;
1077 }
1078 break;
1079 case DIGITLUTF8:
1080 PL_reg_flags |= RF_tainted;
1081 while (s < strend) {
1082 if (isDIGIT_LC_utf8((U8*)s)) {
1083 if (tmp && regtry(prog, s))
1084 goto got_it;
1085 else
1086 tmp = doevery;
1087 }
1088 else
1089 tmp = 1;
1090 s += UTF8SKIP(s);
1091 }
1092 break;
a0d0e21e
LW
1093 case NDIGIT:
1094 while (s < strend) {
1095 if (!isDIGIT(*s)) {
1096 if (tmp && regtry(prog, s))
1097 goto got_it;
1098 else
1099 tmp = doevery;
a687059c 1100 }
a0d0e21e
LW
1101 else
1102 tmp = 1;
1103 s++;
1104 }
1105 break;
a0ed51b3
LW
1106 case NDIGITUTF8:
1107 while (s < strend) {
dfe13c55 1108 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1109 if (tmp && regtry(prog, s))
1110 goto got_it;
1111 else
1112 tmp = doevery;
1113 }
1114 else
1115 tmp = 1;
1116 s += UTF8SKIP(s);
1117 }
1118 break;
b8c5462f
JH
1119 case NDIGITL:
1120 PL_reg_flags |= RF_tainted;
1121 while (s < strend) {
1122 if (!isDIGIT_LC(*s)) {
1123 if (tmp && regtry(prog, s))
1124 goto got_it;
1125 else
1126 tmp = doevery;
1127 }
1128 else
1129 tmp = 1;
1130 s++;
a0ed51b3 1131 }
b8c5462f
JH
1132 break;
1133 case NDIGITLUTF8:
1134 PL_reg_flags |= RF_tainted;
1135 while (s < strend) {
1136 if (!isDIGIT_LC_utf8((U8*)s)) {
1137 if (tmp && regtry(prog, s))
1138 goto got_it;
cf93c79d 1139 else
b8c5462f
JH
1140 tmp = doevery;
1141 }
1142 else
1143 tmp = 1;
1144 s += UTF8SKIP(s);
1145 }
1146 break;
1147 case ALNUMC:
1148 while (s < strend) {
1149 if (isALNUMC(*s)) {
1150 if (tmp && regtry(prog, s))
1151 goto got_it;
cf93c79d 1152 else
b8c5462f 1153 tmp = doevery;
cf93c79d 1154 }
b8c5462f
JH
1155 else
1156 tmp = 1;
1157 s++;
c277df42 1158 }
b8c5462f
JH
1159 break;
1160 case ALNUMCUTF8:
1161 while (s < strend) {
1162 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1163 if (tmp && regtry(prog, s))
1164 goto got_it;
1165 else
1166 tmp = doevery;
1167 }
1168 else
1169 tmp = 1;
a0ed51b3 1170 s += UTF8SKIP(s);
9661b544 1171 }
b8c5462f
JH
1172 break;
1173 case ALNUMCL:
1174 PL_reg_flags |= RF_tainted;
1175 while (s < strend) {
1176 if (isALNUMC_LC(*s)) {
1177 if (tmp && regtry(prog, s))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
1182 else
1183 tmp = 1;
1184 s++;
9661b544 1185 }
b8c5462f
JH
1186 break;
1187 case ALNUMCLUTF8:
1188 PL_reg_flags |= RF_tainted;
1189 while (s < strend) {
1190 if (isALNUMC_LC_utf8((U8*)s)) {
1191 if (tmp && regtry(prog, s))
1192 goto got_it;
1193 else
1194 tmp = doevery;
1195 }
1196 else
1197 tmp = 1;
1198 s += UTF8SKIP(s);
1199 }
1200 break;
1201 case NALNUMC:
1202 while (s < strend) {
1203 if (!isALNUMC(*s)) {
1204 if (tmp && regtry(prog, s))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s++;
1212 }
1213 break;
1214 case NALNUMCUTF8:
1215 while (s < strend) {
1216 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1217 if (tmp && regtry(prog, s))
1218 goto got_it;
1219 else
1220 tmp = doevery;
1221 }
1222 else
1223 tmp = 1;
1224 s += UTF8SKIP(s);
1225 }
1226 break;
1227 case NALNUMCL:
1228 PL_reg_flags |= RF_tainted;
1229 while (s < strend) {
1230 if (!isALNUMC_LC(*s)) {
1231 if (tmp && regtry(prog, s))
1232 goto got_it;
1233 else
1234 tmp = doevery;
1235 }
1236 else
1237 tmp = 1;
1238 s++;
1239 }
1240 break;
1241 case NALNUMCLUTF8:
1242 PL_reg_flags |= RF_tainted;
1243 while (s < strend) {
1244 if (!isALNUMC_LC_utf8((U8*)s)) {
1245 if (tmp && regtry(prog, s))
1246 goto got_it;
1247 else
1248 tmp = doevery;
1249 }
1250 else
1251 tmp = 1;
1252 s += UTF8SKIP(s);
1253 }
1254 break;
1255 case ASCII:
1256 while (s < strend) {
f248d071 1257 if (isASCII(*(U8*)s)) {
b8c5462f
JH
1258 if (tmp && regtry(prog, s))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s++;
1266 }
1267 break;
1268 case NASCII:
1269 while (s < strend) {
f248d071 1270 if (!isASCII(*(U8*)s)) {
b8c5462f
JH
1271 if (tmp && regtry(prog, s))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
1276 else
1277 tmp = 1;
1278 s++;
1279 }
1280 break;
1281 case CNTRL:
1282 while (s < strend) {
1283 if (isCNTRL(*s)) {
1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s++;
1292 }
1293 break;
1294 case CNTRLUTF8:
1295 while (s < strend) {
1296 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1297 if (tmp && regtry(prog, s))
1298 goto got_it;
1299 else
1300 tmp = doevery;
1301 }
1302 else
1303 tmp = 1;
1304 s += UTF8SKIP(s);
1305 }
1306 break;
1307 case CNTRLL:
1308 PL_reg_flags |= RF_tainted;
1309 while (s < strend) {
1310 if (isCNTRL_LC(*s)) {
1311 if (tmp && regtry(prog, s))
1312 goto got_it;
1313 else
1314 tmp = doevery;
1315 }
1316 else
1317 tmp = 1;
1318 s++;
1319 }
1320 break;
1321 case CNTRLLUTF8:
1322 PL_reg_flags |= RF_tainted;
1323 while (s < strend) {
1324 if (*s == ' ' || isCNTRL_LC_utf8((U8*)s)) {
1325 if (tmp && regtry(prog, s))
1326 goto got_it;
1327 else
1328 tmp = doevery;
1329 }
1330 else
1331 tmp = 1;
1332 s += UTF8SKIP(s);
1333 }
1334 break;
1335 case NCNTRL:
1336 while (s < strend) {
1337 if (!isCNTRL(*s)) {
1338 if (tmp && regtry(prog, s))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
1343 else
1344 tmp = 1;
1345 s++;
1346 }
1347 break;
1348 case NCNTRLUTF8:
1349 while (s < strend) {
1350 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1351 if (tmp && regtry(prog, s))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
1356 else
1357 tmp = 1;
1358 s += UTF8SKIP(s);
1359 }
1360 break;
1361 case NCNTRLL:
1362 PL_reg_flags |= RF_tainted;
1363 while (s < strend) {
1364 if (!isCNTRL_LC(*s)) {
1365 if (tmp && regtry(prog, s))
1366 goto got_it;
1367 else
1368 tmp = doevery;
1369 }
1370 else
1371 tmp = 1;
1372 s++;
1373 }
1374 break;
1375 case NCNTRLLUTF8:
1376 PL_reg_flags |= RF_tainted;
1377 while (s < strend) {
1378 if (!isCNTRL_LC_utf8((U8*)s)) {
1379 if (tmp && regtry(prog, s))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
1384 else
1385 tmp = 1;
1386 s += UTF8SKIP(s);
1387 }
1388 break;
1389 case GRAPH:
1390 while (s < strend) {
1391 if (isGRAPH(*s)) {
1392 if (tmp && regtry(prog, s))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
1397 else
1398 tmp = 1;
1399 s++;
1400 }
1401 break;
1402 case GRAPHUTF8:
1403 while (s < strend) {
1404 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1405 if (tmp && regtry(prog, s))
1406 goto got_it;
1407 else
1408 tmp = doevery;
1409 }
1410 else
1411 tmp = 1;
1412 s += UTF8SKIP(s);
1413 }
1414 break;
1415 case GRAPHL:
1416 PL_reg_flags |= RF_tainted;
1417 while (s < strend) {
1418 if (isGRAPH_LC(*s)) {
1419 if (tmp && regtry(prog, s))
1420 goto got_it;
1421 else
1422 tmp = doevery;
1423 }
1424 else
1425 tmp = 1;
1426 s++;
1427 }
1428 break;
1429 case GRAPHLUTF8:
1430 PL_reg_flags |= RF_tainted;
1431 while (s < strend) {
1432 if (*s == ' ' || isGRAPH_LC_utf8((U8*)s)) {
1433 if (tmp && regtry(prog, s))
1434 goto got_it;
1435 else
1436 tmp = doevery;
1437 }
1438 else
1439 tmp = 1;
1440 s += UTF8SKIP(s);
1441 }
1442 break;
1443 case NGRAPH:
1444 while (s < strend) {
1445 if (!isGRAPH(*s)) {
1446 if (tmp && regtry(prog, s))
1447 goto got_it;
1448 else
1449 tmp = doevery;
1450 }
1451 else
1452 tmp = 1;
1453 s++;
1454 }
1455 break;
1456 case NGRAPHUTF8:
1457 while (s < strend) {
1458 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1459 if (tmp && regtry(prog, s))
1460 goto got_it;
1461 else
1462 tmp = doevery;
1463 }
1464 else
1465 tmp = 1;
1466 s += UTF8SKIP(s);
1467 }
1468 break;
1469 case NGRAPHL:
1470 PL_reg_flags |= RF_tainted;
1471 while (s < strend) {
1472 if (!isGRAPH_LC(*s)) {
1473 if (tmp && regtry(prog, s))
1474 goto got_it;
1475 else
1476 tmp = doevery;
1477 }
1478 else
1479 tmp = 1;
1480 s++;
1481 }
1482 break;
1483 case NGRAPHLUTF8:
1484 PL_reg_flags |= RF_tainted;
1485 while (s < strend) {
1486 if (!isGRAPH_LC_utf8((U8*)s)) {
1487 if (tmp && regtry(prog, s))
1488 goto got_it;
1489 else
1490 tmp = doevery;
1491 }
1492 else
1493 tmp = 1;
1494 s += UTF8SKIP(s);
1495 }
1496 break;
1497 case LOWER:
1498 while (s < strend) {
1499 if (isLOWER(*s)) {
1500 if (tmp && regtry(prog, s))
1501 goto got_it;
1502 else
1503 tmp = doevery;
1504 }
1505 else
1506 tmp = 1;
1507 s++;
1508 }
1509 break;
1510 case LOWERUTF8:
1511 while (s < strend) {
1512 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1513 if (tmp && regtry(prog, s))
1514 goto got_it;
1515 else
1516 tmp = doevery;
1517 }
1518 else
1519 tmp = 1;
1520 s += UTF8SKIP(s);
1521 }
1522 break;
1523 case LOWERL:
1524 PL_reg_flags |= RF_tainted;
1525 while (s < strend) {
1526 if (isLOWER_LC(*s)) {
1527 if (tmp && regtry(prog, s))
1528 goto got_it;
1529 else
1530 tmp = doevery;
1531 }
1532 else
1533 tmp = 1;
1534 s++;
1535 }
1536 break;
1537 case LOWERLUTF8:
1538 PL_reg_flags |= RF_tainted;
1539 while (s < strend) {
1540 if (*s == ' ' || isLOWER_LC_utf8((U8*)s)) {
1541 if (tmp && regtry(prog, s))
1542 goto got_it;
1543 else
1544 tmp = doevery;
1545 }
1546 else
1547 tmp = 1;
1548 s += UTF8SKIP(s);
1549 }
1550 break;
1551 case NLOWER:
1552 while (s < strend) {
1553 if (!isLOWER(*s)) {
1554 if (tmp && regtry(prog, s))
1555 goto got_it;
1556 else
1557 tmp = doevery;
1558 }
1559 else
1560 tmp = 1;
1561 s++;
1562 }
1563 break;
1564 case NLOWERUTF8:
1565 while (s < strend) {
1566 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1567 if (tmp && regtry(prog, s))
1568 goto got_it;
1569 else
1570 tmp = doevery;
1571 }
1572 else
1573 tmp = 1;
1574 s += UTF8SKIP(s);
1575 }
1576 break;
1577 case NLOWERL:
1578 PL_reg_flags |= RF_tainted;
1579 while (s < strend) {
1580 if (!isLOWER_LC(*s)) {
1581 if (tmp && regtry(prog, s))
1582 goto got_it;
1583 else
1584 tmp = doevery;
1585 }
1586 else
1587 tmp = 1;
1588 s++;
1589 }
1590 break;
1591 case NLOWERLUTF8:
1592 PL_reg_flags |= RF_tainted;
1593 while (s < strend) {
1594 if (!isLOWER_LC_utf8((U8*)s)) {
1595 if (tmp && regtry(prog, s))
1596 goto got_it;
1597 else
1598 tmp = doevery;
1599 }
1600 else
1601 tmp = 1;
1602 s += UTF8SKIP(s);
1603 }
1604 break;
1605 case PRINT:
1606 while (s < strend) {
1607 if (isPRINT(*s)) {
1608 if (tmp && regtry(prog, s))
1609 goto got_it;
1610 else
1611 tmp = doevery;
1612 }
1613 else
1614 tmp = 1;
1615 s++;
1616 }
1617 break;
1618 case PRINTUTF8:
1619 while (s < strend) {
1620 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1621 if (tmp && regtry(prog, s))
1622 goto got_it;
1623 else
1624 tmp = doevery;
1625 }
1626 else
1627 tmp = 1;
1628 s += UTF8SKIP(s);
1629 }
1630 break;
1631 case PRINTL:
1632 PL_reg_flags |= RF_tainted;
1633 while (s < strend) {
1634 if (isPRINT_LC(*s)) {
1635 if (tmp && regtry(prog, s))
1636 goto got_it;
1637 else
1638 tmp = doevery;
1639 }
1640 else
1641 tmp = 1;
1642 s++;
1643 }
1644 break;
1645 case PRINTLUTF8:
1646 PL_reg_flags |= RF_tainted;
1647 while (s < strend) {
1648 if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
1649 if (tmp && regtry(prog, s))
1650 goto got_it;
1651 else
1652 tmp = doevery;
1653 }
1654 else
1655 tmp = 1;
1656 s += UTF8SKIP(s);
1657 }
1658 break;
1659 case NPRINT:
1660 while (s < strend) {
1661 if (!isPRINT(*s)) {
1662 if (tmp && regtry(prog, s))
1663 goto got_it;
1664 else
1665 tmp = doevery;
1666 }
1667 else
1668 tmp = 1;
1669 s++;
1670 }
1671 break;
1672 case NPRINTUTF8:
1673 while (s < strend) {
1674 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1675 if (tmp && regtry(prog, s))
1676 goto got_it;
1677 else
1678 tmp = doevery;
1679 }
1680 else
1681 tmp = 1;
1682 s += UTF8SKIP(s);
1683 }
1684 break;
1685 case NPRINTL:
1686 PL_reg_flags |= RF_tainted;
1687 while (s < strend) {
1688 if (!isPRINT_LC(*s)) {
1689 if (tmp && regtry(prog, s))
1690 goto got_it;
1691 else
1692 tmp = doevery;
1693 }
1694 else
1695 tmp = 1;
1696 s++;
1697 }
1698 break;
1699 case NPRINTLUTF8:
1700 PL_reg_flags |= RF_tainted;
1701 while (s < strend) {
1702 if (!isPRINT_LC_utf8((U8*)s)) {
1703 if (tmp && regtry(prog, s))
1704 goto got_it;
1705 else
1706 tmp = doevery;
1707 }
1708 else
1709 tmp = 1;
1710 s += UTF8SKIP(s);
1711 }
1712 break;
1713 case PUNCT:
1714 while (s < strend) {
1715 if (isPUNCT(*s)) {
1716 if (tmp && regtry(prog, s))
1717 goto got_it;
1718 else
1719 tmp = doevery;
1720 }
1721 else
1722 tmp = 1;
1723 s++;
1724 }
1725 break;
1726 case PUNCTUTF8:
1727 while (s < strend) {
1728 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1729 if (tmp && regtry(prog, s))
1730 goto got_it;
1731 else
1732 tmp = doevery;
1733 }
1734 else
1735 tmp = 1;
1736 s += UTF8SKIP(s);
1737 }
1738 break;
1739 case PUNCTL:
1740 PL_reg_flags |= RF_tainted;
1741 while (s < strend) {
1742 if (isPUNCT_LC(*s)) {
1743 if (tmp && regtry(prog, s))
1744 goto got_it;
1745 else
1746 tmp = doevery;
1747 }
1748 else
1749 tmp = 1;
1750 s++;
1751 }
1752 break;
1753 case PUNCTLUTF8:
1754 PL_reg_flags |= RF_tainted;
1755 while (s < strend) {
1756 if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
1757 if (tmp && regtry(prog, s))
1758 goto got_it;
1759 else
1760 tmp = doevery;
1761 }
1762 else
1763 tmp = 1;
1764 s += UTF8SKIP(s);
1765 }
1766 break;
1767 case NPUNCT:
1768 while (s < strend) {
1769 if (!isPUNCT(*s)) {
1770 if (tmp && regtry(prog, s))
1771 goto got_it;
1772 else
1773 tmp = doevery;
1774 }
1775 else
1776 tmp = 1;
1777 s++;
1778 }
1779 break;
1780 case NPUNCTUTF8:
1781 while (s < strend) {
1782 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1783 if (tmp && regtry(prog, s))
1784 goto got_it;
1785 else
1786 tmp = doevery;
1787 }
1788 else
1789 tmp = 1;
1790 s += UTF8SKIP(s);
1791 }
1792 break;
1793 case NPUNCTL:
1794 PL_reg_flags |= RF_tainted;
1795 while (s < strend) {
1796 if (!isPUNCT_LC(*s)) {
1797 if (tmp && regtry(prog, s))
1798 goto got_it;
1799 else
1800 tmp = doevery;
1801 }
1802 else
1803 tmp = 1;
1804 s++;
1805 }
1806 break;
1807 case NPUNCTLUTF8:
1808 PL_reg_flags |= RF_tainted;
1809 while (s < strend) {
1810 if (!isPUNCT_LC_utf8((U8*)s)) {
1811 if (tmp && regtry(prog, s))
1812 goto got_it;
1813 else
1814 tmp = doevery;
1815 }
1816 else
1817 tmp = 1;
1818 s += UTF8SKIP(s);
1819 }
1820 break;
1821 case UPPER:
1822 while (s < strend) {
1823 if (isUPPER(*s)) {
1824 if (tmp && regtry(prog, s))
1825 goto got_it;
1826 else
1827 tmp = doevery;
1828 }
1829 else
1830 tmp = 1;
1831 s++;
1832 }
1833 break;
1834 case UPPERUTF8:
1835 while (s < strend) {
1836 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1837 if (tmp && regtry(prog, s))
1838 goto got_it;
1839 else
1840 tmp = doevery;
1841 }
1842 else
1843 tmp = 1;
1844 s += UTF8SKIP(s);
1845 }
1846 break;
1847 case UPPERL:
1848 PL_reg_flags |= RF_tainted;
1849 while (s < strend) {
1850 if (isUPPER_LC(*s)) {
1851 if (tmp && regtry(prog, s))
1852 goto got_it;
1853 else
1854 tmp = doevery;
1855 }
1856 else
1857 tmp = 1;
1858 s++;
1859 }
1860 break;
1861 case UPPERLUTF8:
1862 PL_reg_flags |= RF_tainted;
1863 while (s < strend) {
1864 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1865 if (tmp && regtry(prog, s))
1866 goto got_it;
1867 else
1868 tmp = doevery;
1869 }
1870 else
1871 tmp = 1;
1872 s += UTF8SKIP(s);
1873 }
1874 break;
1875 case NUPPER:
1876 while (s < strend) {
1877 if (!isUPPER(*s)) {
1878 if (tmp && regtry(prog, s))
1879 goto got_it;
1880 else
1881 tmp = doevery;
1882 }
1883 else
1884 tmp = 1;
1885 s++;
1886 }
1887 break;
1888 case NUPPERUTF8:
1889 while (s < strend) {
1890 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1891 if (tmp && regtry(prog, s))
1892 goto got_it;
1893 else
1894 tmp = doevery;
1895 }
1896 else
1897 tmp = 1;
1898 s += UTF8SKIP(s);
1899 }
1900 break;
1901 case NUPPERL:
1902 PL_reg_flags |= RF_tainted;
1903 while (s < strend) {
1904 if (!isUPPER_LC(*s)) {
1905 if (tmp && regtry(prog, s))
1906 goto got_it;
1907 else
1908 tmp = doevery;
1909 }
1910 else
1911 tmp = 1;
1912 s++;
1913 }
1914 break;
1915 case NUPPERLUTF8:
1916 PL_reg_flags |= RF_tainted;
1917 while (s < strend) {
1918 if (!isUPPER_LC_utf8((U8*)s)) {
1919 if (tmp && regtry(prog, s))
1920 goto got_it;
1921 else
1922 tmp = doevery;
1923 }
1924 else
1925 tmp = 1;
1926 s += UTF8SKIP(s);
1927 }
1928 break;
1929 case XDIGIT:
1930 while (s < strend) {
1931 if (isXDIGIT(*s)) {
1932 if (tmp && regtry(prog, s))
1933 goto got_it;
1934 else
1935 tmp = doevery;
1936 }
1937 else
1938 tmp = 1;
1939 s++;
1940 }
1941 break;
1942 case NXDIGIT:
1943 while (s < strend) {
1944 if (!isXDIGIT(*s)) {
1945 if (tmp && regtry(prog, s))
1946 goto got_it;
1947 else
1948 tmp = doevery;
1949 }
1950 else
1951 tmp = 1;
1952 s++;
1953 }
1954 break;
1955 }
1956 }
1957 else {
1958 dontbother = 0;
1959 if (prog->float_substr != Nullsv) { /* Trim the end. */
1960 char *last;
1961 I32 oldpos = scream_pos;
1962
1963 if (flags & REXEC_SCREAM) {
1964 last = screaminstr(sv, prog->float_substr, s - strbeg,
1965 end_shift, &scream_pos, 1); /* last one */
1966 if (!last)
1967 last = scream_olds; /* Only one occurence. */
1968 }
1969 else {
1970 STRLEN len;
1971 char *little = SvPV(prog->float_substr, len);
1972
1973 if (SvTAIL(prog->float_substr)) {
1974 if (memEQ(strend - len + 1, little, len - 1))
1975 last = strend - len + 1;
1976 else if (!PL_multiline)
1977 last = memEQ(strend - len, little, len)
1978 ? strend - len : Nullch;
1979 else
1980 goto find_last;
1981 } else {
1982 find_last:
1983 if (len)
1984 last = rninstr(s, strend, little, little + len);
1985 else
1986 last = strend; /* matching `$' */
1987 }
1988 }
1989 if (last == NULL) goto phooey; /* Should not happen! */
1990 dontbother = strend - last + prog->float_min_offset;
1991 }
1992 if (minlen && (dontbother < minlen))
1993 dontbother = minlen - 1;
1994 strend -= dontbother; /* this one's always in bytes! */
1995 /* We don't know much -- general case. */
1996 if (UTF) {
1997 for (;;) {
1998 if (regtry(prog, s))
1999 goto got_it;
2000 if (s >= strend)
2001 break;
2002 s += UTF8SKIP(s);
2003 };
2004 }
2005 else {
2006 do {
2007 if (regtry(prog, s))
2008 goto got_it;
2009 } while (s++ < strend);
2010 }
2011 }
2012
2013 /* Failure. */
2014 goto phooey;
2015
2016got_it:
2017 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2018
2019 if (PL_reg_eval_set) {
2020 /* Preserve the current value of $^R */
2021 if (oreplsv != GvSV(PL_replgv))
2022 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2023 restored, the value remains
2024 the same. */
2025 restore_pos(0);
2026 }
2027
2028 /* make sure $`, $&, $', and $digit will work later */
2029 if ( !(flags & REXEC_NOT_FIRST) ) {
2030 if (RX_MATCH_COPIED(prog)) {
2031 Safefree(prog->subbeg);
2032 RX_MATCH_COPIED_off(prog);
2033 }
2034 if (flags & REXEC_COPY_STR) {
2035 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2036
2037 s = savepvn(strbeg, i);
2038 prog->subbeg = s;
2039 prog->sublen = i;
2040 RX_MATCH_COPIED_on(prog);
2041 }
2042 else {
2043 prog->subbeg = strbeg;
2044 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2045 }
2046 }
2047
2048 return 1;
2049
2050phooey:
2051 if (PL_reg_eval_set)
2052 restore_pos(0);
2053 return 0;
2054}
2055
2056/*
2057 - regtry - try match at specific point
2058 */
2059STATIC I32 /* 0 failure, 1 success */
2060S_regtry(pTHX_ regexp *prog, char *startpos)
2061{
2062 dTHR;
2063 register I32 i;
2064 register I32 *sp;
2065 register I32 *ep;
2066 CHECKPOINT lastcp;
2067
2068 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2069 MAGIC *mg;
2070
2071 PL_reg_eval_set = RS_init;
2072 DEBUG_r(DEBUG_s(
2073 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
2074 PL_stack_sp - PL_stack_base);
2075 ));
2076 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2077 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2078 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2079 SAVETMPS;
2080 /* Apparently this is not needed, judging by wantarray. */
2081 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2082 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2083
2084 if (PL_reg_sv) {
2085 /* Make $_ available to executed code. */
2086 if (PL_reg_sv != DEFSV) {
2087 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2088 SAVESPTR(DEFSV);
2089 DEFSV = PL_reg_sv;
2090 }
2091
2092 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2093 && (mg = mg_find(PL_reg_sv, 'g')))) {
2094 /* prepare for quick setting of pos */
2095 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2096 mg = mg_find(PL_reg_sv, 'g');
2097 mg->mg_len = -1;
2098 }
2099 PL_reg_magic = mg;
2100 PL_reg_oldpos = mg->mg_len;
2101 SAVEDESTRUCTOR(S_restore_pos, 0);
2102 }
2103 if (!PL_reg_curpm)
2104 New(22,PL_reg_curpm, 1, PMOP);
2105 PL_reg_curpm->op_pmregexp = prog;
2106 PL_reg_oldcurpm = PL_curpm;
2107 PL_curpm = PL_reg_curpm;
2108 if (RX_MATCH_COPIED(prog)) {
2109 /* Here is a serious problem: we cannot rewrite subbeg,
2110 since it may be needed if this match fails. Thus
2111 $` inside (?{}) could fail... */
2112 PL_reg_oldsaved = prog->subbeg;
2113 PL_reg_oldsavedlen = prog->sublen;
2114 RX_MATCH_COPIED_off(prog);
2115 }
2116 else
2117 PL_reg_oldsaved = Nullch;
2118 prog->subbeg = PL_bostr;
2119 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2120 }
2121 prog->startp[0] = startpos - PL_bostr;
2122 PL_reginput = startpos;
2123 PL_regstartp = prog->startp;
2124 PL_regendp = prog->endp;
2125 PL_reglastparen = &prog->lastparen;
2126 prog->lastparen = 0;
2127 PL_regsize = 0;
2128 DEBUG_r(PL_reg_starttry = startpos);
2129 if (PL_reg_start_tmpl <= prog->nparens) {
2130 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2131 if(PL_reg_start_tmp)
2132 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2133 else
2134 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2135 }
2136
2137 /* XXXX What this code is doing here?!!! There should be no need
2138 to do this again and again, PL_reglastparen should take care of
2139 this! */
2140 sp = prog->startp;
2141 ep = prog->endp;
2142 if (prog->nparens) {
2143 for (i = prog->nparens; i >= 1; i--) {
2144 *++sp = -1;
2145 *++ep = -1;
a687059c 2146 }
a0d0e21e 2147 }
c277df42 2148 REGCP_SET;
7e5428c5 2149 if (regmatch(prog->program + 1)) {
cf93c79d 2150 prog->endp[0] = PL_reginput - PL_bostr;
a0d0e21e
LW
2151 return 1;
2152 }
c277df42
IZ
2153 REGCP_UNWIND;
2154 return 0;
a687059c
LW
2155}
2156
2157/*
2158 - regmatch - main matching routine
2159 *
2160 * Conceptually the strategy is simple: check to see whether the current
2161 * node matches, call self recursively to see whether the rest matches,
2162 * and then act accordingly. In practice we make some effort to avoid
2163 * recursion, in particular by going through "ordinary" nodes (that don't
2164 * need to know whether the rest of the match failed) by a loop instead of
2165 * by recursion.
2166 */
2167/* [lwall] I've hoisted the register declarations to the outer block in order to
2168 * maybe save a little bit of pushing and popping on the stack. It also takes
2169 * advantage of machines that use a register save mask on subroutine entry.
2170 */
76e3520e 2171STATIC I32 /* 0 failure, 1 success */
cea2e8a9 2172S_regmatch(pTHX_ regnode *prog)
a687059c 2173{
c277df42
IZ
2174 dTHR;
2175 register regnode *scan; /* Current node. */
2176 regnode *next; /* Next node. */
2177 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
2178 register I32 nextchr; /* renamed nextchr - nextchar colides with
2179 function of same name */
a0d0e21e
LW
2180 register I32 n; /* no or next */
2181 register I32 ln; /* len or last */
2182 register char *s; /* operand or save */
3280af22 2183 register char *locinput = PL_reginput;
c277df42
IZ
2184 register I32 c1, c2, paren; /* case fold search, parenth */
2185 int minmod = 0, sw = 0, logical = 0;
4633a7c4 2186#ifdef DEBUGGING
3280af22 2187 PL_regindent++;
4633a7c4 2188#endif
a0d0e21e 2189
a0ed51b3 2190 /* Note that nextchr is a byte even in UTF */
76e3520e 2191 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2192 scan = prog;
2193 while (scan != NULL) {
c277df42 2194#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 2195#ifdef DEBUGGING
c277df42
IZ
2196# define sayYES goto yes
2197# define sayNO goto no
2198# define saySAME(x) if (x) goto yes; else goto no
2199# define REPORT_CODE_OFF 24
4633a7c4 2200#else
c277df42
IZ
2201# define sayYES return 1
2202# define sayNO return 0
2203# define saySAME(x) return x
a687059c 2204#endif
c277df42
IZ
2205 DEBUG_r( {
2206 SV *prop = sv_newmortal();
3280af22 2207 int docolor = *PL_colors[0];
c277df42 2208 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 2209 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
beed8111
IZ
2210 /* The part of the string before starttry has one color
2211 (pref0_len chars), between starttry and current
2212 position another one (pref_len - pref0_len chars),
2213 after the current position the third one.
2214 We assume that pref0_len <= pref_len, otherwise we
2215 decrease pref0_len. */
3280af22
NIS
2216 int pref_len = (locinput - PL_bostr > (5 + taill) - l
2217 ? (5 + taill) - l : locinput - PL_bostr);
364723c2 2218 int pref0_len = pref_len - (locinput - PL_reg_starttry);
c277df42 2219
3280af22
NIS
2220 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2221 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2222 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32
GS
2223 if (pref0_len < 0)
2224 pref0_len = 0;
beed8111
IZ
2225 if (pref0_len > pref_len)
2226 pref0_len = pref_len;
c277df42
IZ
2227 regprop(prop, scan);
2228 PerlIO_printf(Perl_debug_log,
8d300b32 2229 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 2230 locinput - PL_bostr,
8d300b32
GS
2231 PL_colors[4], pref0_len,
2232 locinput - pref_len, PL_colors[5],
2233 PL_colors[2], pref_len - pref0_len,
2234 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 2235 (docolor ? "" : "> <"),
3280af22 2236 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
2237 15 - l - pref_len + 1,
2238 "",
3280af22 2239 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
2240 SvPVX(prop));
2241 } );
a687059c 2242
c277df42 2243 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
2244 if (next == scan)
2245 next = NULL;
a687059c 2246
a0d0e21e
LW
2247 switch (OP(scan)) {
2248 case BOL:
3280af22
NIS
2249 if (locinput == PL_bostr
2250 ? PL_regprev == '\n'
2251 : (PL_multiline &&
2252 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 2253 {
a0ed51b3 2254 /* regtill = regbol; */
a0d0e21e
LW
2255 break;
2256 }
b8c5462f
JH
2257 sayNO;
2258 case MBOL:
2259 if (locinput == PL_bostr
2260 ? PL_regprev == '\n'
2261 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2262 {
2263 break;
2264 }
2265 sayNO;
2266 case SBOL:
2267 if (locinput == PL_regbol && PL_regprev == '\n')
2268 break;
2269 sayNO;
2270 case GPOS:
2271 if (locinput == PL_reg_ganch)
2272 break;
2273 sayNO;
2274 case EOL:
2275 if (PL_multiline)
2276 goto meol;
2277 else
2278 goto seol;
2279 case MEOL:
2280 meol:
2281 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2282 sayNO;
2283 break;
2284 case SEOL:
2285 seol:
2286 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2287 sayNO;
2288 if (PL_regeol - locinput > 1)
2289 sayNO;
2290 break;
2291 case EOS:
2292 if (PL_regeol != locinput)
2293 sayNO;
2294 break;
2295 case SANYUTF8:
2296 if (nextchr & 0x80) {
2297 locinput += PL_utf8skip[nextchr];
2298 if (locinput > PL_regeol)
2299 sayNO;
2300 nextchr = UCHARAT(locinput);
2301 break;
2302 }
2303 if (!nextchr && locinput >= PL_regeol)
2304 sayNO;
2305 nextchr = UCHARAT(++locinput);
2306 break;
2307 case SANY:
2308 if (!nextchr && locinput >= PL_regeol)
2309 sayNO;
2310 nextchr = UCHARAT(++locinput);
2311 break;
2312 case ANYUTF8:
2313 if (nextchr & 0x80) {
2314 locinput += PL_utf8skip[nextchr];
2315 if (locinput > PL_regeol)
2316 sayNO;
2317 nextchr = UCHARAT(locinput);
2318 break;
2319 }
2320 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2321 sayNO;
2322 nextchr = UCHARAT(++locinput);
2323 break;
2324 case REG_ANY:
2325 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2326 sayNO;
2327 nextchr = UCHARAT(++locinput);
2328 break;
2329 case EXACT:
2330 s = (char *) OPERAND(scan);
2331 ln = UCHARAT(s++);
2332 /* Inline the first character, for speed. */
2333 if (UCHARAT(s) != nextchr)
2334 sayNO;
2335 if (PL_regeol - locinput < ln)
2336 sayNO;
2337 if (ln > 1 && memNE(s, locinput, ln))
2338 sayNO;
2339 locinput += ln;
2340 nextchr = UCHARAT(locinput);
2341 break;
2342 case EXACTFL:
2343 PL_reg_flags |= RF_tainted;
2344 /* FALL THROUGH */
2345 case EXACTF:
2346 s = (char *) OPERAND(scan);
2347 ln = UCHARAT(s++);
2348
2349 if (UTF) {
2350 char *l = locinput;
2351 char *e = s + ln;
2352 c1 = OP(scan) == EXACTF;
2353 while (s < e) {
2354 if (l >= PL_regeol)
2355 sayNO;
2356 if (utf8_to_uv((U8*)s, 0) != (c1 ?
2357 toLOWER_utf8((U8*)l) :
2358 toLOWER_LC_utf8((U8*)l)))
2359 {
2360 sayNO;
2361 }
2362 s += UTF8SKIP(s);
2363 l += UTF8SKIP(l);
2364 }
2365 locinput = l;
2366 nextchr = UCHARAT(locinput);
2367 break;
2368 }
2369
2370 /* Inline the first character, for speed. */
2371 if (UCHARAT(s) != nextchr &&
2372 UCHARAT(s) != ((OP(scan) == EXACTF)
2373 ? PL_fold : PL_fold_locale)[nextchr])
2374 sayNO;
2375 if (PL_regeol - locinput < ln)
2376 sayNO;
2377 if (ln > 1 && (OP(scan) == EXACTF
2378 ? ibcmp(s, locinput, ln)
2379 : ibcmp_locale(s, locinput, ln)))
2380 sayNO;
2381 locinput += ln;
2382 nextchr = UCHARAT(locinput);
2383 break;
2384 case ANYOFUTF8:
2385 s = (char *) OPERAND(scan);
2386 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2387 sayNO;
2388 if (locinput >= PL_regeol)
2389 sayNO;
2390 locinput += PL_utf8skip[nextchr];
2391 nextchr = UCHARAT(locinput);
2392 break;
2393 case ANYOF:
2394 s = (char *) OPERAND(scan);
2395 if (nextchr < 0)
2396 nextchr = UCHARAT(locinput);
2397 if (!REGINCLASS(s, nextchr))
2398 sayNO;
2399 if (!nextchr && locinput >= PL_regeol)
2400 sayNO;
2401 nextchr = UCHARAT(++locinput);
2402 break;
2403 case ALNUML:
2404 PL_reg_flags |= RF_tainted;
2405 /* FALL THROUGH */
2406 case ALNUM:
2407 if (!nextchr)
2408 sayNO;
2409 if (!(OP(scan) == ALNUM
2410 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2411 sayNO;
2412 nextchr = UCHARAT(++locinput);
2413 break;
2414 case ALNUMLUTF8:
2415 PL_reg_flags |= RF_tainted;
2416 /* FALL THROUGH */
2417 case ALNUMUTF8:
2418 if (!nextchr)
2419 sayNO;
2420 if (nextchr & 0x80) {
2421 if (!(OP(scan) == ALNUMUTF8
2422 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2423 : isALNUM_LC_utf8((U8*)locinput)))
2424 {
2425 sayNO;
2426 }
2427 locinput += PL_utf8skip[nextchr];
2428 nextchr = UCHARAT(locinput);
2429 break;
2430 }
2431 if (!(OP(scan) == ALNUMUTF8
2432 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2433 sayNO;
2434 nextchr = UCHARAT(++locinput);
2435 break;
2436 case NALNUML:
2437 PL_reg_flags |= RF_tainted;
2438 /* FALL THROUGH */
2439 case NALNUM:
2440 if (!nextchr && locinput >= PL_regeol)
2441 sayNO;
2442 if (OP(scan) == NALNUM
2443 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2444 sayNO;
2445 nextchr = UCHARAT(++locinput);
2446 break;
2447 case NALNUMLUTF8:
2448 PL_reg_flags |= RF_tainted;
2449 /* FALL THROUGH */
2450 case NALNUMUTF8:
2451 if (!nextchr && locinput >= PL_regeol)
2452 sayNO;
2453 if (nextchr & 0x80) {
2454 if (OP(scan) == NALNUMUTF8
2455 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2456 : isALNUM_LC_utf8((U8*)locinput))
2457 {
2458 sayNO;
2459 }
2460 locinput += PL_utf8skip[nextchr];
2461 nextchr = UCHARAT(locinput);
2462 break;
2463 }
2464 if (OP(scan) == NALNUMUTF8
2465 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2466 sayNO;
2467 nextchr = UCHARAT(++locinput);
2468 break;
2469 case BOUNDL:
2470 case NBOUNDL:
2471 PL_reg_flags |= RF_tainted;
2472 /* FALL THROUGH */
2473 case BOUND:
2474 case NBOUND:
2475 /* was last char in word? */
2476 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2477 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2478 ln = isALNUM(ln);
2479 n = isALNUM(nextchr);
2480 }
2481 else {
2482 ln = isALNUM_LC(ln);
2483 n = isALNUM_LC(nextchr);
2484 }
2485 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2486 sayNO;
2487 break;
2488 case BOUNDLUTF8:
2489 case NBOUNDLUTF8:
2490 PL_reg_flags |= RF_tainted;
2491 /* FALL THROUGH */
2492 case BOUNDUTF8:
2493 case NBOUNDUTF8:
2494 /* was last char in word? */
2495 ln = (locinput != PL_regbol)
2496 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2497 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2498 ln = isALNUM_uni(ln);
2499 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2500 }
2501 else {
2502 ln = isALNUM_LC_uni(ln);
2503 n = isALNUM_LC_utf8((U8*)locinput);
2504 }
2505 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2506 sayNO;
2507 break;
2508 case SPACEL:
2509 PL_reg_flags |= RF_tainted;
2510 /* FALL THROUGH */
2511 case SPACE:
2512 if (!nextchr && locinput >= PL_regeol)
2513 sayNO;
2514 if (!(OP(scan) == SPACE
2515 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2516 sayNO;
2517 nextchr = UCHARAT(++locinput);
2518 break;
2519 case SPACELUTF8:
2520 PL_reg_flags |= RF_tainted;
2521 /* FALL THROUGH */
2522 case SPACEUTF8:
2523 if (!nextchr && locinput >= PL_regeol)
2524 sayNO;
2525 if (nextchr & 0x80) {
2526 if (!(OP(scan) == SPACEUTF8
2527 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2528 : isSPACE_LC_utf8((U8*)locinput)))
2529 {
2530 sayNO;
2531 }
2532 locinput += PL_utf8skip[nextchr];
2533 nextchr = UCHARAT(locinput);
2534 break;
2535 }
2536 if (!(OP(scan) == SPACEUTF8
2537 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2538 sayNO;
2539 nextchr = UCHARAT(++locinput);
2540 break;
2541 case NSPACEL:
2542 PL_reg_flags |= RF_tainted;
2543 /* FALL THROUGH */
2544 case NSPACE:
2545 if (!nextchr)
2546 sayNO;
2547 if (OP(scan) == SPACE
2548 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2549 sayNO;
2550 nextchr = UCHARAT(++locinput);
2551 break;
2552 case NSPACELUTF8:
2553 PL_reg_flags |= RF_tainted;
2554 /* FALL THROUGH */
2555 case NSPACEUTF8:
2556 if (!nextchr)
2557 sayNO;
2558 if (nextchr & 0x80) {
2559 if (OP(scan) == NSPACEUTF8
2560 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2561 : isSPACE_LC_utf8((U8*)locinput))
2562 {
2563 sayNO;
2564 }
2565 locinput += PL_utf8skip[nextchr];
2566 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2567 break;
2568 }
b8c5462f
JH
2569 if (OP(scan) == NSPACEUTF8
2570 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2571 sayNO;
2572 nextchr = UCHARAT(++locinput);
2573 break;
2574 case DIGITL:
2575 PL_reg_flags |= RF_tainted;
2576 /* FALL THROUGH */
2577 case DIGIT:
2578 if (!nextchr && locinput >= PL_regeol)
2579 sayNO;
2580 if (!(OP(scan) == DIGIT
2581 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2582 sayNO;
2583 nextchr = UCHARAT(++locinput);
2584 break;
2585 case DIGITLUTF8:
2586 PL_reg_flags |= RF_tainted;
2587 /* FALL THROUGH */
2588 case DIGITUTF8:
2589 if (!nextchr)
2590 sayNO;
2591 if (nextchr & 0x80) {
2592 if (OP(scan) == NDIGITUTF8
2593 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2594 : isDIGIT_LC_utf8((U8*)locinput))
2595 {
2596 sayNO;
2597 }
2598 locinput += PL_utf8skip[nextchr];
2599 nextchr = UCHARAT(locinput);
a0d0e21e 2600 break;
b8c5462f
JH
2601 }
2602 if (!isDIGIT(nextchr))
2603 sayNO;
2604 nextchr = UCHARAT(++locinput);
2605 break;
2606 case NDIGITL:
2607 PL_reg_flags |= RF_tainted;
2608 /* FALL THROUGH */
2609 case NDIGIT:
2610 if (!nextchr)
2611 sayNO;
2612 if (OP(scan) == DIGIT
2613 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2614 sayNO;
2615 nextchr = UCHARAT(++locinput);
2616 break;
2617 case NDIGITLUTF8:
2618 PL_reg_flags |= RF_tainted;
2619 /* FALL THROUGH */
2620 case NDIGITUTF8:
2621 if (!nextchr && locinput >= PL_regeol)
2622 sayNO;
2623 if (nextchr & 0x80) {
2624 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2625 sayNO;
2626 locinput += PL_utf8skip[nextchr];
2627 nextchr = UCHARAT(locinput);
a0d0e21e 2628 break;
b8c5462f
JH
2629 }
2630 if (isDIGIT(nextchr))
4633a7c4 2631 sayNO;
b8c5462f 2632 nextchr = UCHARAT(++locinput);
a0d0e21e 2633 break;
b8c5462f
JH
2634 case ALNUMCL:
2635 PL_reg_flags |= RF_tainted;
2636 /* FALL THROUGH */
2637 case ALNUMC:
2638 if (!nextchr)
4633a7c4 2639 sayNO;
b8c5462f
JH
2640 if (!(OP(scan) == ALNUMC
2641 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2642 sayNO;
2643 nextchr = UCHARAT(++locinput);
2644 break;
2645 case ALNUMCLUTF8:
2646 PL_reg_flags |= RF_tainted;
2647 /* FALL THROUGH */
2648 case ALNUMCUTF8:
2649 if (!nextchr)
2650 sayNO;
2651 if (nextchr & 0x80) {
2652 if (!(OP(scan) == ALNUMCUTF8
2653 ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2654 : isALNUMC_LC_utf8((U8*)locinput)))
2655 {
2656 sayNO;
2657 }
2658 locinput += PL_utf8skip[nextchr];
2659 nextchr = UCHARAT(locinput);
2660 break;
2661 }
2662 if (!(OP(scan) == ALNUMCUTF8
2663 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2664 sayNO;
2665 nextchr = UCHARAT(++locinput);
2666 break;
2667 case NALNUMCL:
2668 PL_reg_flags |= RF_tainted;
2669 /* FALL THROUGH */
2670 case NALNUMC:
2671 if (!nextchr)
2672 sayNO;
2673 if (OP(scan) == ALNUMC
2674 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2675 sayNO;
2676 nextchr = UCHARAT(++locinput);
2677 break;
2678 case NALNUMCLUTF8:
2679 PL_reg_flags |= RF_tainted;
2680 /* FALL THROUGH */
2681 case NALNUMCUTF8:
2682 if (!nextchr && locinput >= PL_regeol)
2683 sayNO;
2684 if (nextchr & 0x80) {
2685 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2686 sayNO;
2687 locinput += PL_utf8skip[nextchr];
2688 nextchr = UCHARAT(locinput);
2689 break;
2690 }
2691 if (isALNUMC(nextchr))
2692 sayNO;
2693 nextchr = UCHARAT(++locinput);
2694 break;
2695 case ALPHAL:
2696 PL_reg_flags |= RF_tainted;
2697 /* FALL THROUGH */
2698 case ALPHA:
2699 if (!nextchr)
2700 sayNO;
2701 if (!(OP(scan) == ALPHA
2702 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2703 sayNO;
2704 nextchr = UCHARAT(++locinput);
2705 break;
2706 case ALPHALUTF8:
2707 PL_reg_flags |= RF_tainted;
2708 /* FALL THROUGH */
2709 case ALPHAUTF8:
2710 if (!nextchr)
2711 sayNO;
2712 if (nextchr & 0x80) {
2713 if (!(OP(scan) == ALPHAUTF8
2714 ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2715 : isALPHA_LC_utf8((U8*)locinput)))
2716 {
2717 sayNO;
2718 }
2719 locinput += PL_utf8skip[nextchr];
2720 nextchr = UCHARAT(locinput);
2721 break;
2722 }
2723 if (!(OP(scan) == ALPHAUTF8
2724 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2725 sayNO;
2726 nextchr = UCHARAT(++locinput);
2727 break;
2728 case NALPHAL:
2729 PL_reg_flags |= RF_tainted;
2730 /* FALL THROUGH */
2731 case NALPHA:
2732 if (!nextchr)
2733 sayNO;
2734 if (OP(scan) == ALPHA
2735 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2736 sayNO;
2737 nextchr = UCHARAT(++locinput);
2738 break;
2739 case NALPHALUTF8:
2740 PL_reg_flags |= RF_tainted;
2741 /* FALL THROUGH */
2742 case NALPHAUTF8:
2743 if (!nextchr && locinput >= PL_regeol)
2744 sayNO;
2745 if (nextchr & 0x80) {
2746 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2747 sayNO;
2748 locinput += PL_utf8skip[nextchr];
2749 nextchr = UCHARAT(locinput);
2750 break;
2751 }
2752 if (isALPHA(nextchr))
2753 sayNO;
2754 nextchr = UCHARAT(++locinput);
2755 break;
2756 case ASCII:
2757 if (!nextchr && locinput >= PL_regeol)
2758 sayNO;
2759 if (!isASCII(nextchr))
2760 sayNO;
2761 nextchr = UCHARAT(++locinput);
2762 break;
2763 case NASCII:
2764 if (!nextchr && locinput >= PL_regeol)
2765 sayNO;
2766 if (isASCII(nextchr))
2767 sayNO;
2768 nextchr = UCHARAT(++locinput);
2769 break;
2770 case CNTRLL:
2771 PL_reg_flags |= RF_tainted;
2772 /* FALL THROUGH */
2773 case CNTRL:
2774 if (!nextchr)
2775 sayNO;
2776 if (!(OP(scan) == CNTRL
2777 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2778 sayNO;
2779 nextchr = UCHARAT(++locinput);
2780 break;
2781 case CNTRLLUTF8:
2782 PL_reg_flags |= RF_tainted;
2783 /* FALL THROUGH */
2784 case CNTRLUTF8:
2785 if (!nextchr)
2786 sayNO;
2787 if (nextchr & 0x80) {
2788 if (!(OP(scan) == CNTRLUTF8
2789 ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2790 : isCNTRL_LC_utf8((U8*)locinput)))
2791 {
2792 sayNO;
2793 }
2794 locinput += PL_utf8skip[nextchr];
2795 nextchr = UCHARAT(locinput);
2796 break;
2797 }
2798 if (!(OP(scan) == CNTRLUTF8
2799 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
4633a7c4 2800 sayNO;
b8c5462f 2801 nextchr = UCHARAT(++locinput);
a0d0e21e 2802 break;
b8c5462f
JH
2803 case NCNTRLL:
2804 PL_reg_flags |= RF_tainted;
2805 /* FALL THROUGH */
2806 case NCNTRL:
2807 if (!nextchr)
b85d18e9 2808 sayNO;
b8c5462f
JH
2809 if (OP(scan) == CNTRL
2810 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2811 sayNO;
2812 nextchr = UCHARAT(++locinput);
b85d18e9 2813 break;
b8c5462f
JH
2814 case NCNTRLLUTF8:
2815 PL_reg_flags |= RF_tainted;
2816 /* FALL THROUGH */
2817 case NCNTRLUTF8:
2818 if (!nextchr && locinput >= PL_regeol)
2819 sayNO;
a0ed51b3 2820 if (nextchr & 0x80) {
b8c5462f 2821 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
a0ed51b3 2822 sayNO;
b8c5462f 2823 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2824 nextchr = UCHARAT(locinput);
2825 break;
2826 }
b8c5462f 2827 if (isCNTRL(nextchr))
a0ed51b3
LW
2828 sayNO;
2829 nextchr = UCHARAT(++locinput);
2830 break;
b8c5462f
JH
2831 case GRAPHL:
2832 PL_reg_flags |= RF_tainted;
2833 /* FALL THROUGH */
2834 case GRAPH:
2835 if (!nextchr)
2836 sayNO;
2837 if (!(OP(scan) == GRAPH
2838 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
4633a7c4 2839 sayNO;
76e3520e 2840 nextchr = UCHARAT(++locinput);
a0d0e21e 2841 break;
b8c5462f
JH
2842 case GRAPHLUTF8:
2843 PL_reg_flags |= RF_tainted;
2844 /* FALL THROUGH */
2845 case GRAPHUTF8:
2846 if (!nextchr)
2847 sayNO;
a0ed51b3 2848 if (nextchr & 0x80) {
b8c5462f
JH
2849 if (!(OP(scan) == GRAPHUTF8
2850 ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2851 : isGRAPH_LC_utf8((U8*)locinput)))
2852 {
a0ed51b3 2853 sayNO;
b8c5462f
JH
2854 }
2855 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2856 nextchr = UCHARAT(locinput);
2857 break;
2858 }
b8c5462f
JH
2859 if (!(OP(scan) == GRAPHUTF8
2860 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
a0ed51b3
LW
2861 sayNO;
2862 nextchr = UCHARAT(++locinput);
2863 break;
b8c5462f
JH
2864 case NGRAPHL:
2865 PL_reg_flags |= RF_tainted;
2866 /* FALL THROUGH */
2867 case NGRAPH:
2868 if (!nextchr)
2869 sayNO;
2870 if (OP(scan) == GRAPH
2871 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
4633a7c4 2872 sayNO;
76e3520e 2873 nextchr = UCHARAT(++locinput);
a0d0e21e 2874 break;
b8c5462f
JH
2875 case NGRAPHLUTF8:
2876 PL_reg_flags |= RF_tainted;
2877 /* FALL THROUGH */
2878 case NGRAPHUTF8:
2879 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2880 sayNO;
b8c5462f
JH
2881 if (nextchr & 0x80) {
2882 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2883 sayNO;
2884 locinput += PL_utf8skip[nextchr];
2885 nextchr = UCHARAT(locinput);
2886 break;
2887 }
2888 if (isGRAPH(nextchr))
4633a7c4 2889 sayNO;
b8c5462f
JH
2890 nextchr = UCHARAT(++locinput);
2891 break;
2892 case LOWERL:
2893 PL_reg_flags |= RF_tainted;
2894 /* FALL THROUGH */
2895 case LOWER:
2896 if (!nextchr)
4633a7c4 2897 sayNO;
b8c5462f
JH
2898 if (!(OP(scan) == LOWER
2899 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2900 sayNO;
2901 nextchr = UCHARAT(++locinput);
bbce6d69 2902 break;
b8c5462f 2903 case LOWERLUTF8:
3280af22 2904 PL_reg_flags |= RF_tainted;
bbce6d69 2905 /* FALL THROUGH */
b8c5462f
JH
2906 case LOWERUTF8:
2907 if (!nextchr)
2908 sayNO;
2909 if (nextchr & 0x80) {
2910 if (!(OP(scan) == LOWERUTF8
2911 ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2912 : isLOWER_LC_utf8((U8*)locinput)))
2913 {
2914 sayNO;
a0ed51b3 2915 }
b8c5462f 2916 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2917 nextchr = UCHARAT(locinput);
2918 break;
2919 }
b8c5462f
JH
2920 if (!(OP(scan) == LOWERUTF8
2921 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
bbce6d69 2922 sayNO;
b8c5462f 2923 nextchr = UCHARAT(++locinput);
a0d0e21e 2924 break;
b8c5462f
JH
2925 case NLOWERL:
2926 PL_reg_flags |= RF_tainted;
2927 /* FALL THROUGH */
2928 case NLOWER:
2929 if (!nextchr)
a0ed51b3 2930 sayNO;
b8c5462f
JH
2931 if (OP(scan) == LOWER
2932 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
a0ed51b3 2933 sayNO;
b8c5462f 2934 nextchr = UCHARAT(++locinput);
a0ed51b3 2935 break;
b8c5462f
JH
2936 case NLOWERLUTF8:
2937 PL_reg_flags |= RF_tainted;
2938 /* FALL THROUGH */
2939 case NLOWERUTF8:
3280af22 2940 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2941 sayNO;
b8c5462f
JH
2942 if (nextchr & 0x80) {
2943 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2944 sayNO;
2945 locinput += PL_utf8skip[nextchr];
2946 nextchr = UCHARAT(locinput);
2947 break;
2948 }
2949 if (isLOWER(nextchr))
2950 sayNO;
76e3520e 2951 nextchr = UCHARAT(++locinput);
a0d0e21e 2952 break;
b8c5462f 2953 case PRINTL:
3280af22 2954 PL_reg_flags |= RF_tainted;
bbce6d69 2955 /* FALL THROUGH */
b8c5462f 2956 case PRINT:
76e3520e 2957 if (!nextchr)
4633a7c4 2958 sayNO;
b8c5462f
JH
2959 if (!(OP(scan) == PRINT
2960 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
4633a7c4 2961 sayNO;
76e3520e 2962 nextchr = UCHARAT(++locinput);
a0d0e21e 2963 break;
b8c5462f 2964 case PRINTLUTF8:
a0ed51b3
LW
2965 PL_reg_flags |= RF_tainted;
2966 /* FALL THROUGH */
b8c5462f 2967 case PRINTUTF8:
a0ed51b3
LW
2968 if (!nextchr)
2969 sayNO;
2970 if (nextchr & 0x80) {
b8c5462f
JH
2971 if (!(OP(scan) == PRINTUTF8
2972 ? swash_fetch(PL_utf8_print, (U8*)locinput)
2973 : isPRINT_LC_utf8((U8*)locinput)))
dfe13c55 2974 {
a0ed51b3 2975 sayNO;
dfe13c55 2976 }
6f06b55f 2977 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2978 nextchr = UCHARAT(locinput);
2979 break;
2980 }
b8c5462f
JH
2981 if (!(OP(scan) == PRINTUTF8
2982 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
a0ed51b3
LW
2983 sayNO;
2984 nextchr = UCHARAT(++locinput);
2985 break;
b8c5462f 2986 case NPRINTL:
3280af22 2987 PL_reg_flags |= RF_tainted;
bbce6d69 2988 /* FALL THROUGH */
b8c5462f
JH
2989 case NPRINT:
2990 if (!nextchr)
4633a7c4 2991 sayNO;
b8c5462f
JH
2992 if (OP(scan) == PRINT
2993 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
4633a7c4 2994 sayNO;
76e3520e 2995 nextchr = UCHARAT(++locinput);
a0d0e21e 2996 break;
b8c5462f 2997 case NPRINTLUTF8:
a0ed51b3
LW
2998 PL_reg_flags |= RF_tainted;
2999 /* FALL THROUGH */
b8c5462f 3000 case NPRINTUTF8:
a0ed51b3
LW
3001 if (!nextchr && locinput >= PL_regeol)
3002 sayNO;
3003 if (nextchr & 0x80) {
b8c5462f 3004 if (swash_fetch(PL_utf8_print,(U8*)locinput))
a0ed51b3 3005 sayNO;
6f06b55f 3006 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3007 nextchr = UCHARAT(locinput);
3008 break;
3009 }
b8c5462f 3010 if (isPRINT(nextchr))
a0ed51b3
LW
3011 sayNO;
3012 nextchr = UCHARAT(++locinput);
3013 break;
b8c5462f 3014 case PUNCTL:
3280af22 3015 PL_reg_flags |= RF_tainted;
bbce6d69 3016 /* FALL THROUGH */
b8c5462f
JH
3017 case PUNCT:
3018 if (!nextchr)
3019 sayNO;
3020 if (!(OP(scan) == PUNCT
3021 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
4633a7c4 3022 sayNO;
b8c5462f 3023 nextchr = UCHARAT(++locinput);
a0d0e21e 3024 break;
b8c5462f 3025 case PUNCTLUTF8:
a0ed51b3
LW
3026 PL_reg_flags |= RF_tainted;
3027 /* FALL THROUGH */
b8c5462f
JH
3028 case PUNCTUTF8:
3029 if (!nextchr)
3030 sayNO;
3031 if (nextchr & 0x80) {
3032 if (!(OP(scan) == PUNCTUTF8
3033 ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3034 : isPUNCT_LC_utf8((U8*)locinput)))
3035 {
3036 sayNO;
3037 }
3038 locinput += PL_utf8skip[nextchr];
3039 nextchr = UCHARAT(locinput);
3040 break;
a0ed51b3 3041 }
b8c5462f
JH
3042 if (!(OP(scan) == PUNCTUTF8
3043 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
a0ed51b3 3044 sayNO;
b8c5462f 3045 nextchr = UCHARAT(++locinput);
a0ed51b3 3046 break;
b8c5462f 3047 case NPUNCTL:
3280af22 3048 PL_reg_flags |= RF_tainted;
bbce6d69 3049 /* FALL THROUGH */
b8c5462f
JH
3050 case NPUNCT:
3051 if (!nextchr)
4633a7c4 3052 sayNO;
b8c5462f
JH
3053 if (OP(scan) == PUNCT
3054 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
4633a7c4 3055 sayNO;
76e3520e 3056 nextchr = UCHARAT(++locinput);
a0d0e21e 3057 break;
b8c5462f 3058 case NPUNCTLUTF8:
a0ed51b3
LW
3059 PL_reg_flags |= RF_tainted;
3060 /* FALL THROUGH */
b8c5462f 3061 case NPUNCTUTF8:
a0ed51b3
LW
3062 if (!nextchr && locinput >= PL_regeol)
3063 sayNO;
3064 if (nextchr & 0x80) {
b8c5462f 3065 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
a0ed51b3 3066 sayNO;
6f06b55f 3067 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3068 nextchr = UCHARAT(locinput);
3069 break;
3070 }
b8c5462f 3071 if (isPUNCT(nextchr))
a0ed51b3
LW
3072 sayNO;
3073 nextchr = UCHARAT(++locinput);
3074 break;
b8c5462f 3075 case UPPERL:
3280af22 3076 PL_reg_flags |= RF_tainted;
bbce6d69 3077 /* FALL THROUGH */
b8c5462f 3078 case UPPER:
76e3520e 3079 if (!nextchr)
4633a7c4 3080 sayNO;
b8c5462f
JH
3081 if (!(OP(scan) == UPPER
3082 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
4633a7c4 3083 sayNO;
76e3520e 3084 nextchr = UCHARAT(++locinput);
a0d0e21e 3085 break;
b8c5462f 3086 case UPPERLUTF8:
a0ed51b3
LW
3087 PL_reg_flags |= RF_tainted;
3088 /* FALL THROUGH */
b8c5462f 3089 case UPPERUTF8:
a0ed51b3
LW
3090 if (!nextchr)
3091 sayNO;
3092 if (nextchr & 0x80) {
b8c5462f
JH
3093 if (!(OP(scan) == UPPERUTF8
3094 ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3095 : isUPPER_LC_utf8((U8*)locinput)))
dfe13c55 3096 {
a0ed51b3 3097 sayNO;
dfe13c55 3098 }
6f06b55f 3099 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3100 nextchr = UCHARAT(locinput);
3101 break;
3102 }
b8c5462f
JH
3103 if (!(OP(scan) == UPPERUTF8
3104 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
a0ed51b3
LW
3105 sayNO;
3106 nextchr = UCHARAT(++locinput);
3107 break;
b8c5462f
JH
3108 case NUPPERL:
3109 PL_reg_flags |= RF_tainted;
3110 /* FALL THROUGH */
3111 case NUPPER:
3112 if (!nextchr)
3113 sayNO;
3114 if (OP(scan) == UPPER
3115 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
4633a7c4 3116 sayNO;
76e3520e 3117 nextchr = UCHARAT(++locinput);
a0d0e21e 3118 break;
b8c5462f
JH
3119 case NUPPERLUTF8:
3120 PL_reg_flags |= RF_tainted;
3121 /* FALL THROUGH */
3122 case NUPPERUTF8:
3123 if (!nextchr && locinput >= PL_regeol)
3124 sayNO;
a0ed51b3 3125 if (nextchr & 0x80) {
b8c5462f 3126 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
a0ed51b3 3127 sayNO;
6f06b55f 3128 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3129 nextchr = UCHARAT(locinput);
3130 break;
3131 }
b8c5462f 3132 if (isUPPER(nextchr))
a0ed51b3
LW
3133 sayNO;
3134 nextchr = UCHARAT(++locinput);
3135 break;
b8c5462f 3136 case XDIGIT:
3280af22 3137 if (!nextchr && locinput >= PL_regeol)
4633a7c4 3138 sayNO;
b8c5462f 3139 if (!isXDIGIT(nextchr))
4633a7c4 3140 sayNO;
76e3520e 3141 nextchr = UCHARAT(++locinput);
a0d0e21e 3142 break;
b8c5462f 3143 case NXDIGIT:
a0ed51b3
LW
3144 if (!nextchr && locinput >= PL_regeol)
3145 sayNO;
b8c5462f 3146 if (isXDIGIT(nextchr))
a0ed51b3
LW
3147 sayNO;
3148 nextchr = UCHARAT(++locinput);
3149 break;
3150 case CLUMP:
dfe13c55 3151 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 3152 sayNO;
6f06b55f 3153 locinput += PL_utf8skip[nextchr];
dfe13c55 3154 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
3155 locinput += UTF8SKIP(locinput);
3156 if (locinput > PL_regeol)
3157 sayNO;
3158 nextchr = UCHARAT(locinput);
3159 break;
c8756f30 3160 case REFFL:
3280af22 3161 PL_reg_flags |= RF_tainted;
c8756f30 3162 /* FALL THROUGH */
c277df42 3163 case REF:
c8756f30 3164 case REFF:
c277df42 3165 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3166 ln = PL_regstartp[n];
3167 if (*PL_reglastparen < n || ln == -1)
af3f8c16 3168 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3169 if (ln == PL_regendp[n])
a0d0e21e 3170 break;
a0ed51b3 3171
cf93c79d 3172 s = PL_bostr + ln;
a0ed51b3
LW
3173 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
3174 char *l = locinput;
cf93c79d 3175 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3176 /*
3177 * Note that we can't do the "other character" lookup trick as
3178 * in the 8-bit case (no pun intended) because in Unicode we
3179 * have to map both upper and title case to lower case.
3180 */
3181 if (OP(scan) == REFF) {
3182 while (s < e) {
3183 if (l >= PL_regeol)
3184 sayNO;
dfe13c55 3185 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
3186 sayNO;
3187 s += UTF8SKIP(s);
3188 l += UTF8SKIP(l);
3189 }
3190 }
3191 else {
3192 while (s < e) {
3193 if (l >= PL_regeol)
3194 sayNO;
dfe13c55 3195 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
3196 sayNO;
3197 s += UTF8SKIP(s);
3198 l += UTF8SKIP(l);
3199 }
3200 }
3201 locinput = l;
3202 nextchr = UCHARAT(locinput);
3203 break;
3204 }
3205
a0d0e21e 3206 /* Inline the first character, for speed. */
76e3520e 3207 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3208 (OP(scan) == REF ||
3209 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3210 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3211 sayNO;
cf93c79d 3212 ln = PL_regendp[n] - ln;
3280af22 3213 if (locinput + ln > PL_regeol)
4633a7c4 3214 sayNO;
c8756f30
AK
3215 if (ln > 1 && (OP(scan) == REF
3216 ? memNE(s, locinput, ln)
3217 : (OP(scan) == REFF
3218 ? ibcmp(s, locinput, ln)
3219 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3220 sayNO;
a0d0e21e 3221 locinput += ln;
76e3520e 3222 nextchr = UCHARAT(locinput);
a0d0e21e
LW
3223 break;
3224
3225 case NOTHING:
c277df42 3226 case TAIL:
a0d0e21e
LW
3227 break;
3228 case BACK:
3229 break;
c277df42
IZ
3230 case EVAL:
3231 {
3232 dSP;
533c011a 3233 OP_4tree *oop = PL_op;
3280af22
NIS
3234 COP *ocurcop = PL_curcop;
3235 SV **ocurpad = PL_curpad;
c277df42
IZ
3236 SV *ret;
3237
3238 n = ARG(scan);
533c011a
NIS
3239 PL_op = (OP_4tree*)PL_regdata->data[n];
3240 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 3241 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 3242 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3243
cea2e8a9 3244 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
3245 SPAGAIN;
3246 ret = POPs;
3247 PUTBACK;
3248
0f5d15d6
IZ
3249 PL_op = oop;
3250 PL_curpad = ocurpad;
3251 PL_curcop = ocurcop;
c277df42 3252 if (logical) {
0f5d15d6
IZ
3253 if (logical == 2) { /* Postponed subexpression. */
3254 regexp *re;
22c35a8c 3255 MAGIC *mg = Null(MAGIC*);
0f5d15d6
IZ
3256 re_cc_state state;
3257 CURCUR cctmp;
3258 CHECKPOINT cp, lastcp;
3259
3260 if(SvROK(ret) || SvRMAGICAL(ret)) {
3261 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3262
3263 if(SvMAGICAL(sv))
3264 mg = mg_find(sv, 'r');
3265 }
3266 if (mg) {
3267 re = (regexp *)mg->mg_obj;
df0003d4 3268 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3269 }
3270 else {
3271 STRLEN len;
3272 char *t = SvPV(ret, len);
3273 PMOP pm;
3274 char *oprecomp = PL_regprecomp;
3275 I32 osize = PL_regsize;
3276 I32 onpar = PL_regnpar;
3277
3278 pm.op_pmflags = 0;
cea2e8a9 3279 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
3280 if (!(SvFLAGS(ret)
3281 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3282 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3283 PL_regprecomp = oprecomp;
3284 PL_regsize = osize;
3285 PL_regnpar = onpar;
3286 }
3287 DEBUG_r(
3288 PerlIO_printf(Perl_debug_log,
3289 "Entering embedded `%s%.60s%s%s'\n",
3290 PL_colors[0],
3291 re->precomp,
3292 PL_colors[1],
3293 (strlen(re->precomp) > 60 ? "..." : ""))
3294 );
3295 state.node = next;
3296 state.prev = PL_reg_call_cc;
3297 state.cc = PL_regcc;
3298 state.re = PL_reg_re;
3299
3300 cctmp.cur = 0;
3301 cctmp.oldcc = 0;
3302 PL_regcc = &cctmp;
3303
3304 cp = regcppush(0); /* Save *all* the positions. */
3305 REGCP_SET;
3306 cache_re(re);
3307 state.ss = PL_savestack_ix;
3308 *PL_reglastparen = 0;
3309 PL_reg_call_cc = &state;
3310 PL_reginput = locinput;
3311 if (regmatch(re->program + 1)) {
3312 ReREFCNT_dec(re);
3313 regcpblow(cp);
3314 sayYES;
3315 }
3316 DEBUG_r(
3317 PerlIO_printf(Perl_debug_log,
3318 "%*s failed...\n",
3319 REPORT_CODE_OFF+PL_regindent*2, "")
3320 );
3321 ReREFCNT_dec(re);
3322 REGCP_UNWIND;
3323 regcppop();
3324 PL_reg_call_cc = state.prev;
3325 PL_regcc = state.cc;
3326 PL_reg_re = state.re;
d3790889 3327 cache_re(PL_reg_re);
0f5d15d6
IZ
3328 sayNO;
3329 }
c277df42 3330 sw = SvTRUE(ret);
0f5d15d6 3331 logical = 0;
a0ed51b3
LW
3332 }
3333 else
3280af22 3334 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
3335 break;
3336 }
a0d0e21e 3337 case OPEN:
c277df42 3338 n = ARG(scan); /* which paren pair */
3280af22
NIS
3339 PL_reg_start_tmp[n] = locinput;
3340 if (n > PL_regsize)
3341 PL_regsize = n;
a0d0e21e
LW
3342 break;
3343 case CLOSE:
c277df42 3344 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3345 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3346 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
3347 if (n > *PL_reglastparen)
3348 *PL_reglastparen = n;
a0d0e21e 3349 break;
c277df42
IZ
3350 case GROUPP:
3351 n = ARG(scan); /* which paren pair */
cf93c79d 3352 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3353 break;
3354 case IFTHEN:
3355 if (sw)
3356 next = NEXTOPER(NEXTOPER(scan));
3357 else {
3358 next = scan + ARG(scan);
3359 if (OP(next) == IFTHEN) /* Fake one. */
3360 next = NEXTOPER(NEXTOPER(next));
3361 }
3362 break;
3363 case LOGICAL:
0f5d15d6 3364 logical = scan->flags;
c277df42 3365 break;
a0d0e21e
LW
3366 case CURLYX: {
3367 CURCUR cc;
3280af22 3368 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
3369
3370 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3371 next += ARG(next);
3280af22
NIS
3372 cc.oldcc = PL_regcc;
3373 PL_regcc = &cc;
3374 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
3375 cc.cur = -1;
3376 cc.min = ARG1(scan);
3377 cc.max = ARG2(scan);
c277df42 3378 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3379 cc.next = next;
3380 cc.minmod = minmod;
3381 cc.lastloc = 0;
3280af22 3382 PL_reginput = locinput;
a0d0e21e
LW
3383 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3384 regcpblow(cp);
3280af22 3385 PL_regcc = cc.oldcc;
4633a7c4 3386 saySAME(n);
a0d0e21e
LW
3387 }
3388 /* NOT REACHED */
3389 case WHILEM: {
3390 /*
3391 * This is really hard to understand, because after we match
3392 * what we're trying to match, we must make sure the rest of
3393 * the RE is going to match for sure, and to do that we have
3394 * to go back UP the parse tree by recursing ever deeper. And
3395 * if it fails, we have to reset our parent's current state
3396 * that we can try again after backing off.
3397 */
3398
c277df42 3399 CHECKPOINT cp, lastcp;
3280af22 3400 CURCUR* cc = PL_regcc;
c277df42
IZ
3401 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3402
4633a7c4 3403 n = cc->cur + 1; /* how many we know we matched */
3280af22 3404 PL_reginput = locinput;
a0d0e21e 3405
c277df42
IZ
3406 DEBUG_r(
3407 PerlIO_printf(Perl_debug_log,
3408 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3409 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
3410 (long)n, (long)cc->min,
3411 (long)cc->max, (long)cc)
3412 );
4633a7c4 3413
a0d0e21e
LW
3414 /* If degenerate scan matches "", assume scan done. */
3415
579cf2c3 3416 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
3417 PL_regcc = cc->oldcc;
3418 ln = PL_regcc->cur;
c277df42 3419 DEBUG_r(
c3464db5
DD
3420 PerlIO_printf(Perl_debug_log,
3421 "%*s empty match detected, try continuation...\n",
3280af22 3422 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3423 );
a0d0e21e 3424 if (regmatch(cc->next))
4633a7c4 3425 sayYES;
c277df42 3426 DEBUG_r(
c3464db5
DD
3427 PerlIO_printf(Perl_debug_log,
3428 "%*s failed...\n",
3280af22 3429 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3430 );
3280af22
NIS
3431 PL_regcc->cur = ln;
3432 PL_regcc = cc;
4633a7c4 3433 sayNO;
a0d0e21e
LW
3434 }
3435
3436 /* First just match a string of min scans. */
3437
3438 if (n < cc->min) {
3439 cc->cur = n;
3440 cc->lastloc = locinput;
4633a7c4
LW
3441 if (regmatch(cc->scan))
3442 sayYES;
3443 cc->cur = n - 1;
c277df42
IZ
3444 cc->lastloc = lastloc;
3445 DEBUG_r(
c3464db5
DD
3446 PerlIO_printf(Perl_debug_log,
3447 "%*s failed...\n",
3280af22 3448 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3449 );
4633a7c4 3450 sayNO;
a0d0e21e
LW
3451 }
3452
3453 /* Prefer next over scan for minimal matching. */
3454
3455 if (cc->minmod) {
3280af22
NIS
3456 PL_regcc = cc->oldcc;
3457 ln = PL_regcc->cur;
5f05dabc 3458 cp = regcppush(cc->parenfloor);
c277df42 3459 REGCP_SET;
5f05dabc 3460 if (regmatch(cc->next)) {
c277df42 3461 regcpblow(cp);
4633a7c4 3462 sayYES; /* All done. */
5f05dabc 3463 }
c277df42 3464 REGCP_UNWIND;
5f05dabc 3465 regcppop();
3280af22
NIS
3466 PL_regcc->cur = ln;
3467 PL_regcc = cc;
a0d0e21e 3468
c277df42 3469 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 3470 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
3471 && !(PL_reg_flags & RF_warned)) {
3472 PL_reg_flags |= RF_warned;
cea2e8a9 3473 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
3474 "Complex regular subexpression recursion",
3475 REG_INFTY - 1);
c277df42 3476 }
4633a7c4 3477 sayNO;
c277df42 3478 }
a687059c 3479
c277df42 3480 DEBUG_r(
c3464db5
DD
3481 PerlIO_printf(Perl_debug_log,
3482 "%*s trying longer...\n",
3280af22 3483 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3484 );
a0d0e21e 3485 /* Try scanning more and see if it helps. */
3280af22 3486 PL_reginput = locinput;
a0d0e21e
LW
3487 cc->cur = n;
3488 cc->lastloc = locinput;
5f05dabc 3489 cp = regcppush(cc->parenfloor);
c277df42 3490 REGCP_SET;
5f05dabc 3491 if (regmatch(cc->scan)) {
c277df42 3492 regcpblow(cp);
4633a7c4 3493 sayYES;
5f05dabc 3494 }
c277df42 3495 DEBUG_r(
c3464db5
DD
3496 PerlIO_printf(Perl_debug_log,
3497 "%*s failed...\n",
3280af22 3498 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3499 );
3500 REGCP_UNWIND;
5f05dabc 3501 regcppop();
4633a7c4 3502 cc->cur = n - 1;
c277df42 3503 cc->lastloc = lastloc;
4633a7c4 3504 sayNO;
a0d0e21e
LW
3505 }
3506
3507 /* Prefer scan over next for maximal matching. */
3508
3509 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3510 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3511 cc->cur = n;
3512 cc->lastloc = locinput;
c277df42 3513 REGCP_SET;
5f05dabc 3514 if (regmatch(cc->scan)) {
c277df42 3515 regcpblow(cp);
4633a7c4 3516 sayYES;
5f05dabc 3517 }
c277df42 3518 REGCP_UNWIND;
a0d0e21e 3519 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3520 PL_reginput = locinput;
c277df42 3521 DEBUG_r(
c3464db5
DD
3522 PerlIO_printf(Perl_debug_log,
3523 "%*s failed, try continuation...\n",
3280af22 3524 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3525 );
3526 }
599cee73
PM
3527 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3528 && !(PL_reg_flags & RF_warned)) {
3280af22 3529 PL_reg_flags |= RF_warned;
cea2e8a9 3530 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
3531 "Complex regular subexpression recursion",
3532 REG_INFTY - 1);
a0d0e21e
LW
3533 }
3534
3535 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
3536 PL_regcc = cc->oldcc;
3537 ln = PL_regcc->cur;
a0d0e21e 3538 if (regmatch(cc->next))
4633a7c4 3539 sayYES;
c277df42 3540 DEBUG_r(
c3464db5 3541 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 3542 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3543 );
3280af22
NIS
3544 PL_regcc->cur = ln;
3545 PL_regcc = cc;
4633a7c4 3546 cc->cur = n - 1;
c277df42 3547 cc->lastloc = lastloc;
4633a7c4 3548 sayNO;
a0d0e21e
LW
3549 }
3550 /* NOT REACHED */
c277df42
IZ
3551 case BRANCHJ:
3552 next = scan + ARG(scan);
3553 if (next == scan)
3554 next = NULL;
3555 inner = NEXTOPER(NEXTOPER(scan));
3556 goto do_branch;
3557 case BRANCH:
3558 inner = NEXTOPER(scan);
3559 do_branch:
3560 {
3561 CHECKPOINT lastcp;
3562 c1 = OP(scan);
3563 if (OP(next) != c1) /* No choice. */
3564 next = inner; /* Avoid recursion. */
a0d0e21e 3565 else {
3280af22 3566 int lastparen = *PL_reglastparen;
c277df42
IZ
3567
3568 REGCP_SET;
a0d0e21e 3569 do {
3280af22 3570 PL_reginput = locinput;
c277df42 3571 if (regmatch(inner))
4633a7c4 3572 sayYES;
c277df42 3573 REGCP_UNWIND;
3280af22 3574 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 3575 PL_regendp[n] = -1;
3280af22 3576 *PL_reglastparen = n;
c277df42 3577 scan = next;
a0d0e21e 3578 /*SUPPRESS 560*/
c277df42
IZ
3579 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3580 next += n;
a0d0e21e 3581 else
c277df42 3582 next = NULL;
c277df42
IZ
3583 inner = NEXTOPER(scan);
3584 if (c1 == BRANCHJ) {
3585 inner = NEXTOPER(inner);
3586 }
3587 } while (scan != NULL && OP(scan) == c1);
4633a7c4 3588 sayNO;
a0d0e21e 3589 /* NOTREACHED */
a687059c 3590 }
a0d0e21e
LW
3591 }
3592 break;
3593 case MINMOD:
3594 minmod = 1;
3595 break;
c277df42
IZ
3596 case CURLYM:
3597 {
00db4c45 3598 I32 l = 0;
c277df42
IZ
3599 CHECKPOINT lastcp;
3600
3601 /* We suppose that the next guy does not need
3602 backtracking: in particular, it is of constant length,
3603 and has no parenths to influence future backrefs. */
3604 ln = ARG1(scan); /* min to match */
3605 n = ARG2(scan); /* max to match */
c277df42
IZ
3606 paren = scan->flags;
3607 if (paren) {
3280af22
NIS
3608 if (paren > PL_regsize)
3609 PL_regsize = paren;
3610 if (paren > *PL_reglastparen)
3611 *PL_reglastparen = paren;
c277df42 3612 }
dc45a647 3613 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3614 if (paren)
3615 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3616 PL_reginput = locinput;
c277df42
IZ
3617 if (minmod) {
3618 minmod = 0;
3619 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3620 sayNO;
5f4b28b2 3621 if (ln && l == 0 && n >= ln
c277df42
IZ
3622 /* In fact, this is tricky. If paren, then the
3623 fact that we did/didnot match may influence
3624 future execution. */
3625 && !(paren && ln == 0))
3626 ln = n;
3280af22 3627 locinput = PL_reginput;
22c35a8c 3628 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
3629 c1 = UCHARAT(OPERAND(next) + 1);
3630 if (OP(next) == EXACTF)
22c35a8c 3631 c2 = PL_fold[c1];
c277df42 3632 else if (OP(next) == EXACTFL)
22c35a8c 3633 c2 = PL_fold_locale[c1];
c277df42
IZ
3634 else
3635 c2 = c1;
a0ed51b3
LW
3636 }
3637 else
c277df42
IZ
3638 c1 = c2 = -1000;
3639 REGCP_SET;
5f4b28b2 3640 /* This may be improved if l == 0. */
c277df42
IZ
3641 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3642 /* If it could work, try it. */
3643 if (c1 == -1000 ||
3280af22
NIS
3644 UCHARAT(PL_reginput) == c1 ||
3645 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3646 {
3647 if (paren) {
3648 if (n) {
cf93c79d
IZ
3649 PL_regstartp[paren] =
3650 HOPc(PL_reginput, -l) - PL_bostr;
3651 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3652 }
3653 else
cf93c79d 3654 PL_regendp[paren] = -1;
c277df42
IZ
3655 }
3656 if (regmatch(next))
3657 sayYES;
3658 REGCP_UNWIND;
3659 }
3660 /* Couldn't or didn't -- move forward. */
3280af22 3661 PL_reginput = locinput;
c277df42
IZ
3662 if (regrepeat_hard(scan, 1, &l)) {
3663 ln++;
3280af22 3664 locinput = PL_reginput;
c277df42
IZ
3665 }
3666 else
3667 sayNO;
3668 }
a0ed51b3
LW
3669 }
3670 else {
c277df42
IZ
3671 n = regrepeat_hard(scan, n, &l);
3672 if (n != 0 && l == 0
3673 /* In fact, this is tricky. If paren, then the
3674 fact that we did/didnot match may influence
3675 future execution. */
3676 && !(paren && ln == 0))
3677 ln = n;
3280af22 3678 locinput = PL_reginput;
c277df42 3679 DEBUG_r(
5c0ca799
GS
3680 PerlIO_printf(Perl_debug_log,
3681 "%*s matched %ld times, len=%ld...\n",
3280af22 3682 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
3683 );
3684 if (n >= ln) {
22c35a8c 3685 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
3686 c1 = UCHARAT(OPERAND(next) + 1);
3687 if (OP(next) == EXACTF)
22c35a8c 3688 c2 = PL_fold[c1];
c277df42 3689 else if (OP(next) == EXACTFL)
22c35a8c 3690 c2 = PL_fold_locale[c1];
c277df42
IZ
3691 else
3692 c2 = c1;
a0ed51b3
LW
3693 }
3694 else
c277df42
IZ
3695 c1 = c2 = -1000;
3696 }
3697 REGCP_SET;
3698 while (n >= ln) {
3699 /* If it could work, try it. */
3700 if (c1 == -1000 ||
3280af22
NIS
3701 UCHARAT(PL_reginput) == c1 ||
3702 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3703 {
3704 DEBUG_r(
c3464db5
DD
3705 PerlIO_printf(Perl_debug_log,
3706 "%*s trying tail with n=%ld...\n",
3280af22 3707 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
3708 );
3709 if (paren) {
3710 if (n) {
cf93c79d
IZ
3711 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3712 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3713 }
a0ed51b3 3714 else
cf93c79d 3715 PL_regendp[paren] = -1;
c277df42 3716 }
a0ed51b3
LW
3717 if (regmatch(next))
3718 sayYES;
3719 REGCP_UNWIND;
3720 }
c277df42
IZ
3721 /* Couldn't or didn't -- back up. */
3722 n--;
dfe13c55 3723 locinput = HOPc(locinput, -l);
3280af22 3724 PL_reginput = locinput;
c277df42
IZ
3725 }
3726 }
3727 sayNO;
3728 break;
3729 }
3730 case CURLYN:
3731 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3732 if (paren > PL_regsize)
3733 PL_regsize = paren;
3734 if (paren > *PL_reglastparen)
3735 *PL_reglastparen = paren;
c277df42
IZ
3736 ln = ARG1(scan); /* min to match */
3737 n = ARG2(scan); /* max to match */
dc45a647 3738 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3739 goto repeat;
a0d0e21e 3740 case CURLY:
c277df42 3741 paren = 0;
a0d0e21e
LW
3742 ln = ARG1(scan); /* min to match */
3743 n = ARG2(scan); /* max to match */
dc45a647 3744 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3745 goto repeat;
3746 case STAR:
3747 ln = 0;
c277df42 3748 n = REG_INFTY;
a0d0e21e 3749 scan = NEXTOPER(scan);
c277df42 3750 paren = 0;
a0d0e21e
LW
3751 goto repeat;
3752 case PLUS:
c277df42
IZ
3753 ln = 1;
3754 n = REG_INFTY;
3755 scan = NEXTOPER(scan);
3756 paren = 0;
3757 repeat:
a0d0e21e
LW
3758 /*
3759 * Lookahead to avoid useless match attempts
3760 * when we know what character comes next.
3761 */
22c35a8c 3762 if (PL_regkind[(U8)OP(next)] == EXACT) {
bbce6d69 3763 c1 = UCHARAT(OPERAND(next) + 1);
3764 if (OP(next) == EXACTF)
22c35a8c 3765 c2 = PL_fold[c1];
bbce6d69 3766 else if (OP(next) == EXACTFL)
22c35a8c 3767 c2 = PL_fold_locale[c1];
bbce6d69 3768 else
3769 c2 = c1;
3770 }
a0d0e21e 3771 else
bbce6d69 3772 c1 = c2 = -1000;
3280af22 3773 PL_reginput = locinput;
a0d0e21e 3774 if (minmod) {
c277df42 3775 CHECKPOINT lastcp;
a0d0e21e
LW
3776 minmod = 0;
3777 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3778 sayNO;
a0ed51b3 3779 locinput = PL_reginput;
c277df42 3780 REGCP_SET;
0fe9bf95
IZ
3781 if (c1 != -1000) {
3782 char *e = locinput + n - ln; /* Should not check after this */
3783 char *old = locinput;
3784
3785 if (e >= PL_regeol || (n == REG_INFTY))
3786 e = PL_regeol - 1;
3787 while (1) {
3788 /* Find place 'next' could work */
3789 if (c1 == c2) {
3790 while (locinput <= e && *locinput != c1)
3791 locinput++;
3792 } else {
3793 while (locinput <= e
3794 && *locinput != c1
3795 && *locinput != c2)
3796 locinput++;
3797 }
3798 if (locinput > e)
3799 sayNO;
3800 /* PL_reginput == old now */
3801 if (locinput != old) {
3802 ln = 1; /* Did some */
3803 if (regrepeat(scan, locinput - old) <
3804 locinput - old)
3805 sayNO;
3806 }
3807 /* PL_reginput == locinput now */
3808 if (paren) {
3809 if (ln) {
cf93c79d
IZ
3810 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3811 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3812 }
3813 else
cf93c79d 3814 PL_regendp[paren] = -1;
0fe9bf95
IZ
3815 }
3816 if (regmatch(next))
3817 sayYES;
3818 PL_reginput = locinput; /* Could be reset... */
3819 REGCP_UNWIND;
3820 /* Couldn't or didn't -- move forward. */
3821 old = locinput++;
3822 }
3823 }
3824 else
c277df42 3825 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3826 /* If it could work, try it. */
bbce6d69 3827 if (c1 == -1000 ||
3280af22
NIS
3828 UCHARAT(PL_reginput) == c1 ||
3829 UCHARAT(PL_reginput) == c2)
bbce6d69 3830 {
c277df42
IZ
3831 if (paren) {
3832 if (n) {
cf93c79d
IZ
3833 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3834 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3835 }
3836 else
cf93c79d 3837 PL_regendp[paren] = -1;
c277df42 3838 }
a0d0e21e 3839 if (regmatch(next))
4633a7c4 3840 sayYES;
c277df42 3841 REGCP_UNWIND;
bbce6d69 3842 }
c277df42 3843 /* Couldn't or didn't -- move forward. */
a0ed51b3 3844 PL_reginput = locinput;
a0d0e21e
LW
3845 if (regrepeat(scan, 1)) {
3846 ln++;
a0ed51b3
LW
3847 locinput = PL_reginput;
3848 }
3849 else
4633a7c4 3850 sayNO;
a0d0e21e
LW
3851 }
3852 }
3853 else {
c277df42 3854 CHECKPOINT lastcp;
a0d0e21e 3855 n = regrepeat(scan, n);
a0ed51b3 3856 locinput = PL_reginput;
22c35a8c 3857 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3858 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3859 ln = n; /* why back off? */
c277df42
IZ
3860 REGCP_SET;
3861 if (paren) {
3862 while (n >= ln) {
3863 /* If it could work, try it. */
3864 if (c1 == -1000 ||
3280af22
NIS
3865 UCHARAT(PL_reginput) == c1 ||
3866 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3867 {
3868 if (paren && n) {
3869 if (n) {
cf93c79d
IZ
3870 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3871 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3872 }
3873 else
cf93c79d 3874 PL_regendp[paren] = -1;
c277df42
IZ
3875 }
3876 if (regmatch(next))
3877 sayYES;
3878 REGCP_UNWIND;
3879 }
3880 /* Couldn't or didn't -- back up. */
3881 n--;
dfe13c55 3882 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3883 }
a0ed51b3
LW
3884 }
3885 else {
c277df42
IZ
3886 while (n >= ln) {
3887 /* If it could work, try it. */
3888 if (c1 == -1000 ||
3280af22
NIS
3889 UCHARAT(PL_reginput) == c1 ||
3890 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3891 {
3892 if (regmatch(next))
3893 sayYES;
3894 REGCP_UNWIND;
3895 }
3896 /* Couldn't or didn't -- back up. */
3897 n--;
dfe13c55 3898 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3899 }
a0d0e21e
LW
3900 }
3901 }
4633a7c4 3902 sayNO;
c277df42 3903 break;
a0d0e21e 3904 case END:
0f5d15d6
IZ
3905 if (PL_reg_call_cc) {
3906 re_cc_state *cur_call_cc = PL_reg_call_cc;
3907 CURCUR *cctmp = PL_regcc;
3908 regexp *re = PL_reg_re;
3909 CHECKPOINT cp, lastcp;
3910
3911 cp = regcppush(0); /* Save *all* the positions. */
3912 REGCP_SET;
3913 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3914 the caller. */
3915 PL_reginput = locinput; /* Make position available to
3916 the callcc. */
3917 cache_re(PL_reg_call_cc->re);
3918 PL_regcc = PL_reg_call_cc->cc;
3919 PL_reg_call_cc = PL_reg_call_cc->prev;
3920 if (regmatch(cur_call_cc->node)) {
3921 PL_reg_call_cc = cur_call_cc;
3922 regcpblow(cp);
3923 sayYES;
3924 }
3925 REGCP_UNWIND;
3926 regcppop();
3927 PL_reg_call_cc = cur_call_cc;
3928 PL_regcc = cctmp;
3929 PL_reg_re = re;
3930 cache_re(re);
3931
3932 DEBUG_r(
3933 PerlIO_printf(Perl_debug_log,
3934 "%*s continuation failed...\n",
3935 REPORT_CODE_OFF+PL_regindent*2, "")
3936 );
3937 sayNO;
3938 }
3280af22 3939 if (locinput < PL_regtill)
7e5428c5
IZ
3940 sayNO; /* Cannot match: too short. */
3941 /* Fall through */
3942 case SUCCEED:
3280af22 3943 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 3944 sayYES; /* Success! */
c277df42
IZ
3945 case SUSPEND:
3946 n = 1;
9fe1d20c 3947 PL_reginput = locinput;
c277df42 3948 goto do_ifmatch;
a0d0e21e 3949 case UNLESSM:
c277df42 3950 n = 0;
a0ed51b3 3951 if (scan->flags) {
0fe9bf95
IZ
3952 if (UTF) { /* XXXX This is absolutely
3953 broken, we read before
3954 start of string. */
3955 s = HOPMAYBEc(locinput, -scan->flags);
3956 if (!s)
3957 goto say_yes;
3958 PL_reginput = s;
3959 }
3960 else {
3961 if (locinput < PL_bostr + scan->flags)
3962 goto say_yes;
3963 PL_reginput = locinput - scan->flags;
3964 goto do_ifmatch;
3965 }
a0ed51b3
LW
3966 }
3967 else
3968 PL_reginput = locinput;
c277df42
IZ
3969 goto do_ifmatch;
3970 case IFMATCH:
3971 n = 1;
a0ed51b3 3972 if (scan->flags) {
0fe9bf95
IZ
3973 if (UTF) { /* XXXX This is absolutely
3974 broken, we read before
3975 start of string. */
3976 s = HOPMAYBEc(locinput, -scan->flags);
3977 if (!s || s < PL_bostr)
3978 goto say_no;
3979 PL_reginput = s;
3980 }
3981 else {
3982 if (locinput < PL_bostr + scan->flags)
3983 goto say_no;
3984 PL_reginput = locinput - scan->flags;
3985 goto do_ifmatch;
3986 }
a0ed51b3
LW
3987 }
3988 else
3989 PL_reginput = locinput;
3990
c277df42 3991 do_ifmatch:
c277df42
IZ
3992 inner = NEXTOPER(NEXTOPER(scan));
3993 if (regmatch(inner) != n) {
3994 say_no:
3995 if (logical) {
3996 logical = 0;
3997 sw = 0;
3998 goto do_longjump;
a0ed51b3
LW
3999 }
4000 else
c277df42
IZ
4001 sayNO;
4002 }
4003 say_yes:
4004 if (logical) {
4005 logical = 0;
4006 sw = 1;
4007 }
fe44a5e8 4008 if (OP(scan) == SUSPEND) {
3280af22 4009 locinput = PL_reginput;
565764a8 4010 nextchr = UCHARAT(locinput);
fe44a5e8 4011 }
c277df42
IZ
4012 /* FALL THROUGH. */
4013 case LONGJMP:
4014 do_longjump:
4015 next = scan + ARG(scan);
4016 if (next == scan)
4017 next = NULL;
a0d0e21e
LW
4018 break;
4019 default:
c030ccd9 4020 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 4021 (unsigned long)scan, OP(scan));
cea2e8a9 4022 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4023 }
a0d0e21e
LW
4024 scan = next;
4025 }
a687059c 4026
a0d0e21e
LW
4027 /*
4028 * We get here only if there's trouble -- normally "case END" is
4029 * the terminating point.
4030 */
cea2e8a9 4031 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4032 /*NOTREACHED*/
4633a7c4
LW
4033 sayNO;
4034
4035yes:
4036#ifdef DEBUGGING
3280af22 4037 PL_regindent--;
4633a7c4
LW
4038#endif
4039 return 1;
4040
4041no:
4042#ifdef DEBUGGING
3280af22 4043 PL_regindent--;
4633a7c4 4044#endif
a0d0e21e 4045 return 0;
a687059c
LW
4046}
4047
4048/*
4049 - regrepeat - repeatedly match something simple, report how many
4050 */
4051/*
4052 * [This routine now assumes that it will only match on things of length 1.
4053 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4054 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4055 */
76e3520e 4056STATIC I32
cea2e8a9 4057S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 4058{
5c0ca799 4059 dTHR;
a0d0e21e
LW
4060 register char *scan;
4061 register char *opnd;
4062 register I32 c;
3280af22 4063 register char *loceol = PL_regeol;
a0ed51b3 4064 register I32 hardcount = 0;
a0d0e21e 4065
3280af22 4066 scan = PL_reginput;
c277df42 4067 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 4068 loceol = scan + max;
161b471a 4069 opnd = (char *) OPERAND(p);
a0d0e21e 4070 switch (OP(p)) {
22c35a8c 4071 case REG_ANY:
a0d0e21e
LW
4072 while (scan < loceol && *scan != '\n')
4073 scan++;
4074 break;
4075 case SANY:
4076 scan = loceol;
4077 break;
a0ed51b3
LW
4078 case ANYUTF8:
4079 loceol = PL_regeol;
4080 while (scan < loceol && *scan != '\n') {
4081 scan += UTF8SKIP(scan);
4082 hardcount++;
4083 }
4084 break;
4085 case SANYUTF8:
4086 loceol = PL_regeol;
4087 while (scan < loceol) {
4088 scan += UTF8SKIP(scan);
4089 hardcount++;
4090 }
4091 break;
bbce6d69 4092 case EXACT: /* length of string is 1 */
4093 c = UCHARAT(++opnd);
4094 while (scan < loceol && UCHARAT(scan) == c)
4095 scan++;
4096 break;
4097 case EXACTF: /* length of string is 1 */
4098 c = UCHARAT(++opnd);
4099 while (scan < loceol &&
22c35a8c 4100 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4101 scan++;
4102 break;
4103 case EXACTFL: /* length of string is 1 */
3280af22 4104 PL_reg_flags |= RF_tainted;
bbce6d69 4105 c = UCHARAT(++opnd);
4106 while (scan < loceol &&
22c35a8c 4107 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4108 scan++;
4109 break;
a0ed51b3
LW
4110 case ANYOFUTF8:
4111 loceol = PL_regeol;
4112 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4113 scan += UTF8SKIP(scan);
4114 hardcount++;
4115 }
4116 break;
a0d0e21e 4117 case ANYOF:
ae5c130c 4118 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 4119 scan++;
a0d0e21e
LW
4120 break;
4121 case ALNUM:
4122 while (scan < loceol && isALNUM(*scan))
4123 scan++;
4124 break;
a0ed51b3
LW
4125 case ALNUMUTF8:
4126 loceol = PL_regeol;
dfe13c55 4127 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
4128 scan += UTF8SKIP(scan);
4129 hardcount++;
4130 }
4131 break;
bbce6d69 4132 case ALNUML:
3280af22 4133 PL_reg_flags |= RF_tainted;
bbce6d69 4134 while (scan < loceol && isALNUM_LC(*scan))
4135 scan++;
4136 break;
a0ed51b3
LW
4137 case ALNUMLUTF8:
4138 PL_reg_flags |= RF_tainted;
4139 loceol = PL_regeol;
dfe13c55 4140 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
4141 scan += UTF8SKIP(scan);
4142 hardcount++;
4143 }
4144 break;
4145 break;
a0d0e21e
LW
4146 case NALNUM:
4147 while (scan < loceol && !isALNUM(*scan))
4148 scan++;
4149 break;
a0ed51b3
LW
4150 case NALNUMUTF8:
4151 loceol = PL_regeol;
dfe13c55 4152 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
4153 scan += UTF8SKIP(scan);
4154 hardcount++;
4155 }
4156 break;
bbce6d69 4157 case NALNUML:
3280af22 4158 PL_reg_flags |= RF_tainted;
bbce6d69 4159 while (scan < loceol && !isALNUM_LC(*scan))
4160 scan++;
4161 break;
a0ed51b3
LW
4162 case NALNUMLUTF8:
4163 PL_reg_flags |= RF_tainted;
4164 loceol = PL_regeol;
dfe13c55 4165 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
4166 scan += UTF8SKIP(scan);
4167 hardcount++;
4168 }
4169 break;
a0d0e21e
LW
4170 case SPACE:
4171 while (scan < loceol && isSPACE(*scan))
4172 scan++;
4173 break;
a0ed51b3
LW
4174 case SPACEUTF8:
4175 loceol = PL_regeol;
dfe13c55 4176 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
4177 scan += UTF8SKIP(scan);
4178 hardcount++;
4179 }
4180 break;
bbce6d69 4181 case SPACEL:
3280af22 4182 PL_reg_flags |= RF_tainted;
bbce6d69 4183 while (scan < loceol && isSPACE_LC(*scan))
4184 scan++;
4185 break;
a0ed51b3
LW
4186 case SPACELUTF8:
4187 PL_reg_flags |= RF_tainted;
4188 loceol = PL_regeol;
dfe13c55 4189 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
4190 scan += UTF8SKIP(scan);
4191 hardcount++;
4192 }
4193 break;
a0d0e21e
LW
4194 case NSPACE:
4195 while (scan < loceol && !isSPACE(*scan))
4196 scan++;
4197 break;
a0ed51b3
LW
4198 case NSPACEUTF8:
4199 loceol = PL_regeol;
dfe13c55 4200 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
4201 scan += UTF8SKIP(scan);
4202 hardcount++;
4203 }
4204 break;
bbce6d69 4205 case NSPACEL:
3280af22 4206 PL_reg_flags |= RF_tainted;
bbce6d69 4207 while (scan < loceol && !isSPACE_LC(*scan))
4208 scan++;
4209 break;
a0ed51b3
LW
4210 case NSPACELUTF8:
4211 PL_reg_flags |= RF_tainted;
4212 loceol = PL_regeol;
dfe13c55 4213 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
4214 scan += UTF8SKIP(scan);
4215 hardcount++;
4216 }
4217 break;
a0d0e21e
LW
4218 case DIGIT:
4219 while (scan < loceol && isDIGIT(*scan))
4220 scan++;
4221 break;
a0ed51b3
LW
4222 case DIGITUTF8:
4223 loceol = PL_regeol;
dfe13c55 4224 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
4225 scan += UTF8SKIP(scan);
4226 hardcount++;
4227 }
4228 break;
4229 break;
a0d0e21e
LW
4230 case NDIGIT:
4231 while (scan < loceol && !isDIGIT(*scan))
4232 scan++;
4233 break;
a0ed51b3
LW
4234 case NDIGITUTF8:
4235 loceol = PL_regeol;
dfe13c55 4236 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
4237 scan += UTF8SKIP(scan);
4238 hardcount++;
4239 }
4240 break;
a0d0e21e
LW
4241 default: /* Called on something of 0 width. */
4242 break; /* So match right here or not at all. */
4243 }
a687059c 4244
a0ed51b3
LW
4245 if (hardcount)
4246 c = hardcount;
4247 else
4248 c = scan - PL_reginput;
3280af22 4249 PL_reginput = scan;
a687059c 4250
c277df42
IZ
4251 DEBUG_r(
4252 {
4253 SV *prop = sv_newmortal();
4254
4255 regprop(prop, p);
4256 PerlIO_printf(Perl_debug_log,
4257 "%*s %s can match %ld times out of %ld...\n",
4258 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4259 });
4260
a0d0e21e 4261 return(c);
a687059c
LW
4262}
4263
4264/*
c277df42
IZ
4265 - regrepeat_hard - repeatedly match something, report total lenth and length
4266 *
4267 * The repeater is supposed to have constant length.
4268 */
4269
76e3520e 4270STATIC I32
cea2e8a9 4271S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4272{
5c0ca799 4273 dTHR;
c277df42
IZ
4274 register char *scan;
4275 register char *start;
3280af22 4276 register char *loceol = PL_regeol;
a0ed51b3 4277 I32 l = 0;
708e3b05 4278 I32 count = 0, res = 1;
a0ed51b3
LW
4279
4280 if (!max)
4281 return 0;
c277df42 4282
3280af22 4283 start = PL_reginput;
a0ed51b3 4284 if (UTF) {
708e3b05 4285 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4286 if (!count++) {
4287 l = 0;
4288 while (start < PL_reginput) {
4289 l++;
4290 start += UTF8SKIP(start);
4291 }
4292 *lp = l;
4293 if (l == 0)
4294 return max;
4295 }
4296 if (count == max)
4297 return count;
4298 }
4299 }
4300 else {
708e3b05 4301 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4302 if (!count++) {
4303 *lp = l = PL_reginput - start;
4304 if (max != REG_INFTY && l*max < loceol - scan)
4305 loceol = scan + l*max;
4306 if (l == 0)
4307 return max;
c277df42
IZ
4308 }
4309 }
4310 }
708e3b05 4311 if (!res)
3280af22 4312 PL_reginput = scan;
c277df42 4313
a0ed51b3 4314 return count;
c277df42
IZ
4315}
4316
4317/*
cb8d8820 4318 - reginclass - determine if a character falls into a character class
bbce6d69 4319 */
4320
76e3520e 4321STATIC bool
cea2e8a9 4322S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 4323{
5c0ca799 4324 dTHR;
b8c5462f 4325 char flags = ANYOF_FLAGS(p);
bbce6d69 4326 bool match = FALSE;
4327
4328 c &= 0xFF;
b8c5462f 4329 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 4330 match = TRUE;
4331 else if (flags & ANYOF_FOLD) {
4332 I32 cf;
4333 if (flags & ANYOF_LOCALE) {
3280af22 4334 PL_reg_flags |= RF_tainted;
22c35a8c 4335 cf = PL_fold_locale[c];
bbce6d69 4336 }
4337 else
22c35a8c 4338 cf = PL_fold[c];
b8c5462f 4339 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 4340 match = TRUE;
4341 }
4342
b8c5462f 4343 if (!match && (flags & ANYOF_CLASS)) {
3280af22 4344 PL_reg_flags |= RF_tainted;
b8c5462f
JH
4345 if (
4346 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4347 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4348 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
4349 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4350 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4351 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4352 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4353 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4354 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4355 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4356 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
4357 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
4358 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4359 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4360 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4361 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4362 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
4363 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4364 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
4365 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4366 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4367 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4368 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
4369 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4370 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4371 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4372 ) /* How's that for a conditional? */
bbce6d69 4373 {
4374 match = TRUE;
4375 }
4376 }
4377
ae5c130c 4378 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 4379}
4380
a0ed51b3 4381STATIC bool
cea2e8a9 4382S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
4383{
4384 dTHR;
a0ed51b3
LW
4385 char flags = ARG1(f);
4386 bool match = FALSE;
4387 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4388
4389 if (swash_fetch(sv, p))
4390 match = TRUE;
4391 else if (flags & ANYOF_FOLD) {
4392 I32 cf;
dfe13c55 4393 U8 tmpbuf[10];
a0ed51b3
LW
4394 if (flags & ANYOF_LOCALE) {
4395 PL_reg_flags |= RF_tainted;
4396 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4397 }
4398 else
4399 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4400 if (swash_fetch(sv, tmpbuf))
4401 match = TRUE;
4402 }
4403
b8c5462f 4404 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
4405
4406 return (flags & ANYOF_INVERT) ? !match : match;
4407}
161b471a 4408
dfe13c55 4409STATIC U8 *
cea2e8a9 4410S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
4411{
4412 dTHR;
a0ed51b3
LW
4413 if (off >= 0) {
4414 while (off-- && s < (U8*)PL_regeol)
4415 s += UTF8SKIP(s);
4416 }
4417 else {
4418 while (off++) {
4419 if (s > (U8*)PL_bostr) {
4420 s--;
4421 if (*s & 0x80) {
4422 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4423 s--;
4424 } /* XXX could check well-formedness here */
4425 }
4426 }
4427 }
4428 return s;
4429}
161b471a 4430
dfe13c55 4431STATIC U8 *
cea2e8a9 4432S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 4433{
c485e607 4434 dTHR;
a0ed51b3
LW
4435 if (off >= 0) {
4436 while (off-- && s < (U8*)PL_regeol)
4437 s += UTF8SKIP(s);
4438 if (off >= 0)
4439 return 0;
4440 }
4441 else {
4442 while (off++) {
4443 if (s > (U8*)PL_bostr) {
4444 s--;
4445 if (*s & 0x80) {
4446 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4447 s--;
4448 } /* XXX could check well-formedness here */
4449 }
4450 else
4451 break;
4452 }
4453 if (off <= 0)
4454 return 0;
4455 }
4456 return s;
4457}