This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add a regression test for RT #68182.
[perl5.git] / regexec.c
... / ...
CommitLineData
1/* regexec.c
2 */
3
4/*
5 * One Ring to rule them all, One Ring to find them
6 &
7 * [p.v of _The Lord of the Rings_, opening poem]
8 * [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
9 * [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
10 */
11
12/* This file contains functions for executing a regular expression. See
13 * also regcomp.c which funnily enough, contains functions for compiling
14 * a regular expression.
15 *
16 * This file is also copied at build time to ext/re/re_exec.c, where
17 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
18 * This causes the main functions to be compiled under new names and with
19 * debugging support added, which makes "use re 'debug'" work.
20 */
21
22/* NOTE: this is derived from Henry Spencer's regexp code, and should not
23 * confused with the original package (see point 3 below). Thanks, Henry!
24 */
25
26/* Additional note: this code is very heavily munged from Henry's version
27 * in places. In some spots I've traded clarity for efficiency, so don't
28 * blame Henry for some of the lack of readability.
29 */
30
31/* The names of the functions have been changed from regcomp and
32 * regexec to pregcomp and pregexec in order to avoid conflicts
33 * with the POSIX routines of the same names.
34*/
35
36#ifdef PERL_EXT_RE_BUILD
37#include "re_top.h"
38#endif
39
40/*
41 * pregcomp and pregexec -- regsub and regerror are not used in perl
42 *
43 * Copyright (c) 1986 by University of Toronto.
44 * Written by Henry Spencer. Not derived from licensed software.
45 *
46 * Permission is granted to anyone to use this software for any
47 * purpose on any computer system, and to redistribute it freely,
48 * subject to the following restrictions:
49 *
50 * 1. The author is not responsible for the consequences of use of
51 * this software, no matter how awful, even if they arise
52 * from defects in it.
53 *
54 * 2. The origin of this software must not be misrepresented, either
55 * by explicit claim or by omission.
56 *
57 * 3. Altered versions must be plainly marked as such, and must not
58 * be misrepresented as being the original software.
59 *
60 **** Alterations to Henry's code are...
61 ****
62 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
63 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
64 **** by Larry Wall and others
65 ****
66 **** You may distribute under the terms of either the GNU General Public
67 **** License or the Artistic License, as specified in the README file.
68 *
69 * Beware that some of this code is subtly aware of the way operator
70 * precedence is structured in regular expressions. Serious changes in
71 * regular-expression syntax might require a total rethink.
72 */
73#include "EXTERN.h"
74#define PERL_IN_REGEXEC_C
75#include "perl.h"
76
77#ifdef PERL_IN_XSUB_RE
78# include "re_comp.h"
79#else
80# include "regcomp.h"
81#endif
82
83#define RF_tainted 1 /* tainted information used? */
84#define RF_warned 2 /* warned about big count? */
85
86#define RF_utf8 8 /* Pattern contains multibyte chars? */
87
88#define UTF ((PL_reg_flags & RF_utf8) != 0)
89
90#define RS_init 1 /* eval environment created */
91#define RS_set 2 /* replsv value is set */
92
93#ifndef STATIC
94#define STATIC static
95#endif
96
97#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
98
99/*
100 * Forwards.
101 */
102
103#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
105
106#define HOPc(pos,off) \
107 (char *)(PL_reg_match_utf8 \
108 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
109 : (U8*)(pos + off))
110#define HOPBACKc(pos, off) \
111 (char*)(PL_reg_match_utf8\
112 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
113 : (pos - off >= PL_bostr) \
114 ? (U8*)pos - off \
115 : NULL)
116
117#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
118#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
119
120#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
121 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
122#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
123#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
124#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
125#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
126
127/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
128
129/* for use after a quantifier and before an EXACT-like node -- japhy */
130/* it would be nice to rework regcomp.sym to generate this stuff. sigh */
131#define JUMPABLE(rn) ( \
132 OP(rn) == OPEN || \
133 (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
134 OP(rn) == EVAL || \
135 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
136 OP(rn) == PLUS || OP(rn) == MINMOD || \
137 OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
138 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
139)
140#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
141
142#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
143
144#if 0
145/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
146 we don't need this definition. */
147#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
148#define IS_TEXTF(rn) ( OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
149#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
150
151#else
152/* ... so we use this as its faster. */
153#define IS_TEXT(rn) ( OP(rn)==EXACT )
154#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
155#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
156
157#endif
158
159/*
160 Search for mandatory following text node; for lookahead, the text must
161 follow but for lookbehind (rn->flags != 0) we skip to the next step.
162*/
163#define FIND_NEXT_IMPT(rn) STMT_START { \
164 while (JUMPABLE(rn)) { \
165 const OPCODE type = OP(rn); \
166 if (type == SUSPEND || PL_regkind[type] == CURLY) \
167 rn = NEXTOPER(NEXTOPER(rn)); \
168 else if (type == PLUS) \
169 rn = NEXTOPER(rn); \
170 else if (type == IFMATCH) \
171 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
172 else rn += NEXT_OFF(rn); \
173 } \
174} STMT_END
175
176
177static void restore_pos(pTHX_ void *arg);
178
179STATIC CHECKPOINT
180S_regcppush(pTHX_ I32 parenfloor)
181{
182 dVAR;
183 const int retval = PL_savestack_ix;
184#define REGCP_PAREN_ELEMS 4
185 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
186 int p;
187 GET_RE_DEBUG_FLAGS_DECL;
188
189 if (paren_elems_to_push < 0)
190 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
191
192#define REGCP_OTHER_ELEMS 7
193 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
194
195 for (p = PL_regsize; p > parenfloor; p--) {
196/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
197 SSPUSHINT(PL_regoffs[p].end);
198 SSPUSHINT(PL_regoffs[p].start);
199 SSPUSHPTR(PL_reg_start_tmp[p]);
200 SSPUSHINT(p);
201 DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
202 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
203 (UV)p, (IV)PL_regoffs[p].start,
204 (IV)(PL_reg_start_tmp[p] - PL_bostr),
205 (IV)PL_regoffs[p].end
206 ));
207 }
208/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
209 SSPUSHPTR(PL_regoffs);
210 SSPUSHINT(PL_regsize);
211 SSPUSHINT(*PL_reglastparen);
212 SSPUSHINT(*PL_reglastcloseparen);
213 SSPUSHPTR(PL_reginput);
214#define REGCP_FRAME_ELEMS 2
215/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
216 * are needed for the regexp context stack bookkeeping. */
217 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
219
220 return retval;
221}
222
223/* These are needed since we do not localize EVAL nodes: */
224#define REGCP_SET(cp) \
225 DEBUG_STATE_r( \
226 PerlIO_printf(Perl_debug_log, \
227 " Setting an EVAL scope, savestack=%"IVdf"\n", \
228 (IV)PL_savestack_ix)); \
229 cp = PL_savestack_ix
230
231#define REGCP_UNWIND(cp) \
232 DEBUG_STATE_r( \
233 if (cp != PL_savestack_ix) \
234 PerlIO_printf(Perl_debug_log, \
235 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
236 (IV)(cp), (IV)PL_savestack_ix)); \
237 regcpblow(cp)
238
239STATIC char *
240S_regcppop(pTHX_ const regexp *rex)
241{
242 dVAR;
243 U32 i;
244 char *input;
245 GET_RE_DEBUG_FLAGS_DECL;
246
247 PERL_ARGS_ASSERT_REGCPPOP;
248
249 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
250 i = SSPOPINT;
251 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
252 i = SSPOPINT; /* Parentheses elements to pop. */
253 input = (char *) SSPOPPTR;
254 *PL_reglastcloseparen = SSPOPINT;
255 *PL_reglastparen = SSPOPINT;
256 PL_regsize = SSPOPINT;
257 PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
258
259
260 /* Now restore the parentheses context. */
261 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
262 i > 0; i -= REGCP_PAREN_ELEMS) {
263 I32 tmps;
264 U32 paren = (U32)SSPOPINT;
265 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
266 PL_regoffs[paren].start = SSPOPINT;
267 tmps = SSPOPINT;
268 if (paren <= *PL_reglastparen)
269 PL_regoffs[paren].end = tmps;
270 DEBUG_BUFFERS_r(
271 PerlIO_printf(Perl_debug_log,
272 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
273 (UV)paren, (IV)PL_regoffs[paren].start,
274 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
275 (IV)PL_regoffs[paren].end,
276 (paren > *PL_reglastparen ? "(no)" : ""));
277 );
278 }
279 DEBUG_BUFFERS_r(
280 if (*PL_reglastparen + 1 <= rex->nparens) {
281 PerlIO_printf(Perl_debug_log,
282 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
283 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
284 }
285 );
286#if 1
287 /* It would seem that the similar code in regtry()
288 * already takes care of this, and in fact it is in
289 * a better location to since this code can #if 0-ed out
290 * but the code in regtry() is needed or otherwise tests
291 * requiring null fields (pat.t#187 and split.t#{13,14}
292 * (as of patchlevel 7877) will fail. Then again,
293 * this code seems to be necessary or otherwise
294 * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
295 * --jhi updated by dapm */
296 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
297 if (i > PL_regsize)
298 PL_regoffs[i].start = -1;
299 PL_regoffs[i].end = -1;
300 }
301#endif
302 return input;
303}
304
305#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
306
307/*
308 * pregexec and friends
309 */
310
311#ifndef PERL_IN_XSUB_RE
312/*
313 - pregexec - match a regexp against a string
314 */
315I32
316Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
317 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318/* strend: pointer to null at end of string */
319/* strbeg: real beginning of string */
320/* minend: end of match must be >=minend after stringarg. */
321/* nosave: For optimizations. */
322{
323 PERL_ARGS_ASSERT_PREGEXEC;
324
325 return
326 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
327 nosave ? 0 : REXEC_COPY_STR);
328}
329#endif
330
331/*
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
347/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.)
358 NOTE: Some of this comment is not correct. minlen does now take account
359 of lookahead/behind. Further research is required. -- demerphq
360
361*/
362
363/* A failure to find a constant substring means that there is no need to make
364 an expensive call to REx engine, thus we celebrate a failure. Similarly,
365 finding a substring too deep into the string means that less calls to
366 regtry() should be needed.
367
368 REx compiler's optimizer found 4 possible hints:
369 a) Anchored substring;
370 b) Fixed substring;
371 c) Whether we are anchored (beginning-of-line or \G);
372 d) First node (of those at offset 0) which may distingush positions;
373 We use a)b)d) and multiline-part of c), and try to find a position in the
374 string which does not contradict any of them.
375 */
376
377/* Most of decisions we do here should have been done at compile time.
378 The nodes of the REx which we used for the search should have been
379 deleted from the finite automaton. */
380
381char *
382Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
383 char *strend, const U32 flags, re_scream_pos_data *data)
384{
385 dVAR;
386 struct regexp *const prog = (struct regexp *)SvANY(rx);
387 register I32 start_shift = 0;
388 /* Should be nonnegative! */
389 register I32 end_shift = 0;
390 register char *s;
391 register SV *check;
392 char *strbeg;
393 char *t;
394 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
395 I32 ml_anch;
396 register char *other_last = NULL; /* other substr checked before this */
397 char *check_at = NULL; /* check substr found at this pos */
398 const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
399 RXi_GET_DECL(prog,progi);
400#ifdef DEBUGGING
401 const char * const i_strpos = strpos;
402#endif
403 GET_RE_DEBUG_FLAGS_DECL;
404
405 PERL_ARGS_ASSERT_RE_INTUIT_START;
406
407 RX_MATCH_UTF8_set(rx,do_utf8);
408
409 if (RX_UTF8(rx)) {
410 PL_reg_flags |= RF_utf8;
411 }
412 DEBUG_EXECUTE_r(
413 debug_start_match(rx, do_utf8, strpos, strend,
414 sv ? "Guessing start of match in sv for"
415 : "Guessing start of match in string for");
416 );
417
418 /* CHR_DIST() would be more correct here but it makes things slow. */
419 if (prog->minlen > strend - strpos) {
420 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
421 "String too short... [re_intuit_start]\n"));
422 goto fail;
423 }
424
425 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
426 PL_regeol = strend;
427 if (do_utf8) {
428 if (!prog->check_utf8 && prog->check_substr)
429 to_utf8_substr(prog);
430 check = prog->check_utf8;
431 } else {
432 if (!prog->check_substr && prog->check_utf8)
433 to_byte_substr(prog);
434 check = prog->check_substr;
435 }
436 if (check == &PL_sv_undef) {
437 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
438 "Non-utf8 string cannot match utf8 check string\n"));
439 goto fail;
440 }
441 if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
442 ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
443 || ( (prog->extflags & RXf_ANCH_BOL)
444 && !multiline ) ); /* Check after \n? */
445
446 if (!ml_anch) {
447 if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
448 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
449 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
450 && sv && !SvROK(sv)
451 && (strpos != strbeg)) {
452 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
453 goto fail;
454 }
455 if (prog->check_offset_min == prog->check_offset_max &&
456 !(prog->extflags & RXf_CANY_SEEN)) {
457 /* Substring at constant offset from beg-of-str... */
458 I32 slen;
459
460 s = HOP3c(strpos, prog->check_offset_min, strend);
461
462 if (SvTAIL(check)) {
463 slen = SvCUR(check); /* >= 1 */
464
465 if ( strend - s > slen || strend - s < slen - 1
466 || (strend - s == slen && strend[-1] != '\n')) {
467 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
468 goto fail_finish;
469 }
470 /* Now should match s[0..slen-2] */
471 slen--;
472 if (slen && (*SvPVX_const(check) != *s
473 || (slen > 1
474 && memNE(SvPVX_const(check), s, slen)))) {
475 report_neq:
476 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
477 goto fail_finish;
478 }
479 }
480 else if (*SvPVX_const(check) != *s
481 || ((slen = SvCUR(check)) > 1
482 && memNE(SvPVX_const(check), s, slen)))
483 goto report_neq;
484 check_at = s;
485 goto success_at_start;
486 }
487 }
488 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
489 s = strpos;
490 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
491 end_shift = prog->check_end_shift;
492
493 if (!ml_anch) {
494 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
495 - (SvTAIL(check) != 0);
496 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
497
498 if (end_shift < eshift)
499 end_shift = eshift;
500 }
501 }
502 else { /* Can match at random position */
503 ml_anch = 0;
504 s = strpos;
505 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
506 end_shift = prog->check_end_shift;
507
508 /* end shift should be non negative here */
509 }
510
511#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
512 if (end_shift < 0)
513 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
514 (IV)end_shift, RX_PRECOMP(prog));
515#endif
516
517 restart:
518 /* Find a possible match in the region s..strend by looking for
519 the "check" substring in the region corrected by start/end_shift. */
520
521 {
522 I32 srch_start_shift = start_shift;
523 I32 srch_end_shift = end_shift;
524 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
525 srch_end_shift -= ((strbeg - s) - srch_start_shift);
526 srch_start_shift = strbeg - s;
527 }
528 DEBUG_OPTIMISE_MORE_r({
529 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
530 (IV)prog->check_offset_min,
531 (IV)srch_start_shift,
532 (IV)srch_end_shift,
533 (IV)prog->check_end_shift);
534 });
535
536 if (flags & REXEC_SCREAM) {
537 I32 p = -1; /* Internal iterator of scream. */
538 I32 * const pp = data ? data->scream_pos : &p;
539
540 if (PL_screamfirst[BmRARE(check)] >= 0
541 || ( BmRARE(check) == '\n'
542 && (BmPREVIOUS(check) == SvCUR(check) - 1)
543 && SvTAIL(check) ))
544 s = screaminstr(sv, check,
545 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
546 else
547 goto fail_finish;
548 /* we may be pointing at the wrong string */
549 if (s && RXp_MATCH_COPIED(prog))
550 s = strbeg + (s - SvPVX_const(sv));
551 if (data)
552 *data->scream_olds = s;
553 }
554 else {
555 U8* start_point;
556 U8* end_point;
557 if (prog->extflags & RXf_CANY_SEEN) {
558 start_point= (U8*)(s + srch_start_shift);
559 end_point= (U8*)(strend - srch_end_shift);
560 } else {
561 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
562 end_point= HOP3(strend, -srch_end_shift, strbeg);
563 }
564 DEBUG_OPTIMISE_MORE_r({
565 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
566 (int)(end_point - start_point),
567 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
568 start_point);
569 });
570
571 s = fbm_instr( start_point, end_point,
572 check, multiline ? FBMrf_MULTILINE : 0);
573 }
574 }
575 /* Update the count-of-usability, remove useless subpatterns,
576 unshift s. */
577
578 DEBUG_EXECUTE_r({
579 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
580 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
581 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
582 (s ? "Found" : "Did not find"),
583 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
584 ? "anchored" : "floating"),
585 quoted,
586 RE_SV_TAIL(check),
587 (s ? " at offset " : "...\n") );
588 });
589
590 if (!s)
591 goto fail_finish;
592 /* Finish the diagnostic message */
593 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
594
595 /* XXX dmq: first branch is for positive lookbehind...
596 Our check string is offset from the beginning of the pattern.
597 So we need to do any stclass tests offset forward from that
598 point. I think. :-(
599 */
600
601
602
603 check_at=s;
604
605
606 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
607 Start with the other substr.
608 XXXX no SCREAM optimization yet - and a very coarse implementation
609 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
610 *always* match. Probably should be marked during compile...
611 Probably it is right to do no SCREAM here...
612 */
613
614 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
615 : (prog->float_substr && prog->anchored_substr))
616 {
617 /* Take into account the "other" substring. */
618 /* XXXX May be hopelessly wrong for UTF... */
619 if (!other_last)
620 other_last = strpos;
621 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
622 do_other_anchored:
623 {
624 char * const last = HOP3c(s, -start_shift, strbeg);
625 char *last1, *last2;
626 char * const saved_s = s;
627 SV* must;
628
629 t = s - prog->check_offset_max;
630 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
631 && (!do_utf8
632 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
633 && t > strpos)))
634 NOOP;
635 else
636 t = strpos;
637 t = HOP3c(t, prog->anchored_offset, strend);
638 if (t < other_last) /* These positions already checked */
639 t = other_last;
640 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
641 if (last < last1)
642 last1 = last;
643 /* XXXX It is not documented what units *_offsets are in.
644 We assume bytes, but this is clearly wrong.
645 Meaning this code needs to be carefully reviewed for errors.
646 dmq.
647 */
648
649 /* On end-of-str: see comment below. */
650 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
651 if (must == &PL_sv_undef) {
652 s = (char*)NULL;
653 DEBUG_r(must = prog->anchored_utf8); /* for debug */
654 }
655 else
656 s = fbm_instr(
657 (unsigned char*)t,
658 HOP3(HOP3(last1, prog->anchored_offset, strend)
659 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
660 must,
661 multiline ? FBMrf_MULTILINE : 0
662 );
663 DEBUG_EXECUTE_r({
664 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
665 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
666 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
667 (s ? "Found" : "Contradicts"),
668 quoted, RE_SV_TAIL(must));
669 });
670
671
672 if (!s) {
673 if (last1 >= last2) {
674 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
675 ", giving up...\n"));
676 goto fail_finish;
677 }
678 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
679 ", trying floating at offset %ld...\n",
680 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
681 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
682 s = HOP3c(last, 1, strend);
683 goto restart;
684 }
685 else {
686 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
687 (long)(s - i_strpos)));
688 t = HOP3c(s, -prog->anchored_offset, strbeg);
689 other_last = HOP3c(s, 1, strend);
690 s = saved_s;
691 if (t == strpos)
692 goto try_at_start;
693 goto try_at_offset;
694 }
695 }
696 }
697 else { /* Take into account the floating substring. */
698 char *last, *last1;
699 char * const saved_s = s;
700 SV* must;
701
702 t = HOP3c(s, -start_shift, strbeg);
703 last1 = last =
704 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
705 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
706 last = HOP3c(t, prog->float_max_offset, strend);
707 s = HOP3c(t, prog->float_min_offset, strend);
708 if (s < other_last)
709 s = other_last;
710 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
711 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
712 /* fbm_instr() takes into account exact value of end-of-str
713 if the check is SvTAIL(ed). Since false positives are OK,
714 and end-of-str is not later than strend we are OK. */
715 if (must == &PL_sv_undef) {
716 s = (char*)NULL;
717 DEBUG_r(must = prog->float_utf8); /* for debug message */
718 }
719 else
720 s = fbm_instr((unsigned char*)s,
721 (unsigned char*)last + SvCUR(must)
722 - (SvTAIL(must)!=0),
723 must, multiline ? FBMrf_MULTILINE : 0);
724 DEBUG_EXECUTE_r({
725 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
726 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
727 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
728 (s ? "Found" : "Contradicts"),
729 quoted, RE_SV_TAIL(must));
730 });
731 if (!s) {
732 if (last1 == last) {
733 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
734 ", giving up...\n"));
735 goto fail_finish;
736 }
737 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
738 ", trying anchored starting at offset %ld...\n",
739 (long)(saved_s + 1 - i_strpos)));
740 other_last = last;
741 s = HOP3c(t, 1, strend);
742 goto restart;
743 }
744 else {
745 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
746 (long)(s - i_strpos)));
747 other_last = s; /* Fix this later. --Hugo */
748 s = saved_s;
749 if (t == strpos)
750 goto try_at_start;
751 goto try_at_offset;
752 }
753 }
754 }
755
756
757 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
758
759 DEBUG_OPTIMISE_MORE_r(
760 PerlIO_printf(Perl_debug_log,
761 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
762 (IV)prog->check_offset_min,
763 (IV)prog->check_offset_max,
764 (IV)(s-strpos),
765 (IV)(t-strpos),
766 (IV)(t-s),
767 (IV)(strend-strpos)
768 )
769 );
770
771 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
772 && (!do_utf8
773 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
774 && t > strpos)))
775 {
776 /* Fixed substring is found far enough so that the match
777 cannot start at strpos. */
778 try_at_offset:
779 if (ml_anch && t[-1] != '\n') {
780 /* Eventually fbm_*() should handle this, but often
781 anchored_offset is not 0, so this check will not be wasted. */
782 /* XXXX In the code below we prefer to look for "^" even in
783 presence of anchored substrings. And we search even
784 beyond the found float position. These pessimizations
785 are historical artefacts only. */
786 find_anchor:
787 while (t < strend - prog->minlen) {
788 if (*t == '\n') {
789 if (t < check_at - prog->check_offset_min) {
790 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
791 /* Since we moved from the found position,
792 we definitely contradict the found anchored
793 substr. Due to the above check we do not
794 contradict "check" substr.
795 Thus we can arrive here only if check substr
796 is float. Redo checking for "other"=="fixed".
797 */
798 strpos = t + 1;
799 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
800 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
801 goto do_other_anchored;
802 }
803 /* We don't contradict the found floating substring. */
804 /* XXXX Why not check for STCLASS? */
805 s = t + 1;
806 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
807 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
808 goto set_useful;
809 }
810 /* Position contradicts check-string */
811 /* XXXX probably better to look for check-string
812 than for "\n", so one should lower the limit for t? */
813 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
814 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
815 other_last = strpos = s = t + 1;
816 goto restart;
817 }
818 t++;
819 }
820 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
821 PL_colors[0], PL_colors[1]));
822 goto fail_finish;
823 }
824 else {
825 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
826 PL_colors[0], PL_colors[1]));
827 }
828 s = t;
829 set_useful:
830 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
831 }
832 else {
833 /* The found string does not prohibit matching at strpos,
834 - no optimization of calling REx engine can be performed,
835 unless it was an MBOL and we are not after MBOL,
836 or a future STCLASS check will fail this. */
837 try_at_start:
838 /* Even in this situation we may use MBOL flag if strpos is offset
839 wrt the start of the string. */
840 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
841 && (strpos != strbeg) && strpos[-1] != '\n'
842 /* May be due to an implicit anchor of m{.*foo} */
843 && !(prog->intflags & PREGf_IMPLICIT))
844 {
845 t = strpos;
846 goto find_anchor;
847 }
848 DEBUG_EXECUTE_r( if (ml_anch)
849 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
850 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
851 );
852 success_at_start:
853 if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
854 && (do_utf8 ? (
855 prog->check_utf8 /* Could be deleted already */
856 && --BmUSEFUL(prog->check_utf8) < 0
857 && (prog->check_utf8 == prog->float_utf8)
858 ) : (
859 prog->check_substr /* Could be deleted already */
860 && --BmUSEFUL(prog->check_substr) < 0
861 && (prog->check_substr == prog->float_substr)
862 )))
863 {
864 /* If flags & SOMETHING - do not do it many times on the same match */
865 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
866 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
867 if (do_utf8 ? prog->check_substr : prog->check_utf8)
868 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
869 prog->check_substr = prog->check_utf8 = NULL; /* disable */
870 prog->float_substr = prog->float_utf8 = NULL; /* clear */
871 check = NULL; /* abort */
872 s = strpos;
873 /* XXXX This is a remnant of the old implementation. It
874 looks wasteful, since now INTUIT can use many
875 other heuristics. */
876 prog->extflags &= ~RXf_USE_INTUIT;
877 }
878 else
879 s = strpos;
880 }
881
882 /* Last resort... */
883 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
884 /* trie stclasses are too expensive to use here, we are better off to
885 leave it to regmatch itself */
886 if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
887 /* minlen == 0 is possible if regstclass is \b or \B,
888 and the fixed substr is ''$.
889 Since minlen is already taken into account, s+1 is before strend;
890 accidentally, minlen >= 1 guaranties no false positives at s + 1
891 even for \b or \B. But (minlen? 1 : 0) below assumes that
892 regstclass does not come from lookahead... */
893 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
894 This leaves EXACTF only, which is dealt with in find_byclass(). */
895 const U8* const str = (U8*)STRING(progi->regstclass);
896 const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
897 ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
898 : 1);
899 char * endpos;
900 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
901 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
902 else if (prog->float_substr || prog->float_utf8)
903 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
904 else
905 endpos= strend;
906
907 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
908 (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
909
910 t = s;
911 s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
912 if (!s) {
913#ifdef DEBUGGING
914 const char *what = NULL;
915#endif
916 if (endpos == strend) {
917 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
918 "Could not match STCLASS...\n") );
919 goto fail;
920 }
921 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
922 "This position contradicts STCLASS...\n") );
923 if ((prog->extflags & RXf_ANCH) && !ml_anch)
924 goto fail;
925 /* Contradict one of substrings */
926 if (prog->anchored_substr || prog->anchored_utf8) {
927 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
928 DEBUG_EXECUTE_r( what = "anchored" );
929 hop_and_restart:
930 s = HOP3c(t, 1, strend);
931 if (s + start_shift + end_shift > strend) {
932 /* XXXX Should be taken into account earlier? */
933 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
934 "Could not match STCLASS...\n") );
935 goto fail;
936 }
937 if (!check)
938 goto giveup;
939 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
940 "Looking for %s substr starting at offset %ld...\n",
941 what, (long)(s + start_shift - i_strpos)) );
942 goto restart;
943 }
944 /* Have both, check_string is floating */
945 if (t + start_shift >= check_at) /* Contradicts floating=check */
946 goto retry_floating_check;
947 /* Recheck anchored substring, but not floating... */
948 s = check_at;
949 if (!check)
950 goto giveup;
951 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
952 "Looking for anchored substr starting at offset %ld...\n",
953 (long)(other_last - i_strpos)) );
954 goto do_other_anchored;
955 }
956 /* Another way we could have checked stclass at the
957 current position only: */
958 if (ml_anch) {
959 s = t = t + 1;
960 if (!check)
961 goto giveup;
962 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
963 "Looking for /%s^%s/m starting at offset %ld...\n",
964 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
965 goto try_at_offset;
966 }
967 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
968 goto fail;
969 /* Check is floating subtring. */
970 retry_floating_check:
971 t = check_at - start_shift;
972 DEBUG_EXECUTE_r( what = "floating" );
973 goto hop_and_restart;
974 }
975 if (t != s) {
976 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
977 "By STCLASS: moving %ld --> %ld\n",
978 (long)(t - i_strpos), (long)(s - i_strpos))
979 );
980 }
981 else {
982 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
983 "Does not contradict STCLASS...\n");
984 );
985 }
986 }
987 giveup:
988 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
989 PL_colors[4], (check ? "Guessed" : "Giving up"),
990 PL_colors[5], (long)(s - i_strpos)) );
991 return s;
992
993 fail_finish: /* Substring not found */
994 if (prog->check_substr || prog->check_utf8) /* could be removed already */
995 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
996 fail:
997 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
998 PL_colors[4], PL_colors[5]));
999 return NULL;
1000}
1001
1002#define DECL_TRIE_TYPE(scan) \
1003 const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1004 trie_type = (scan->flags != EXACT) \
1005 ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1006 : (do_utf8 ? trie_utf8 : trie_plain)
1007
1008#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
1009uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
1010 UV uvc_unfolded = 0; \
1011 switch (trie_type) { \
1012 case trie_utf8_fold: \
1013 if ( foldlen>0 ) { \
1014 uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1015 foldlen -= len; \
1016 uscan += len; \
1017 len=0; \
1018 } else { \
1019 uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1020 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
1021 foldlen -= UNISKIP( uvc ); \
1022 uscan = foldbuf + UNISKIP( uvc ); \
1023 } \
1024 break; \
1025 case trie_latin_utf8_fold: \
1026 if ( foldlen>0 ) { \
1027 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1028 foldlen -= len; \
1029 uscan += len; \
1030 len=0; \
1031 } else { \
1032 len = 1; \
1033 uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); \
1034 foldlen -= UNISKIP( uvc ); \
1035 uscan = foldbuf + UNISKIP( uvc ); \
1036 } \
1037 break; \
1038 case trie_utf8: \
1039 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1040 break; \
1041 case trie_plain: \
1042 uvc = (UV)*uc; \
1043 len = 1; \
1044 } \
1045 \
1046 if (uvc < 256) { \
1047 charid = trie->charmap[ uvc ]; \
1048 } \
1049 else { \
1050 charid = 0; \
1051 if (widecharmap) { \
1052 SV** const svpp = hv_fetch(widecharmap, \
1053 (char*)&uvc, sizeof(UV), 0); \
1054 if (svpp) \
1055 charid = (U16)SvIV(*svpp); \
1056 } \
1057 } \
1058 if (!charid && trie_type == trie_utf8_fold && !UTF) { \
1059 charid = trie->charmap[uvc_unfolded]; \
1060 } \
1061} STMT_END
1062
1063#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1064{ \
1065 char *my_strend= (char *)strend; \
1066 if ( (CoNd) \
1067 && (ln == len || \
1068 !ibcmp_utf8(s, &my_strend, 0, do_utf8, \
1069 m, NULL, ln, (bool)UTF)) \
1070 && (!reginfo || regtry(reginfo, &s)) ) \
1071 goto got_it; \
1072 else { \
1073 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1074 uvchr_to_utf8(tmpbuf, c); \
1075 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1076 if ( f != c \
1077 && (f == c1 || f == c2) \
1078 && (ln == len || \
1079 !ibcmp_utf8(s, &my_strend, 0, do_utf8,\
1080 m, NULL, ln, (bool)UTF)) \
1081 && (!reginfo || regtry(reginfo, &s)) ) \
1082 goto got_it; \
1083 } \
1084} \
1085s += len
1086
1087#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1088STMT_START { \
1089 while (s <= e) { \
1090 if ( (CoNd) \
1091 && (ln == 1 || !(OP(c) == EXACTF \
1092 ? ibcmp(s, m, ln) \
1093 : ibcmp_locale(s, m, ln))) \
1094 && (!reginfo || regtry(reginfo, &s)) ) \
1095 goto got_it; \
1096 s++; \
1097 } \
1098} STMT_END
1099
1100#define REXEC_FBC_UTF8_SCAN(CoDe) \
1101STMT_START { \
1102 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1103 CoDe \
1104 s += uskip; \
1105 } \
1106} STMT_END
1107
1108#define REXEC_FBC_SCAN(CoDe) \
1109STMT_START { \
1110 while (s < strend) { \
1111 CoDe \
1112 s++; \
1113 } \
1114} STMT_END
1115
1116#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1117REXEC_FBC_UTF8_SCAN( \
1118 if (CoNd) { \
1119 if (tmp && (!reginfo || regtry(reginfo, &s))) \
1120 goto got_it; \
1121 else \
1122 tmp = doevery; \
1123 } \
1124 else \
1125 tmp = 1; \
1126)
1127
1128#define REXEC_FBC_CLASS_SCAN(CoNd) \
1129REXEC_FBC_SCAN( \
1130 if (CoNd) { \
1131 if (tmp && (!reginfo || regtry(reginfo, &s))) \
1132 goto got_it; \
1133 else \
1134 tmp = doevery; \
1135 } \
1136 else \
1137 tmp = 1; \
1138)
1139
1140#define REXEC_FBC_TRYIT \
1141if ((!reginfo || regtry(reginfo, &s))) \
1142 goto got_it
1143
1144#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
1145 if (do_utf8) { \
1146 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1147 } \
1148 else { \
1149 REXEC_FBC_CLASS_SCAN(CoNd); \
1150 } \
1151 break
1152
1153#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1154 if (do_utf8) { \
1155 UtFpReLoAd; \
1156 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1157 } \
1158 else { \
1159 REXEC_FBC_CLASS_SCAN(CoNd); \
1160 } \
1161 break
1162
1163#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1164 PL_reg_flags |= RF_tainted; \
1165 if (do_utf8) { \
1166 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1167 } \
1168 else { \
1169 REXEC_FBC_CLASS_SCAN(CoNd); \
1170 } \
1171 break
1172
1173#define DUMP_EXEC_POS(li,s,doutf8) \
1174 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1175
1176/* We know what class REx starts with. Try to find this position... */
1177/* if reginfo is NULL, its a dryrun */
1178/* annoyingly all the vars in this routine have different names from their counterparts
1179 in regmatch. /grrr */
1180
1181STATIC char *
1182S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1183 const char *strend, regmatch_info *reginfo)
1184{
1185 dVAR;
1186 const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1187 char *m;
1188 STRLEN ln;
1189 STRLEN lnc;
1190 register STRLEN uskip;
1191 unsigned int c1;
1192 unsigned int c2;
1193 char *e;
1194 register I32 tmp = 1; /* Scratch variable? */
1195 register const bool do_utf8 = PL_reg_match_utf8;
1196 RXi_GET_DECL(prog,progi);
1197
1198 PERL_ARGS_ASSERT_FIND_BYCLASS;
1199
1200 /* We know what class it must start with. */
1201 switch (OP(c)) {
1202 case ANYOF:
1203 if (do_utf8) {
1204 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1205 !UTF8_IS_INVARIANT((U8)s[0]) ?
1206 reginclass(prog, c, (U8*)s, 0, do_utf8) :
1207 REGINCLASS(prog, c, (U8*)s));
1208 }
1209 else {
1210 while (s < strend) {
1211 STRLEN skip = 1;
1212
1213 if (REGINCLASS(prog, c, (U8*)s) ||
1214 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1215 /* The assignment of 2 is intentional:
1216 * for the folded sharp s, the skip is 2. */
1217 (skip = SHARP_S_SKIP))) {
1218 if (tmp && (!reginfo || regtry(reginfo, &s)))
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
1223 else
1224 tmp = 1;
1225 s += skip;
1226 }
1227 }
1228 break;
1229 case CANY:
1230 REXEC_FBC_SCAN(
1231 if (tmp && (!reginfo || regtry(reginfo, &s)))
1232 goto got_it;
1233 else
1234 tmp = doevery;
1235 );
1236 break;
1237 case EXACTF:
1238 m = STRING(c);
1239 ln = STR_LEN(c); /* length to match in octets/bytes */
1240 lnc = (I32) ln; /* length to match in characters */
1241 if (UTF) {
1242 STRLEN ulen1, ulen2;
1243 U8 *sm = (U8 *) m;
1244 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1245 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1246 /* used by commented-out code below */
1247 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1248
1249 /* XXX: Since the node will be case folded at compile
1250 time this logic is a little odd, although im not
1251 sure that its actually wrong. --dmq */
1252
1253 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1254 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1255
1256 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1257 codepoint of the first character in the converted
1258 form, yet originally we did the extra step.
1259 No tests fail by commenting this code out however
1260 so Ive left it out. -- dmq.
1261
1262 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1263 0, uniflags);
1264 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1265 0, uniflags);
1266 */
1267
1268 lnc = 0;
1269 while (sm < ((U8 *) m + ln)) {
1270 lnc++;
1271 sm += UTF8SKIP(sm);
1272 }
1273 }
1274 else {
1275 c1 = *(U8*)m;
1276 c2 = PL_fold[c1];
1277 }
1278 goto do_exactf;
1279 case EXACTFL:
1280 m = STRING(c);
1281 ln = STR_LEN(c);
1282 lnc = (I32) ln;
1283 c1 = *(U8*)m;
1284 c2 = PL_fold_locale[c1];
1285 do_exactf:
1286 e = HOP3c(strend, -((I32)lnc), s);
1287
1288 if (!reginfo && e < s)
1289 e = s; /* Due to minlen logic of intuit() */
1290
1291 /* The idea in the EXACTF* cases is to first find the
1292 * first character of the EXACTF* node and then, if
1293 * necessary, case-insensitively compare the full
1294 * text of the node. The c1 and c2 are the first
1295 * characters (though in Unicode it gets a bit
1296 * more complicated because there are more cases
1297 * than just upper and lower: one needs to use
1298 * the so-called folding case for case-insensitive
1299 * matching (called "loose matching" in Unicode).
1300 * ibcmp_utf8() will do just that. */
1301
1302 if (do_utf8 || UTF) {
1303 UV c, f;
1304 U8 tmpbuf [UTF8_MAXBYTES+1];
1305 STRLEN len = 1;
1306 STRLEN foldlen;
1307 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1308 if (c1 == c2) {
1309 /* Upper and lower of 1st char are equal -
1310 * probably not a "letter". */
1311 while (s <= e) {
1312 if (do_utf8) {
1313 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1314 uniflags);
1315 } else {
1316 c = *((U8*)s);
1317 }
1318 REXEC_FBC_EXACTISH_CHECK(c == c1);
1319 }
1320 }
1321 else {
1322 while (s <= e) {
1323 if (do_utf8) {
1324 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1325 uniflags);
1326 } else {
1327 c = *((U8*)s);
1328 }
1329
1330 /* Handle some of the three Greek sigmas cases.
1331 * Note that not all the possible combinations
1332 * are handled here: some of them are handled
1333 * by the standard folding rules, and some of
1334 * them (the character class or ANYOF cases)
1335 * are handled during compiletime in
1336 * regexec.c:S_regclass(). */
1337 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1338 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1339 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1340
1341 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1342 }
1343 }
1344 }
1345 else {
1346 /* Neither pattern nor string are UTF8 */
1347 if (c1 == c2)
1348 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1349 else
1350 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1351 }
1352 break;
1353 case BOUNDL:
1354 PL_reg_flags |= RF_tainted;
1355 /* FALL THROUGH */
1356 case BOUND:
1357 if (do_utf8) {
1358 if (s == PL_bostr)
1359 tmp = '\n';
1360 else {
1361 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1362 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1363 }
1364 tmp = ((OP(c) == BOUND ?
1365 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1366 LOAD_UTF8_CHARCLASS_ALNUM();
1367 REXEC_FBC_UTF8_SCAN(
1368 if (tmp == !(OP(c) == BOUND ?
1369 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1370 isALNUM_LC_utf8((U8*)s)))
1371 {
1372 tmp = !tmp;
1373 REXEC_FBC_TRYIT;
1374 }
1375 );
1376 }
1377 else {
1378 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1379 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1380 REXEC_FBC_SCAN(
1381 if (tmp ==
1382 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1383 tmp = !tmp;
1384 REXEC_FBC_TRYIT;
1385 }
1386 );
1387 }
1388 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1389 goto got_it;
1390 break;
1391 case NBOUNDL:
1392 PL_reg_flags |= RF_tainted;
1393 /* FALL THROUGH */
1394 case NBOUND:
1395 if (do_utf8) {
1396 if (s == PL_bostr)
1397 tmp = '\n';
1398 else {
1399 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1400 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1401 }
1402 tmp = ((OP(c) == NBOUND ?
1403 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1404 LOAD_UTF8_CHARCLASS_ALNUM();
1405 REXEC_FBC_UTF8_SCAN(
1406 if (tmp == !(OP(c) == NBOUND ?
1407 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1408 isALNUM_LC_utf8((U8*)s)))
1409 tmp = !tmp;
1410 else REXEC_FBC_TRYIT;
1411 );
1412 }
1413 else {
1414 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1415 tmp = ((OP(c) == NBOUND ?
1416 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1417 REXEC_FBC_SCAN(
1418 if (tmp ==
1419 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1420 tmp = !tmp;
1421 else REXEC_FBC_TRYIT;
1422 );
1423 }
1424 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1425 goto got_it;
1426 break;
1427 case ALNUM:
1428 REXEC_FBC_CSCAN_PRELOAD(
1429 LOAD_UTF8_CHARCLASS_ALNUM(),
1430 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1431 isALNUM(*s)
1432 );
1433 case ALNUML:
1434 REXEC_FBC_CSCAN_TAINT(
1435 isALNUM_LC_utf8((U8*)s),
1436 isALNUM_LC(*s)
1437 );
1438 case NALNUM:
1439 REXEC_FBC_CSCAN_PRELOAD(
1440 LOAD_UTF8_CHARCLASS_ALNUM(),
1441 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1442 !isALNUM(*s)
1443 );
1444 case NALNUML:
1445 REXEC_FBC_CSCAN_TAINT(
1446 !isALNUM_LC_utf8((U8*)s),
1447 !isALNUM_LC(*s)
1448 );
1449 case SPACE:
1450 REXEC_FBC_CSCAN_PRELOAD(
1451 LOAD_UTF8_CHARCLASS_SPACE(),
1452 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1453 isSPACE(*s)
1454 );
1455 case SPACEL:
1456 REXEC_FBC_CSCAN_TAINT(
1457 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1458 isSPACE_LC(*s)
1459 );
1460 case NSPACE:
1461 REXEC_FBC_CSCAN_PRELOAD(
1462 LOAD_UTF8_CHARCLASS_SPACE(),
1463 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1464 !isSPACE(*s)
1465 );
1466 case NSPACEL:
1467 REXEC_FBC_CSCAN_TAINT(
1468 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1469 !isSPACE_LC(*s)
1470 );
1471 case DIGIT:
1472 REXEC_FBC_CSCAN_PRELOAD(
1473 LOAD_UTF8_CHARCLASS_DIGIT(),
1474 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1475 isDIGIT(*s)
1476 );
1477 case DIGITL:
1478 REXEC_FBC_CSCAN_TAINT(
1479 isDIGIT_LC_utf8((U8*)s),
1480 isDIGIT_LC(*s)
1481 );
1482 case NDIGIT:
1483 REXEC_FBC_CSCAN_PRELOAD(
1484 LOAD_UTF8_CHARCLASS_DIGIT(),
1485 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1486 !isDIGIT(*s)
1487 );
1488 case NDIGITL:
1489 REXEC_FBC_CSCAN_TAINT(
1490 !isDIGIT_LC_utf8((U8*)s),
1491 !isDIGIT_LC(*s)
1492 );
1493 case LNBREAK:
1494 REXEC_FBC_CSCAN(
1495 is_LNBREAK_utf8(s),
1496 is_LNBREAK_latin1(s)
1497 );
1498 case VERTWS:
1499 REXEC_FBC_CSCAN(
1500 is_VERTWS_utf8(s),
1501 is_VERTWS_latin1(s)
1502 );
1503 case NVERTWS:
1504 REXEC_FBC_CSCAN(
1505 !is_VERTWS_utf8(s),
1506 !is_VERTWS_latin1(s)
1507 );
1508 case HORIZWS:
1509 REXEC_FBC_CSCAN(
1510 is_HORIZWS_utf8(s),
1511 is_HORIZWS_latin1(s)
1512 );
1513 case NHORIZWS:
1514 REXEC_FBC_CSCAN(
1515 !is_HORIZWS_utf8(s),
1516 !is_HORIZWS_latin1(s)
1517 );
1518 case AHOCORASICKC:
1519 case AHOCORASICK:
1520 {
1521 DECL_TRIE_TYPE(c);
1522 /* what trie are we using right now */
1523 reg_ac_data *aho
1524 = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1525 reg_trie_data *trie
1526 = (reg_trie_data*)progi->data->data[ aho->trie ];
1527 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1528
1529 const char *last_start = strend - trie->minlen;
1530#ifdef DEBUGGING
1531 const char *real_start = s;
1532#endif
1533 STRLEN maxlen = trie->maxlen;
1534 SV *sv_points;
1535 U8 **points; /* map of where we were in the input string
1536 when reading a given char. For ASCII this
1537 is unnecessary overhead as the relationship
1538 is always 1:1, but for Unicode, especially
1539 case folded Unicode this is not true. */
1540 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1541 U8 *bitmap=NULL;
1542
1543
1544 GET_RE_DEBUG_FLAGS_DECL;
1545
1546 /* We can't just allocate points here. We need to wrap it in
1547 * an SV so it gets freed properly if there is a croak while
1548 * running the match */
1549 ENTER;
1550 SAVETMPS;
1551 sv_points=newSV(maxlen * sizeof(U8 *));
1552 SvCUR_set(sv_points,
1553 maxlen * sizeof(U8 *));
1554 SvPOK_on(sv_points);
1555 sv_2mortal(sv_points);
1556 points=(U8**)SvPV_nolen(sv_points );
1557 if ( trie_type != trie_utf8_fold
1558 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1559 {
1560 if (trie->bitmap)
1561 bitmap=(U8*)trie->bitmap;
1562 else
1563 bitmap=(U8*)ANYOF_BITMAP(c);
1564 }
1565 /* this is the Aho-Corasick algorithm modified a touch
1566 to include special handling for long "unknown char"
1567 sequences. The basic idea being that we use AC as long
1568 as we are dealing with a possible matching char, when
1569 we encounter an unknown char (and we have not encountered
1570 an accepting state) we scan forward until we find a legal
1571 starting char.
1572 AC matching is basically that of trie matching, except
1573 that when we encounter a failing transition, we fall back
1574 to the current states "fail state", and try the current char
1575 again, a process we repeat until we reach the root state,
1576 state 1, or a legal transition. If we fail on the root state
1577 then we can either terminate if we have reached an accepting
1578 state previously, or restart the entire process from the beginning
1579 if we have not.
1580
1581 */
1582 while (s <= last_start) {
1583 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1584 U8 *uc = (U8*)s;
1585 U16 charid = 0;
1586 U32 base = 1;
1587 U32 state = 1;
1588 UV uvc = 0;
1589 STRLEN len = 0;
1590 STRLEN foldlen = 0;
1591 U8 *uscan = (U8*)NULL;
1592 U8 *leftmost = NULL;
1593#ifdef DEBUGGING
1594 U32 accepted_word= 0;
1595#endif
1596 U32 pointpos = 0;
1597
1598 while ( state && uc <= (U8*)strend ) {
1599 int failed=0;
1600 U32 word = aho->states[ state ].wordnum;
1601
1602 if( state==1 ) {
1603 if ( bitmap ) {
1604 DEBUG_TRIE_EXECUTE_r(
1605 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1606 dump_exec_pos( (char *)uc, c, strend, real_start,
1607 (char *)uc, do_utf8 );
1608 PerlIO_printf( Perl_debug_log,
1609 " Scanning for legal start char...\n");
1610 }
1611 );
1612 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1613 uc++;
1614 }
1615 s= (char *)uc;
1616 }
1617 if (uc >(U8*)last_start) break;
1618 }
1619
1620 if ( word ) {
1621 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1622 if (!leftmost || lpos < leftmost) {
1623 DEBUG_r(accepted_word=word);
1624 leftmost= lpos;
1625 }
1626 if (base==0) break;
1627
1628 }
1629 points[pointpos++ % maxlen]= uc;
1630 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1631 uscan, len, uvc, charid, foldlen,
1632 foldbuf, uniflags);
1633 DEBUG_TRIE_EXECUTE_r({
1634 dump_exec_pos( (char *)uc, c, strend, real_start,
1635 s, do_utf8 );
1636 PerlIO_printf(Perl_debug_log,
1637 " Charid:%3u CP:%4"UVxf" ",
1638 charid, uvc);
1639 });
1640
1641 do {
1642#ifdef DEBUGGING
1643 word = aho->states[ state ].wordnum;
1644#endif
1645 base = aho->states[ state ].trans.base;
1646
1647 DEBUG_TRIE_EXECUTE_r({
1648 if (failed)
1649 dump_exec_pos( (char *)uc, c, strend, real_start,
1650 s, do_utf8 );
1651 PerlIO_printf( Perl_debug_log,
1652 "%sState: %4"UVxf", word=%"UVxf,
1653 failed ? " Fail transition to " : "",
1654 (UV)state, (UV)word);
1655 });
1656 if ( base ) {
1657 U32 tmp;
1658 if (charid &&
1659 (base + charid > trie->uniquecharcount )
1660 && (base + charid - 1 - trie->uniquecharcount
1661 < trie->lasttrans)
1662 && trie->trans[base + charid - 1 -
1663 trie->uniquecharcount].check == state
1664 && (tmp=trie->trans[base + charid - 1 -
1665 trie->uniquecharcount ].next))
1666 {
1667 DEBUG_TRIE_EXECUTE_r(
1668 PerlIO_printf( Perl_debug_log," - legal\n"));
1669 state = tmp;
1670 break;
1671 }
1672 else {
1673 DEBUG_TRIE_EXECUTE_r(
1674 PerlIO_printf( Perl_debug_log," - fail\n"));
1675 failed = 1;
1676 state = aho->fail[state];
1677 }
1678 }
1679 else {
1680 /* we must be accepting here */
1681 DEBUG_TRIE_EXECUTE_r(
1682 PerlIO_printf( Perl_debug_log," - accepting\n"));
1683 failed = 1;
1684 break;
1685 }
1686 } while(state);
1687 uc += len;
1688 if (failed) {
1689 if (leftmost)
1690 break;
1691 if (!state) state = 1;
1692 }
1693 }
1694 if ( aho->states[ state ].wordnum ) {
1695 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1696 if (!leftmost || lpos < leftmost) {
1697 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1698 leftmost = lpos;
1699 }
1700 }
1701 if (leftmost) {
1702 s = (char*)leftmost;
1703 DEBUG_TRIE_EXECUTE_r({
1704 PerlIO_printf(
1705 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1706 (UV)accepted_word, (IV)(s - real_start)
1707 );
1708 });
1709 if (!reginfo || regtry(reginfo, &s)) {
1710 FREETMPS;
1711 LEAVE;
1712 goto got_it;
1713 }
1714 s = HOPc(s,1);
1715 DEBUG_TRIE_EXECUTE_r({
1716 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1717 });
1718 } else {
1719 DEBUG_TRIE_EXECUTE_r(
1720 PerlIO_printf( Perl_debug_log,"No match.\n"));
1721 break;
1722 }
1723 }
1724 FREETMPS;
1725 LEAVE;
1726 }
1727 break;
1728 default:
1729 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1730 break;
1731 }
1732 return 0;
1733 got_it:
1734 return s;
1735}
1736
1737
1738/*
1739 - regexec_flags - match a regexp against a string
1740 */
1741I32
1742Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1743 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1744/* strend: pointer to null at end of string */
1745/* strbeg: real beginning of string */
1746/* minend: end of match must be >=minend after stringarg. */
1747/* data: May be used for some additional optimizations.
1748 Currently its only used, with a U32 cast, for transmitting
1749 the ganch offset when doing a /g match. This will change */
1750/* nosave: For optimizations. */
1751{
1752 dVAR;
1753 struct regexp *const prog = (struct regexp *)SvANY(rx);
1754 /*register*/ char *s;
1755 register regnode *c;
1756 /*register*/ char *startpos = stringarg;
1757 I32 minlen; /* must match at least this many chars */
1758 I32 dontbother = 0; /* how many characters not to try at end */
1759 I32 end_shift = 0; /* Same for the end. */ /* CC */
1760 I32 scream_pos = -1; /* Internal iterator of scream. */
1761 char *scream_olds = NULL;
1762 const bool do_utf8 = (bool)DO_UTF8(sv);
1763 I32 multiline;
1764 RXi_GET_DECL(prog,progi);
1765 regmatch_info reginfo; /* create some info to pass to regtry etc */
1766 regexp_paren_pair *swap = NULL;
1767 GET_RE_DEBUG_FLAGS_DECL;
1768
1769 PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1770 PERL_UNUSED_ARG(data);
1771
1772 /* Be paranoid... */
1773 if (prog == NULL || startpos == NULL) {
1774 Perl_croak(aTHX_ "NULL regexp parameter");
1775 return 0;
1776 }
1777
1778 multiline = prog->extflags & RXf_PMf_MULTILINE;
1779 reginfo.prog = rx; /* Yes, sorry that this is confusing. */
1780
1781 RX_MATCH_UTF8_set(rx, do_utf8);
1782 DEBUG_EXECUTE_r(
1783 debug_start_match(rx, do_utf8, startpos, strend,
1784 "Matching");
1785 );
1786
1787 minlen = prog->minlen;
1788
1789 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1790 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1791 "String too short [regexec_flags]...\n"));
1792 goto phooey;
1793 }
1794
1795
1796 /* Check validity of program. */
1797 if (UCHARAT(progi->program) != REG_MAGIC) {
1798 Perl_croak(aTHX_ "corrupted regexp program");
1799 }
1800
1801 PL_reg_flags = 0;
1802 PL_reg_eval_set = 0;
1803 PL_reg_maxiter = 0;
1804
1805 if (RX_UTF8(rx))
1806 PL_reg_flags |= RF_utf8;
1807
1808 /* Mark beginning of line for ^ and lookbehind. */
1809 reginfo.bol = startpos; /* XXX not used ??? */
1810 PL_bostr = strbeg;
1811 reginfo.sv = sv;
1812
1813 /* Mark end of line for $ (and such) */
1814 PL_regeol = strend;
1815
1816 /* see how far we have to get to not match where we matched before */
1817 reginfo.till = startpos+minend;
1818
1819 /* If there is a "must appear" string, look for it. */
1820 s = startpos;
1821
1822 if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1823 MAGIC *mg;
1824
1825 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1826 reginfo.ganch = startpos + prog->gofs;
1827 else if (sv && SvTYPE(sv) >= SVt_PVMG
1828 && SvMAGIC(sv)
1829 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1830 && mg->mg_len >= 0) {
1831 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
1832 if (prog->extflags & RXf_ANCH_GPOS) {
1833 if (s > reginfo.ganch)
1834 goto phooey;
1835 s = reginfo.ganch - prog->gofs;
1836 }
1837 }
1838 else if (data) {
1839 reginfo.ganch = strbeg + PTR2UV(data);
1840 } else /* pos() not defined */
1841 reginfo.ganch = strbeg;
1842 }
1843 if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1844 /* We have to be careful. If the previous successful match
1845 was from this regex we don't want a subsequent partially
1846 successful match to clobber the old results.
1847 So when we detect this possibility we add a swap buffer
1848 to the re, and switch the buffer each match. If we fail
1849 we switch it back, otherwise we leave it swapped.
1850 */
1851 swap = prog->offs;
1852 /* do we need a save destructor here for eval dies? */
1853 Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1854 }
1855 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1856 re_scream_pos_data d;
1857
1858 d.scream_olds = &scream_olds;
1859 d.scream_pos = &scream_pos;
1860 s = re_intuit_start(rx, sv, s, strend, flags, &d);
1861 if (!s) {
1862 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1863 goto phooey; /* not present */
1864 }
1865 }
1866
1867
1868
1869 /* Simplest case: anchored match need be tried only once. */
1870 /* [unless only anchor is BOL and multiline is set] */
1871 if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1872 if (s == startpos && regtry(&reginfo, &startpos))
1873 goto got_it;
1874 else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1875 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1876 {
1877 char *end;
1878
1879 if (minlen)
1880 dontbother = minlen - 1;
1881 end = HOP3c(strend, -dontbother, strbeg) - 1;
1882 /* for multiline we only have to try after newlines */
1883 if (prog->check_substr || prog->check_utf8) {
1884 if (s == startpos)
1885 goto after_try;
1886 while (1) {
1887 if (regtry(&reginfo, &s))
1888 goto got_it;
1889 after_try:
1890 if (s > end)
1891 goto phooey;
1892 if (prog->extflags & RXf_USE_INTUIT) {
1893 s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1894 if (!s)
1895 goto phooey;
1896 }
1897 else
1898 s++;
1899 }
1900 } else {
1901 if (s > startpos)
1902 s--;
1903 while (s < end) {
1904 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1905 if (regtry(&reginfo, &s))
1906 goto got_it;
1907 }
1908 }
1909 }
1910 }
1911 goto phooey;
1912 } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1913 {
1914 /* the warning about reginfo.ganch being used without intialization
1915 is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1916 and we only enter this block when the same bit is set. */
1917 char *tmp_s = reginfo.ganch - prog->gofs;
1918 if (regtry(&reginfo, &tmp_s))
1919 goto got_it;
1920 goto phooey;
1921 }
1922
1923 /* Messy cases: unanchored match. */
1924 if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1925 /* we have /x+whatever/ */
1926 /* it must be a one character string (XXXX Except UTF?) */
1927 char ch;
1928#ifdef DEBUGGING
1929 int did_match = 0;
1930#endif
1931 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1932 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1933 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1934
1935 if (do_utf8) {
1936 REXEC_FBC_SCAN(
1937 if (*s == ch) {
1938 DEBUG_EXECUTE_r( did_match = 1 );
1939 if (regtry(&reginfo, &s)) goto got_it;
1940 s += UTF8SKIP(s);
1941 while (s < strend && *s == ch)
1942 s += UTF8SKIP(s);
1943 }
1944 );
1945 }
1946 else {
1947 REXEC_FBC_SCAN(
1948 if (*s == ch) {
1949 DEBUG_EXECUTE_r( did_match = 1 );
1950 if (regtry(&reginfo, &s)) goto got_it;
1951 s++;
1952 while (s < strend && *s == ch)
1953 s++;
1954 }
1955 );
1956 }
1957 DEBUG_EXECUTE_r(if (!did_match)
1958 PerlIO_printf(Perl_debug_log,
1959 "Did not find anchored character...\n")
1960 );
1961 }
1962 else if (prog->anchored_substr != NULL
1963 || prog->anchored_utf8 != NULL
1964 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1965 && prog->float_max_offset < strend - s)) {
1966 SV *must;
1967 I32 back_max;
1968 I32 back_min;
1969 char *last;
1970 char *last1; /* Last position checked before */
1971#ifdef DEBUGGING
1972 int did_match = 0;
1973#endif
1974 if (prog->anchored_substr || prog->anchored_utf8) {
1975 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1976 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1977 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1978 back_max = back_min = prog->anchored_offset;
1979 } else {
1980 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1981 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1982 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1983 back_max = prog->float_max_offset;
1984 back_min = prog->float_min_offset;
1985 }
1986
1987
1988 if (must == &PL_sv_undef)
1989 /* could not downgrade utf8 check substring, so must fail */
1990 goto phooey;
1991
1992 if (back_min<0) {
1993 last = strend;
1994 } else {
1995 last = HOP3c(strend, /* Cannot start after this */
1996 -(I32)(CHR_SVLEN(must)
1997 - (SvTAIL(must) != 0) + back_min), strbeg);
1998 }
1999 if (s > PL_bostr)
2000 last1 = HOPc(s, -1);
2001 else
2002 last1 = s - 1; /* bogus */
2003
2004 /* XXXX check_substr already used to find "s", can optimize if
2005 check_substr==must. */
2006 scream_pos = -1;
2007 dontbother = end_shift;
2008 strend = HOPc(strend, -dontbother);
2009 while ( (s <= last) &&
2010 ((flags & REXEC_SCREAM)
2011 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2012 end_shift, &scream_pos, 0))
2013 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2014 (unsigned char*)strend, must,
2015 multiline ? FBMrf_MULTILINE : 0))) ) {
2016 /* we may be pointing at the wrong string */
2017 if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2018 s = strbeg + (s - SvPVX_const(sv));
2019 DEBUG_EXECUTE_r( did_match = 1 );
2020 if (HOPc(s, -back_max) > last1) {
2021 last1 = HOPc(s, -back_min);
2022 s = HOPc(s, -back_max);
2023 }
2024 else {
2025 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2026
2027 last1 = HOPc(s, -back_min);
2028 s = t;
2029 }
2030 if (do_utf8) {
2031 while (s <= last1) {
2032 if (regtry(&reginfo, &s))
2033 goto got_it;
2034 s += UTF8SKIP(s);
2035 }
2036 }
2037 else {
2038 while (s <= last1) {
2039 if (regtry(&reginfo, &s))
2040 goto got_it;
2041 s++;
2042 }
2043 }
2044 }
2045 DEBUG_EXECUTE_r(if (!did_match) {
2046 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2047 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2048 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2049 ((must == prog->anchored_substr || must == prog->anchored_utf8)
2050 ? "anchored" : "floating"),
2051 quoted, RE_SV_TAIL(must));
2052 });
2053 goto phooey;
2054 }
2055 else if ( (c = progi->regstclass) ) {
2056 if (minlen) {
2057 const OPCODE op = OP(progi->regstclass);
2058 /* don't bother with what can't match */
2059 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2060 strend = HOPc(strend, -(minlen - 1));
2061 }
2062 DEBUG_EXECUTE_r({
2063 SV * const prop = sv_newmortal();
2064 regprop(prog, prop, c);
2065 {
2066 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2067 s,strend-s,60);
2068 PerlIO_printf(Perl_debug_log,
2069 "Matching stclass %.*s against %s (%d chars)\n",
2070 (int)SvCUR(prop), SvPVX_const(prop),
2071 quoted, (int)(strend - s));
2072 }
2073 });
2074 if (find_byclass(prog, c, s, strend, &reginfo))
2075 goto got_it;
2076 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2077 }
2078 else {
2079 dontbother = 0;
2080 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2081 /* Trim the end. */
2082 char *last;
2083 SV* float_real;
2084
2085 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2086 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2087 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2088
2089 if (flags & REXEC_SCREAM) {
2090 last = screaminstr(sv, float_real, s - strbeg,
2091 end_shift, &scream_pos, 1); /* last one */
2092 if (!last)
2093 last = scream_olds; /* Only one occurrence. */
2094 /* we may be pointing at the wrong string */
2095 else if (RXp_MATCH_COPIED(prog))
2096 s = strbeg + (s - SvPVX_const(sv));
2097 }
2098 else {
2099 STRLEN len;
2100 const char * const little = SvPV_const(float_real, len);
2101
2102 if (SvTAIL(float_real)) {
2103 if (memEQ(strend - len + 1, little, len - 1))
2104 last = strend - len + 1;
2105 else if (!multiline)
2106 last = memEQ(strend - len, little, len)
2107 ? strend - len : NULL;
2108 else
2109 goto find_last;
2110 } else {
2111 find_last:
2112 if (len)
2113 last = rninstr(s, strend, little, little + len);
2114 else
2115 last = strend; /* matching "$" */
2116 }
2117 }
2118 if (last == NULL) {
2119 DEBUG_EXECUTE_r(
2120 PerlIO_printf(Perl_debug_log,
2121 "%sCan't trim the tail, match fails (should not happen)%s\n",
2122 PL_colors[4], PL_colors[5]));
2123 goto phooey; /* Should not happen! */
2124 }
2125 dontbother = strend - last + prog->float_min_offset;
2126 }
2127 if (minlen && (dontbother < minlen))
2128 dontbother = minlen - 1;
2129 strend -= dontbother; /* this one's always in bytes! */
2130 /* We don't know much -- general case. */
2131 if (do_utf8) {
2132 for (;;) {
2133 if (regtry(&reginfo, &s))
2134 goto got_it;
2135 if (s >= strend)
2136 break;
2137 s += UTF8SKIP(s);
2138 };
2139 }
2140 else {
2141 do {
2142 if (regtry(&reginfo, &s))
2143 goto got_it;
2144 } while (s++ < strend);
2145 }
2146 }
2147
2148 /* Failure. */
2149 goto phooey;
2150
2151got_it:
2152 Safefree(swap);
2153 RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2154
2155 if (PL_reg_eval_set)
2156 restore_pos(aTHX_ prog);
2157 if (RXp_PAREN_NAMES(prog))
2158 (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2159
2160 /* make sure $`, $&, $', and $digit will work later */
2161 if ( !(flags & REXEC_NOT_FIRST) ) {
2162 RX_MATCH_COPY_FREE(rx);
2163 if (flags & REXEC_COPY_STR) {
2164 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2165#ifdef PERL_OLD_COPY_ON_WRITE
2166 if ((SvIsCOW(sv)
2167 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2168 if (DEBUG_C_TEST) {
2169 PerlIO_printf(Perl_debug_log,
2170 "Copy on write: regexp capture, type %d\n",
2171 (int) SvTYPE(sv));
2172 }
2173 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2174 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2175 assert (SvPOKp(prog->saved_copy));
2176 } else
2177#endif
2178 {
2179 RX_MATCH_COPIED_on(rx);
2180 s = savepvn(strbeg, i);
2181 prog->subbeg = s;
2182 }
2183 prog->sublen = i;
2184 }
2185 else {
2186 prog->subbeg = strbeg;
2187 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2188 }
2189 }
2190
2191 return 1;
2192
2193phooey:
2194 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2195 PL_colors[4], PL_colors[5]));
2196 if (PL_reg_eval_set)
2197 restore_pos(aTHX_ prog);
2198 if (swap) {
2199 /* we failed :-( roll it back */
2200 Safefree(prog->offs);
2201 prog->offs = swap;
2202 }
2203
2204 return 0;
2205}
2206
2207
2208/*
2209 - regtry - try match at specific point
2210 */
2211STATIC I32 /* 0 failure, 1 success */
2212S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2213{
2214 dVAR;
2215 CHECKPOINT lastcp;
2216 REGEXP *const rx = reginfo->prog;
2217 regexp *const prog = (struct regexp *)SvANY(rx);
2218 RXi_GET_DECL(prog,progi);
2219 GET_RE_DEBUG_FLAGS_DECL;
2220
2221 PERL_ARGS_ASSERT_REGTRY;
2222
2223 reginfo->cutpoint=NULL;
2224
2225 if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2226 MAGIC *mg;
2227
2228 PL_reg_eval_set = RS_init;
2229 DEBUG_EXECUTE_r(DEBUG_s(
2230 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2231 (IV)(PL_stack_sp - PL_stack_base));
2232 ));
2233 SAVESTACK_CXPOS();
2234 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2235 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2236 SAVETMPS;
2237 /* Apparently this is not needed, judging by wantarray. */
2238 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2239 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2240
2241 if (reginfo->sv) {
2242 /* Make $_ available to executed code. */
2243 if (reginfo->sv != DEFSV) {
2244 SAVE_DEFSV;
2245 DEFSV_set(reginfo->sv);
2246 }
2247
2248 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2249 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2250 /* prepare for quick setting of pos */
2251#ifdef PERL_OLD_COPY_ON_WRITE
2252 if (SvIsCOW(reginfo->sv))
2253 sv_force_normal_flags(reginfo->sv, 0);
2254#endif
2255 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2256 &PL_vtbl_mglob, NULL, 0);
2257 mg->mg_len = -1;
2258 }
2259 PL_reg_magic = mg;
2260 PL_reg_oldpos = mg->mg_len;
2261 SAVEDESTRUCTOR_X(restore_pos, prog);
2262 }
2263 if (!PL_reg_curpm) {
2264 Newxz(PL_reg_curpm, 1, PMOP);
2265#ifdef USE_ITHREADS
2266 {
2267 SV* const repointer = &PL_sv_undef;
2268 /* this regexp is also owned by the new PL_reg_curpm, which
2269 will try to free it. */
2270 av_push(PL_regex_padav, repointer);
2271 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2272 PL_regex_pad = AvARRAY(PL_regex_padav);
2273 }
2274#endif
2275 }
2276#ifdef USE_ITHREADS
2277 /* It seems that non-ithreads works both with and without this code.
2278 So for efficiency reasons it seems best not to have the code
2279 compiled when it is not needed. */
2280 /* This is safe against NULLs: */
2281 ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2282 /* PM_reg_curpm owns a reference to this regexp. */
2283 ReREFCNT_inc(rx);
2284#endif
2285 PM_SETRE(PL_reg_curpm, rx);
2286 PL_reg_oldcurpm = PL_curpm;
2287 PL_curpm = PL_reg_curpm;
2288 if (RXp_MATCH_COPIED(prog)) {
2289 /* Here is a serious problem: we cannot rewrite subbeg,
2290 since it may be needed if this match fails. Thus
2291 $` inside (?{}) could fail... */
2292 PL_reg_oldsaved = prog->subbeg;
2293 PL_reg_oldsavedlen = prog->sublen;
2294#ifdef PERL_OLD_COPY_ON_WRITE
2295 PL_nrs = prog->saved_copy;
2296#endif
2297 RXp_MATCH_COPIED_off(prog);
2298 }
2299 else
2300 PL_reg_oldsaved = NULL;
2301 prog->subbeg = PL_bostr;
2302 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2303 }
2304 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2305 prog->offs[0].start = *startpos - PL_bostr;
2306 PL_reginput = *startpos;
2307 PL_reglastparen = &prog->lastparen;
2308 PL_reglastcloseparen = &prog->lastcloseparen;
2309 prog->lastparen = 0;
2310 prog->lastcloseparen = 0;
2311 PL_regsize = 0;
2312 PL_regoffs = prog->offs;
2313 if (PL_reg_start_tmpl <= prog->nparens) {
2314 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2315 if(PL_reg_start_tmp)
2316 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2317 else
2318 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2319 }
2320
2321 /* XXXX What this code is doing here?!!! There should be no need
2322 to do this again and again, PL_reglastparen should take care of
2323 this! --ilya*/
2324
2325 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2326 * Actually, the code in regcppop() (which Ilya may be meaning by
2327 * PL_reglastparen), is not needed at all by the test suite
2328 * (op/regexp, op/pat, op/split), but that code is needed otherwise
2329 * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2330 * Meanwhile, this code *is* needed for the
2331 * above-mentioned test suite tests to succeed. The common theme
2332 * on those tests seems to be returning null fields from matches.
2333 * --jhi updated by dapm */
2334#if 1
2335 if (prog->nparens) {
2336 regexp_paren_pair *pp = PL_regoffs;
2337 register I32 i;
2338 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2339 ++pp;
2340 pp->start = -1;
2341 pp->end = -1;
2342 }
2343 }
2344#endif
2345 REGCP_SET(lastcp);
2346 if (regmatch(reginfo, progi->program + 1)) {
2347 PL_regoffs[0].end = PL_reginput - PL_bostr;
2348 return 1;
2349 }
2350 if (reginfo->cutpoint)
2351 *startpos= reginfo->cutpoint;
2352 REGCP_UNWIND(lastcp);
2353 return 0;
2354}
2355
2356
2357#define sayYES goto yes
2358#define sayNO goto no
2359#define sayNO_SILENT goto no_silent
2360
2361/* we dont use STMT_START/END here because it leads to
2362 "unreachable code" warnings, which are bogus, but distracting. */
2363#define CACHEsayNO \
2364 if (ST.cache_mask) \
2365 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2366 sayNO
2367
2368/* this is used to determine how far from the left messages like
2369 'failed...' are printed. It should be set such that messages
2370 are inline with the regop output that created them.
2371*/
2372#define REPORT_CODE_OFF 32
2373
2374
2375/* Make sure there is a test for this +1 options in re_tests */
2376#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2377
2378#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2379#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
2380
2381#define SLAB_FIRST(s) (&(s)->states[0])
2382#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2383
2384/* grab a new slab and return the first slot in it */
2385
2386STATIC regmatch_state *
2387S_push_slab(pTHX)
2388{
2389#if PERL_VERSION < 9 && !defined(PERL_CORE)
2390 dMY_CXT;
2391#endif
2392 regmatch_slab *s = PL_regmatch_slab->next;
2393 if (!s) {
2394 Newx(s, 1, regmatch_slab);
2395 s->prev = PL_regmatch_slab;
2396 s->next = NULL;
2397 PL_regmatch_slab->next = s;
2398 }
2399 PL_regmatch_slab = s;
2400 return SLAB_FIRST(s);
2401}
2402
2403
2404/* push a new state then goto it */
2405
2406#define PUSH_STATE_GOTO(state, node) \
2407 scan = node; \
2408 st->resume_state = state; \
2409 goto push_state;
2410
2411/* push a new state with success backtracking, then goto it */
2412
2413#define PUSH_YES_STATE_GOTO(state, node) \
2414 scan = node; \
2415 st->resume_state = state; \
2416 goto push_yes_state;
2417
2418
2419
2420/*
2421
2422regmatch() - main matching routine
2423
2424This is basically one big switch statement in a loop. We execute an op,
2425set 'next' to point the next op, and continue. If we come to a point which
2426we may need to backtrack to on failure such as (A|B|C), we push a
2427backtrack state onto the backtrack stack. On failure, we pop the top
2428state, and re-enter the loop at the state indicated. If there are no more
2429states to pop, we return failure.
2430
2431Sometimes we also need to backtrack on success; for example /A+/, where
2432after successfully matching one A, we need to go back and try to
2433match another one; similarly for lookahead assertions: if the assertion
2434completes successfully, we backtrack to the state just before the assertion
2435and then carry on. In these cases, the pushed state is marked as
2436'backtrack on success too'. This marking is in fact done by a chain of
2437pointers, each pointing to the previous 'yes' state. On success, we pop to
2438the nearest yes state, discarding any intermediate failure-only states.
2439Sometimes a yes state is pushed just to force some cleanup code to be
2440called at the end of a successful match or submatch; e.g. (??{$re}) uses
2441it to free the inner regex.
2442
2443Note that failure backtracking rewinds the cursor position, while
2444success backtracking leaves it alone.
2445
2446A pattern is complete when the END op is executed, while a subpattern
2447such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2448ops trigger the "pop to last yes state if any, otherwise return true"
2449behaviour.
2450
2451A common convention in this function is to use A and B to refer to the two
2452subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2453the subpattern to be matched possibly multiple times, while B is the entire
2454rest of the pattern. Variable and state names reflect this convention.
2455
2456The states in the main switch are the union of ops and failure/success of
2457substates associated with with that op. For example, IFMATCH is the op
2458that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2459'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2460successfully matched A and IFMATCH_A_fail is a state saying that we have
2461just failed to match A. Resume states always come in pairs. The backtrack
2462state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2463at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2464on success or failure.
2465
2466The struct that holds a backtracking state is actually a big union, with
2467one variant for each major type of op. The variable st points to the
2468top-most backtrack struct. To make the code clearer, within each
2469block of code we #define ST to alias the relevant union.
2470
2471Here's a concrete example of a (vastly oversimplified) IFMATCH
2472implementation:
2473
2474 switch (state) {
2475 ....
2476
2477#define ST st->u.ifmatch
2478
2479 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2480 ST.foo = ...; // some state we wish to save
2481 ...
2482 // push a yes backtrack state with a resume value of
2483 // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2484 // first node of A:
2485 PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2486 // NOTREACHED
2487
2488 case IFMATCH_A: // we have successfully executed A; now continue with B
2489 next = B;
2490 bar = ST.foo; // do something with the preserved value
2491 break;
2492
2493 case IFMATCH_A_fail: // A failed, so the assertion failed
2494 ...; // do some housekeeping, then ...
2495 sayNO; // propagate the failure
2496
2497#undef ST
2498
2499 ...
2500 }
2501
2502For any old-timers reading this who are familiar with the old recursive
2503approach, the code above is equivalent to:
2504
2505 case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2506 {
2507 int foo = ...
2508 ...
2509 if (regmatch(A)) {
2510 next = B;
2511 bar = foo;
2512 break;
2513 }
2514 ...; // do some housekeeping, then ...
2515 sayNO; // propagate the failure
2516 }
2517
2518The topmost backtrack state, pointed to by st, is usually free. If you
2519want to claim it, populate any ST.foo fields in it with values you wish to
2520save, then do one of
2521
2522 PUSH_STATE_GOTO(resume_state, node);
2523 PUSH_YES_STATE_GOTO(resume_state, node);
2524
2525which sets that backtrack state's resume value to 'resume_state', pushes a
2526new free entry to the top of the backtrack stack, then goes to 'node'.
2527On backtracking, the free slot is popped, and the saved state becomes the
2528new free state. An ST.foo field in this new top state can be temporarily
2529accessed to retrieve values, but once the main loop is re-entered, it
2530becomes available for reuse.
2531
2532Note that the depth of the backtrack stack constantly increases during the
2533left-to-right execution of the pattern, rather than going up and down with
2534the pattern nesting. For example the stack is at its maximum at Z at the
2535end of the pattern, rather than at X in the following:
2536
2537 /(((X)+)+)+....(Y)+....Z/
2538
2539The only exceptions to this are lookahead/behind assertions and the cut,
2540(?>A), which pop all the backtrack states associated with A before
2541continuing.
2542
2543Bascktrack state structs are allocated in slabs of about 4K in size.
2544PL_regmatch_state and st always point to the currently active state,
2545and PL_regmatch_slab points to the slab currently containing
2546PL_regmatch_state. The first time regmatch() is called, the first slab is
2547allocated, and is never freed until interpreter destruction. When the slab
2548is full, a new one is allocated and chained to the end. At exit from
2549regmatch(), slabs allocated since entry are freed.
2550
2551*/
2552
2553
2554#define DEBUG_STATE_pp(pp) \
2555 DEBUG_STATE_r({ \
2556 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2557 PerlIO_printf(Perl_debug_log, \
2558 " %*s"pp" %s%s%s%s%s\n", \
2559 depth*2, "", \
2560 PL_reg_name[st->resume_state], \
2561 ((st==yes_state||st==mark_state) ? "[" : ""), \
2562 ((st==yes_state) ? "Y" : ""), \
2563 ((st==mark_state) ? "M" : ""), \
2564 ((st==yes_state||st==mark_state) ? "]" : "") \
2565 ); \
2566 });
2567
2568
2569#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2570
2571#ifdef DEBUGGING
2572
2573STATIC void
2574S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2575 const char *start, const char *end, const char *blurb)
2576{
2577 const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2578
2579 PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2580
2581 if (!PL_colorset)
2582 reginitcolors();
2583 {
2584 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2585 RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2586
2587 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2588 start, end - start, 60);
2589
2590 PerlIO_printf(Perl_debug_log,
2591 "%s%s REx%s %s against %s\n",
2592 PL_colors[4], blurb, PL_colors[5], s0, s1);
2593
2594 if (do_utf8||utf8_pat)
2595 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2596 utf8_pat ? "pattern" : "",
2597 utf8_pat && do_utf8 ? " and " : "",
2598 do_utf8 ? "string" : ""
2599 );
2600 }
2601}
2602
2603STATIC void
2604S_dump_exec_pos(pTHX_ const char *locinput,
2605 const regnode *scan,
2606 const char *loc_regeol,
2607 const char *loc_bostr,
2608 const char *loc_reg_starttry,
2609 const bool do_utf8)
2610{
2611 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2612 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2613 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2614 /* The part of the string before starttry has one color
2615 (pref0_len chars), between starttry and current
2616 position another one (pref_len - pref0_len chars),
2617 after the current position the third one.
2618 We assume that pref0_len <= pref_len, otherwise we
2619 decrease pref0_len. */
2620 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2621 ? (5 + taill) - l : locinput - loc_bostr;
2622 int pref0_len;
2623
2624 PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2625
2626 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2627 pref_len++;
2628 pref0_len = pref_len - (locinput - loc_reg_starttry);
2629 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2630 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2631 ? (5 + taill) - pref_len : loc_regeol - locinput);
2632 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2633 l--;
2634 if (pref0_len < 0)
2635 pref0_len = 0;
2636 if (pref0_len > pref_len)
2637 pref0_len = pref_len;
2638 {
2639 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2640
2641 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2642 (locinput - pref_len),pref0_len, 60, 4, 5);
2643
2644 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2645 (locinput - pref_len + pref0_len),
2646 pref_len - pref0_len, 60, 2, 3);
2647
2648 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2649 locinput, loc_regeol - locinput, 10, 0, 1);
2650
2651 const STRLEN tlen=len0+len1+len2;
2652 PerlIO_printf(Perl_debug_log,
2653 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2654 (IV)(locinput - loc_bostr),
2655 len0, s0,
2656 len1, s1,
2657 (docolor ? "" : "> <"),
2658 len2, s2,
2659 (int)(tlen > 19 ? 0 : 19 - tlen),
2660 "");
2661 }
2662}
2663
2664#endif
2665
2666/* reg_check_named_buff_matched()
2667 * Checks to see if a named buffer has matched. The data array of
2668 * buffer numbers corresponding to the buffer is expected to reside
2669 * in the regexp->data->data array in the slot stored in the ARG() of
2670 * node involved. Note that this routine doesn't actually care about the
2671 * name, that information is not preserved from compilation to execution.
2672 * Returns the index of the leftmost defined buffer with the given name
2673 * or 0 if non of the buffers matched.
2674 */
2675STATIC I32
2676S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2677{
2678 I32 n;
2679 RXi_GET_DECL(rex,rexi);
2680 SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2681 I32 *nums=(I32*)SvPVX(sv_dat);
2682
2683 PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2684
2685 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2686 if ((I32)*PL_reglastparen >= nums[n] &&
2687 PL_regoffs[nums[n]].end != -1)
2688 {
2689 return nums[n];
2690 }
2691 }
2692 return 0;
2693}
2694
2695
2696/* free all slabs above current one - called during LEAVE_SCOPE */
2697
2698STATIC void
2699S_clear_backtrack_stack(pTHX_ void *p)
2700{
2701 regmatch_slab *s = PL_regmatch_slab->next;
2702 PERL_UNUSED_ARG(p);
2703
2704 if (!s)
2705 return;
2706 PL_regmatch_slab->next = NULL;
2707 while (s) {
2708 regmatch_slab * const osl = s;
2709 s = s->next;
2710 Safefree(osl);
2711 }
2712}
2713
2714
2715#define SETREX(Re1,Re2) \
2716 if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2717 Re1 = (Re2)
2718
2719STATIC I32 /* 0 failure, 1 success */
2720S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2721{
2722#if PERL_VERSION < 9 && !defined(PERL_CORE)
2723 dMY_CXT;
2724#endif
2725 dVAR;
2726 register const bool do_utf8 = PL_reg_match_utf8;
2727 const U32 uniflags = UTF8_ALLOW_DEFAULT;
2728 REGEXP *rex_sv = reginfo->prog;
2729 regexp *rex = (struct regexp *)SvANY(rex_sv);
2730 RXi_GET_DECL(rex,rexi);
2731 I32 oldsave;
2732 /* the current state. This is a cached copy of PL_regmatch_state */
2733 register regmatch_state *st;
2734 /* cache heavy used fields of st in registers */
2735 register regnode *scan;
2736 register regnode *next;
2737 register U32 n = 0; /* general value; init to avoid compiler warning */
2738 register I32 ln = 0; /* len or last; init to avoid compiler warning */
2739 register char *locinput = PL_reginput;
2740 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2741
2742 bool result = 0; /* return value of S_regmatch */
2743 int depth = 0; /* depth of backtrack stack */
2744 U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2745 const U32 max_nochange_depth =
2746 (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2747 3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2748 regmatch_state *yes_state = NULL; /* state to pop to on success of
2749 subpattern */
2750 /* mark_state piggy backs on the yes_state logic so that when we unwind
2751 the stack on success we can update the mark_state as we go */
2752 regmatch_state *mark_state = NULL; /* last mark state we have seen */
2753 regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2754 struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
2755 U32 state_num;
2756 bool no_final = 0; /* prevent failure from backtracking? */
2757 bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
2758 char *startpoint = PL_reginput;
2759 SV *popmark = NULL; /* are we looking for a mark? */
2760 SV *sv_commit = NULL; /* last mark name seen in failure */
2761 SV *sv_yes_mark = NULL; /* last mark name we have seen
2762 during a successfull match */
2763 U32 lastopen = 0; /* last open we saw */
2764 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2765 SV* const oreplsv = GvSV(PL_replgv);
2766 /* these three flags are set by various ops to signal information to
2767 * the very next op. They have a useful lifetime of exactly one loop
2768 * iteration, and are not preserved or restored by state pushes/pops
2769 */
2770 bool sw = 0; /* the condition value in (?(cond)a|b) */
2771 bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
2772 int logical = 0; /* the following EVAL is:
2773 0: (?{...})
2774 1: (?(?{...})X|Y)
2775 2: (??{...})
2776 or the following IFMATCH/UNLESSM is:
2777 false: plain (?=foo)
2778 true: used as a condition: (?(?=foo))
2779 */
2780#ifdef DEBUGGING
2781 GET_RE_DEBUG_FLAGS_DECL;
2782#endif
2783
2784 PERL_ARGS_ASSERT_REGMATCH;
2785
2786 DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2787 PerlIO_printf(Perl_debug_log,"regmatch start\n");
2788 }));
2789 /* on first ever call to regmatch, allocate first slab */
2790 if (!PL_regmatch_slab) {
2791 Newx(PL_regmatch_slab, 1, regmatch_slab);
2792 PL_regmatch_slab->prev = NULL;
2793 PL_regmatch_slab->next = NULL;
2794 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2795 }
2796
2797 oldsave = PL_savestack_ix;
2798 SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2799 SAVEVPTR(PL_regmatch_slab);
2800 SAVEVPTR(PL_regmatch_state);
2801
2802 /* grab next free state slot */
2803 st = ++PL_regmatch_state;
2804 if (st > SLAB_LAST(PL_regmatch_slab))
2805 st = PL_regmatch_state = S_push_slab(aTHX);
2806
2807 /* Note that nextchr is a byte even in UTF */
2808 nextchr = UCHARAT(locinput);
2809 scan = prog;
2810 while (scan != NULL) {
2811
2812 DEBUG_EXECUTE_r( {
2813 SV * const prop = sv_newmortal();
2814 regnode *rnext=regnext(scan);
2815 DUMP_EXEC_POS( locinput, scan, do_utf8 );
2816 regprop(rex, prop, scan);
2817
2818 PerlIO_printf(Perl_debug_log,
2819 "%3"IVdf":%*s%s(%"IVdf")\n",
2820 (IV)(scan - rexi->program), depth*2, "",
2821 SvPVX_const(prop),
2822 (PL_regkind[OP(scan)] == END || !rnext) ?
2823 0 : (IV)(rnext - rexi->program));
2824 });
2825
2826 next = scan + NEXT_OFF(scan);
2827 if (next == scan)
2828 next = NULL;
2829 state_num = OP(scan);
2830
2831 reenter_switch:
2832
2833 assert(PL_reglastparen == &rex->lastparen);
2834 assert(PL_reglastcloseparen == &rex->lastcloseparen);
2835 assert(PL_regoffs == rex->offs);
2836
2837 switch (state_num) {
2838 case BOL:
2839 if (locinput == PL_bostr)
2840 {
2841 /* reginfo->till = reginfo->bol; */
2842 break;
2843 }
2844 sayNO;
2845 case MBOL:
2846 if (locinput == PL_bostr ||
2847 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2848 {
2849 break;
2850 }
2851 sayNO;
2852 case SBOL:
2853 if (locinput == PL_bostr)
2854 break;
2855 sayNO;
2856 case GPOS:
2857 if (locinput == reginfo->ganch)
2858 break;
2859 sayNO;
2860
2861 case KEEPS:
2862 /* update the startpoint */
2863 st->u.keeper.val = PL_regoffs[0].start;
2864 PL_reginput = locinput;
2865 PL_regoffs[0].start = locinput - PL_bostr;
2866 PUSH_STATE_GOTO(KEEPS_next, next);
2867 /*NOT-REACHED*/
2868 case KEEPS_next_fail:
2869 /* rollback the start point change */
2870 PL_regoffs[0].start = st->u.keeper.val;
2871 sayNO_SILENT;
2872 /*NOT-REACHED*/
2873 case EOL:
2874 goto seol;
2875 case MEOL:
2876 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2877 sayNO;
2878 break;
2879 case SEOL:
2880 seol:
2881 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2882 sayNO;
2883 if (PL_regeol - locinput > 1)
2884 sayNO;
2885 break;
2886 case EOS:
2887 if (PL_regeol != locinput)
2888 sayNO;
2889 break;
2890 case SANY:
2891 if (!nextchr && locinput >= PL_regeol)
2892 sayNO;
2893 if (do_utf8) {
2894 locinput += PL_utf8skip[nextchr];
2895 if (locinput > PL_regeol)
2896 sayNO;
2897 nextchr = UCHARAT(locinput);
2898 }
2899 else
2900 nextchr = UCHARAT(++locinput);
2901 break;
2902 case CANY:
2903 if (!nextchr && locinput >= PL_regeol)
2904 sayNO;
2905 nextchr = UCHARAT(++locinput);
2906 break;
2907 case REG_ANY:
2908 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2909 sayNO;
2910 if (do_utf8) {
2911 locinput += PL_utf8skip[nextchr];
2912 if (locinput > PL_regeol)
2913 sayNO;
2914 nextchr = UCHARAT(locinput);
2915 }
2916 else
2917 nextchr = UCHARAT(++locinput);
2918 break;
2919
2920#undef ST
2921#define ST st->u.trie
2922 case TRIEC:
2923 /* In this case the charclass data is available inline so
2924 we can fail fast without a lot of extra overhead.
2925 */
2926 if (scan->flags == EXACT || !do_utf8) {
2927 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2928 DEBUG_EXECUTE_r(
2929 PerlIO_printf(Perl_debug_log,
2930 "%*s %sfailed to match trie start class...%s\n",
2931 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2932 );
2933 sayNO_SILENT;
2934 /* NOTREACHED */
2935 }
2936 }
2937 /* FALL THROUGH */
2938 case TRIE:
2939 {
2940 /* what type of TRIE am I? (utf8 makes this contextual) */
2941 DECL_TRIE_TYPE(scan);
2942
2943 /* what trie are we using right now */
2944 reg_trie_data * const trie
2945 = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2946 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2947 U32 state = trie->startstate;
2948
2949 if (trie->bitmap && trie_type != trie_utf8_fold &&
2950 !TRIE_BITMAP_TEST(trie,*locinput)
2951 ) {
2952 if (trie->states[ state ].wordnum) {
2953 DEBUG_EXECUTE_r(
2954 PerlIO_printf(Perl_debug_log,
2955 "%*s %smatched empty string...%s\n",
2956 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2957 );
2958 break;
2959 } else {
2960 DEBUG_EXECUTE_r(
2961 PerlIO_printf(Perl_debug_log,
2962 "%*s %sfailed to match trie start class...%s\n",
2963 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2964 );
2965 sayNO_SILENT;
2966 }
2967 }
2968
2969 {
2970 U8 *uc = ( U8* )locinput;
2971
2972 STRLEN len = 0;
2973 STRLEN foldlen = 0;
2974 U8 *uscan = (U8*)NULL;
2975 STRLEN bufflen=0;
2976 SV *sv_accept_buff = NULL;
2977 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2978
2979 ST.accepted = 0; /* how many accepting states we have seen */
2980 ST.B = next;
2981 ST.jump = trie->jump;
2982 ST.me = scan;
2983 /*
2984 traverse the TRIE keeping track of all accepting states
2985 we transition through until we get to a failing node.
2986 */
2987
2988 while ( state && uc <= (U8*)PL_regeol ) {
2989 U32 base = trie->states[ state ].trans.base;
2990 UV uvc = 0;
2991 U16 charid;
2992 /* We use charid to hold the wordnum as we don't use it
2993 for charid until after we have done the wordnum logic.
2994 We define an alias just so that the wordnum logic reads
2995 more naturally. */
2996
2997#define got_wordnum charid
2998 got_wordnum = trie->states[ state ].wordnum;
2999
3000 if ( got_wordnum ) {
3001 if ( ! ST.accepted ) {
3002 ENTER;
3003 SAVETMPS; /* XXX is this necessary? dmq */
3004 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3005 sv_accept_buff=newSV(bufflen *
3006 sizeof(reg_trie_accepted) - 1);
3007 SvCUR_set(sv_accept_buff, 0);
3008 SvPOK_on(sv_accept_buff);
3009 sv_2mortal(sv_accept_buff);
3010 SAVETMPS;
3011 ST.accept_buff =
3012 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3013 }
3014 do {
3015 if (ST.accepted >= bufflen) {
3016 bufflen *= 2;
3017 ST.accept_buff =(reg_trie_accepted*)
3018 SvGROW(sv_accept_buff,
3019 bufflen * sizeof(reg_trie_accepted));
3020 }
3021 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3022 + sizeof(reg_trie_accepted));
3023
3024
3025 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3026 ST.accept_buff[ST.accepted].endpos = uc;
3027 ++ST.accepted;
3028 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3029 }
3030#undef got_wordnum
3031
3032 DEBUG_TRIE_EXECUTE_r({
3033 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3034 PerlIO_printf( Perl_debug_log,
3035 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
3036 2+depth * 2, "", PL_colors[4],
3037 (UV)state, (UV)ST.accepted );
3038 });
3039
3040 if ( base ) {
3041 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3042 uscan, len, uvc, charid, foldlen,
3043 foldbuf, uniflags);
3044
3045 if (charid &&
3046 (base + charid > trie->uniquecharcount )
3047 && (base + charid - 1 - trie->uniquecharcount
3048 < trie->lasttrans)
3049 && trie->trans[base + charid - 1 -
3050 trie->uniquecharcount].check == state)
3051 {
3052 state = trie->trans[base + charid - 1 -
3053 trie->uniquecharcount ].next;
3054 }
3055 else {
3056 state = 0;
3057 }
3058 uc += len;
3059
3060 }
3061 else {
3062 state = 0;
3063 }
3064 DEBUG_TRIE_EXECUTE_r(
3065 PerlIO_printf( Perl_debug_log,
3066 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3067 charid, uvc, (UV)state, PL_colors[5] );
3068 );
3069 }
3070 if (!ST.accepted )
3071 sayNO;
3072
3073 DEBUG_EXECUTE_r(
3074 PerlIO_printf( Perl_debug_log,
3075 "%*s %sgot %"IVdf" possible matches%s\n",
3076 REPORT_CODE_OFF + depth * 2, "",
3077 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3078 );
3079 }}
3080 goto trie_first_try; /* jump into the fail handler */
3081 /* NOTREACHED */
3082 case TRIE_next_fail: /* we failed - try next alterative */
3083 if ( ST.jump) {
3084 REGCP_UNWIND(ST.cp);
3085 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3086 PL_regoffs[n].end = -1;
3087 *PL_reglastparen = n;
3088 }
3089 trie_first_try:
3090 if (do_cutgroup) {
3091 do_cutgroup = 0;
3092 no_final = 0;
3093 }
3094
3095 if ( ST.jump) {
3096 ST.lastparen = *PL_reglastparen;
3097 REGCP_SET(ST.cp);
3098 }
3099 if ( ST.accepted == 1 ) {
3100 /* only one choice left - just continue */
3101 DEBUG_EXECUTE_r({
3102 AV *const trie_words
3103 = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3104 SV ** const tmp = av_fetch( trie_words,
3105 ST.accept_buff[ 0 ].wordnum-1, 0 );
3106 SV *sv= tmp ? sv_newmortal() : NULL;
3107
3108 PerlIO_printf( Perl_debug_log,
3109 "%*s %sonly one match left: #%d <%s>%s\n",
3110 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3111 ST.accept_buff[ 0 ].wordnum,
3112 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3113 PL_colors[0], PL_colors[1],
3114 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3115 )
3116 : "not compiled under -Dr",
3117 PL_colors[5] );
3118 });
3119 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3120 /* in this case we free tmps/leave before we call regmatch
3121 as we wont be using accept_buff again. */
3122
3123 locinput = PL_reginput;
3124 nextchr = UCHARAT(locinput);
3125 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3126 scan = ST.B;
3127 else
3128 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3129 if (!has_cutgroup) {
3130 FREETMPS;
3131 LEAVE;
3132 } else {
3133 ST.accepted--;
3134 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3135 }
3136
3137 continue; /* execute rest of RE */
3138 }
3139
3140 if ( !ST.accepted-- ) {
3141 DEBUG_EXECUTE_r({
3142 PerlIO_printf( Perl_debug_log,
3143 "%*s %sTRIE failed...%s\n",
3144 REPORT_CODE_OFF+depth*2, "",
3145 PL_colors[4],
3146 PL_colors[5] );
3147 });
3148 FREETMPS;
3149 LEAVE;
3150 sayNO_SILENT;
3151 /*NOTREACHED*/
3152 }
3153
3154 /*
3155 There are at least two accepting states left. Presumably
3156 the number of accepting states is going to be low,
3157 typically two. So we simply scan through to find the one
3158 with lowest wordnum. Once we find it, we swap the last
3159 state into its place and decrement the size. We then try to
3160 match the rest of the pattern at the point where the word
3161 ends. If we succeed, control just continues along the
3162 regex; if we fail we return here to try the next accepting
3163 state
3164 */
3165
3166 {
3167 U32 best = 0;
3168 U32 cur;
3169 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3170 DEBUG_TRIE_EXECUTE_r(
3171 PerlIO_printf( Perl_debug_log,
3172 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3173 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3174 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3175 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3176 );
3177
3178 if (ST.accept_buff[cur].wordnum <
3179 ST.accept_buff[best].wordnum)
3180 best = cur;
3181 }
3182
3183 DEBUG_EXECUTE_r({
3184 AV *const trie_words
3185 = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3186 SV ** const tmp = av_fetch( trie_words,
3187 ST.accept_buff[ best ].wordnum - 1, 0 );
3188 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3189 ST.B :
3190 ST.me + ST.jump[ST.accept_buff[best].wordnum];
3191 SV *sv= tmp ? sv_newmortal() : NULL;
3192
3193 PerlIO_printf( Perl_debug_log,
3194 "%*s %strying alternation #%d <%s> at node #%d %s\n",
3195 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3196 ST.accept_buff[best].wordnum,
3197 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3198 PL_colors[0], PL_colors[1],
3199 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3200 ) : "not compiled under -Dr",
3201 REG_NODE_NUM(nextop),
3202 PL_colors[5] );
3203 });
3204
3205 if ( best<ST.accepted ) {
3206 reg_trie_accepted tmp = ST.accept_buff[ best ];
3207 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3208 ST.accept_buff[ ST.accepted ] = tmp;
3209 best = ST.accepted;
3210 }
3211 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3212 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3213 scan = ST.B;
3214 } else {
3215 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3216 }
3217 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3218 /* NOTREACHED */
3219 }
3220 /* NOTREACHED */
3221 case TRIE_next:
3222 /* we dont want to throw this away, see bug 57042*/
3223 if (oreplsv != GvSV(PL_replgv))
3224 sv_setsv(oreplsv, GvSV(PL_replgv));
3225 FREETMPS;
3226 LEAVE;
3227 sayYES;
3228#undef ST
3229
3230 case EXACT: {
3231 char *s = STRING(scan);
3232 ln = STR_LEN(scan);
3233 if (do_utf8 != UTF) {
3234 /* The target and the pattern have differing utf8ness. */
3235 char *l = locinput;
3236 const char * const e = s + ln;
3237
3238 if (do_utf8) {
3239 /* The target is utf8, the pattern is not utf8. */
3240 while (s < e) {
3241 STRLEN ulen;
3242 if (l >= PL_regeol)
3243 sayNO;
3244 if (NATIVE_TO_UNI(*(U8*)s) !=
3245 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3246 uniflags))
3247 sayNO;
3248 l += ulen;
3249 s ++;
3250 }
3251 }
3252 else {
3253 /* The target is not utf8, the pattern is utf8. */
3254 while (s < e) {
3255 STRLEN ulen;
3256 if (l >= PL_regeol)
3257 sayNO;
3258 if (NATIVE_TO_UNI(*((U8*)l)) !=
3259 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3260 uniflags))
3261 sayNO;
3262 s += ulen;
3263 l ++;
3264 }
3265 }
3266 locinput = l;
3267 nextchr = UCHARAT(locinput);
3268 break;
3269 }
3270 /* The target and the pattern have the same utf8ness. */
3271 /* Inline the first character, for speed. */
3272 if (UCHARAT(s) != nextchr)
3273 sayNO;
3274 if (PL_regeol - locinput < ln)
3275 sayNO;
3276 if (ln > 1 && memNE(s, locinput, ln))
3277 sayNO;
3278 locinput += ln;
3279 nextchr = UCHARAT(locinput);
3280 break;
3281 }
3282 case EXACTFL:
3283 PL_reg_flags |= RF_tainted;
3284 /* FALL THROUGH */
3285 case EXACTF: {
3286 char * const s = STRING(scan);
3287 ln = STR_LEN(scan);
3288
3289 if (do_utf8 || UTF) {
3290 /* Either target or the pattern are utf8. */
3291 const char * const l = locinput;
3292 char *e = PL_regeol;
3293
3294 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
3295 l, &e, 0, do_utf8)) {
3296 /* One more case for the sharp s:
3297 * pack("U0U*", 0xDF) =~ /ss/i,
3298 * the 0xC3 0x9F are the UTF-8
3299 * byte sequence for the U+00DF. */
3300
3301 if (!(do_utf8 &&
3302 toLOWER(s[0]) == 's' &&
3303 ln >= 2 &&
3304 toLOWER(s[1]) == 's' &&
3305 (U8)l[0] == 0xC3 &&
3306 e - l >= 2 &&
3307 (U8)l[1] == 0x9F))
3308 sayNO;
3309 }
3310 locinput = e;
3311 nextchr = UCHARAT(locinput);
3312 break;
3313 }
3314
3315 /* Neither the target and the pattern are utf8. */
3316
3317 /* Inline the first character, for speed. */
3318 if (UCHARAT(s) != nextchr &&
3319 UCHARAT(s) != ((OP(scan) == EXACTF)
3320 ? PL_fold : PL_fold_locale)[nextchr])
3321 sayNO;
3322 if (PL_regeol - locinput < ln)
3323 sayNO;
3324 if (ln > 1 && (OP(scan) == EXACTF
3325 ? ibcmp(s, locinput, ln)
3326 : ibcmp_locale(s, locinput, ln)))
3327 sayNO;
3328 locinput += ln;
3329 nextchr = UCHARAT(locinput);
3330 break;
3331 }
3332 case ANYOF:
3333 if (do_utf8) {
3334 STRLEN inclasslen = PL_regeol - locinput;
3335
3336 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3337 goto anyof_fail;
3338 if (locinput >= PL_regeol)
3339 sayNO;
3340 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3341 nextchr = UCHARAT(locinput);
3342 break;
3343 }
3344 else {
3345 if (nextchr < 0)
3346 nextchr = UCHARAT(locinput);
3347 if (!REGINCLASS(rex, scan, (U8*)locinput))
3348 goto anyof_fail;
3349 if (!nextchr && locinput >= PL_regeol)
3350 sayNO;
3351 nextchr = UCHARAT(++locinput);
3352 break;
3353 }
3354 anyof_fail:
3355 /* If we might have the case of the German sharp s
3356 * in a casefolding Unicode character class. */
3357
3358 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3359 locinput += SHARP_S_SKIP;
3360 nextchr = UCHARAT(locinput);
3361 }
3362 else
3363 sayNO;
3364 break;
3365 case ALNUML:
3366 PL_reg_flags |= RF_tainted;
3367 /* FALL THROUGH */
3368 case ALNUM:
3369 if (!nextchr)
3370 sayNO;
3371 if (do_utf8) {
3372 LOAD_UTF8_CHARCLASS_ALNUM();
3373 if (!(OP(scan) == ALNUM
3374 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3375 : isALNUM_LC_utf8((U8*)locinput)))
3376 {
3377 sayNO;
3378 }
3379 locinput += PL_utf8skip[nextchr];
3380 nextchr = UCHARAT(locinput);
3381 break;
3382 }
3383 if (!(OP(scan) == ALNUM
3384 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3385 sayNO;
3386 nextchr = UCHARAT(++locinput);
3387 break;
3388 case NALNUML:
3389 PL_reg_flags |= RF_tainted;
3390 /* FALL THROUGH */
3391 case NALNUM:
3392 if (!nextchr && locinput >= PL_regeol)
3393 sayNO;
3394 if (do_utf8) {
3395 LOAD_UTF8_CHARCLASS_ALNUM();
3396 if (OP(scan) == NALNUM
3397 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3398 : isALNUM_LC_utf8((U8*)locinput))
3399 {
3400 sayNO;
3401 }
3402 locinput += PL_utf8skip[nextchr];
3403 nextchr = UCHARAT(locinput);
3404 break;
3405 }
3406 if (OP(scan) == NALNUM
3407 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3408 sayNO;
3409 nextchr = UCHARAT(++locinput);
3410 break;
3411 case BOUNDL:
3412 case NBOUNDL:
3413 PL_reg_flags |= RF_tainted;
3414 /* FALL THROUGH */
3415 case BOUND:
3416 case NBOUND:
3417 /* was last char in word? */
3418 if (do_utf8) {
3419 if (locinput == PL_bostr)
3420 ln = '\n';
3421 else {
3422 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3423
3424 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3425 }
3426 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3427 ln = isALNUM_uni(ln);
3428 LOAD_UTF8_CHARCLASS_ALNUM();
3429 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3430 }
3431 else {
3432 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3433 n = isALNUM_LC_utf8((U8*)locinput);
3434 }
3435 }
3436 else {
3437 ln = (locinput != PL_bostr) ?
3438 UCHARAT(locinput - 1) : '\n';
3439 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3440 ln = isALNUM(ln);
3441 n = isALNUM(nextchr);
3442 }
3443 else {
3444 ln = isALNUM_LC(ln);
3445 n = isALNUM_LC(nextchr);
3446 }
3447 }
3448 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3449 OP(scan) == BOUNDL))
3450 sayNO;
3451 break;
3452 case SPACEL:
3453 PL_reg_flags |= RF_tainted;
3454 /* FALL THROUGH */
3455 case SPACE:
3456 if (!nextchr)
3457 sayNO;
3458 if (do_utf8) {
3459 if (UTF8_IS_CONTINUED(nextchr)) {
3460 LOAD_UTF8_CHARCLASS_SPACE();
3461 if (!(OP(scan) == SPACE
3462 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3463 : isSPACE_LC_utf8((U8*)locinput)))
3464 {
3465 sayNO;
3466 }
3467 locinput += PL_utf8skip[nextchr];
3468 nextchr = UCHARAT(locinput);
3469 break;
3470 }
3471 if (!(OP(scan) == SPACE
3472 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3473 sayNO;
3474 nextchr = UCHARAT(++locinput);
3475 }
3476 else {
3477 if (!(OP(scan) == SPACE
3478 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3479 sayNO;
3480 nextchr = UCHARAT(++locinput);
3481 }
3482 break;
3483 case NSPACEL:
3484 PL_reg_flags |= RF_tainted;
3485 /* FALL THROUGH */
3486 case NSPACE:
3487 if (!nextchr && locinput >= PL_regeol)
3488 sayNO;
3489 if (do_utf8) {
3490 LOAD_UTF8_CHARCLASS_SPACE();
3491 if (OP(scan) == NSPACE
3492 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3493 : isSPACE_LC_utf8((U8*)locinput))
3494 {
3495 sayNO;
3496 }
3497 locinput += PL_utf8skip[nextchr];
3498 nextchr = UCHARAT(locinput);
3499 break;
3500 }
3501 if (OP(scan) == NSPACE
3502 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3503 sayNO;
3504 nextchr = UCHARAT(++locinput);
3505 break;
3506 case DIGITL:
3507 PL_reg_flags |= RF_tainted;
3508 /* FALL THROUGH */
3509 case DIGIT:
3510 if (!nextchr)
3511 sayNO;
3512 if (do_utf8) {
3513 LOAD_UTF8_CHARCLASS_DIGIT();
3514 if (!(OP(scan) == DIGIT
3515 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3516 : isDIGIT_LC_utf8((U8*)locinput)))
3517 {
3518 sayNO;
3519 }
3520 locinput += PL_utf8skip[nextchr];
3521 nextchr = UCHARAT(locinput);
3522 break;
3523 }
3524 if (!(OP(scan) == DIGIT
3525 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3526 sayNO;
3527 nextchr = UCHARAT(++locinput);
3528 break;
3529 case NDIGITL:
3530 PL_reg_flags |= RF_tainted;
3531 /* FALL THROUGH */
3532 case NDIGIT:
3533 if (!nextchr && locinput >= PL_regeol)
3534 sayNO;
3535 if (do_utf8) {
3536 LOAD_UTF8_CHARCLASS_DIGIT();
3537 if (OP(scan) == NDIGIT
3538 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3539 : isDIGIT_LC_utf8((U8*)locinput))
3540 {
3541 sayNO;
3542 }
3543 locinput += PL_utf8skip[nextchr];
3544 nextchr = UCHARAT(locinput);
3545 break;
3546 }
3547 if (OP(scan) == NDIGIT
3548 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3549 sayNO;
3550 nextchr = UCHARAT(++locinput);
3551 break;
3552 case CLUMP:
3553 if (locinput >= PL_regeol)
3554 sayNO;
3555 if (do_utf8) {
3556 LOAD_UTF8_CHARCLASS_MARK();
3557 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3558 sayNO;
3559 locinput += PL_utf8skip[nextchr];
3560 while (locinput < PL_regeol &&
3561 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3562 locinput += UTF8SKIP(locinput);
3563 if (locinput > PL_regeol)
3564 sayNO;
3565 }
3566 else
3567 locinput++;
3568 nextchr = UCHARAT(locinput);
3569 break;
3570
3571 case NREFFL:
3572 {
3573 char *s;
3574 char type;
3575 PL_reg_flags |= RF_tainted;
3576 /* FALL THROUGH */
3577 case NREF:
3578 case NREFF:
3579 type = OP(scan);
3580 n = reg_check_named_buff_matched(rex,scan);
3581
3582 if ( n ) {
3583 type = REF + ( type - NREF );
3584 goto do_ref;
3585 } else {
3586 sayNO;
3587 }
3588 /* unreached */
3589 case REFFL:
3590 PL_reg_flags |= RF_tainted;
3591 /* FALL THROUGH */
3592 case REF:
3593 case REFF:
3594 n = ARG(scan); /* which paren pair */
3595 type = OP(scan);
3596 do_ref:
3597 ln = PL_regoffs[n].start;
3598 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3599 if (*PL_reglastparen < n || ln == -1)
3600 sayNO; /* Do not match unless seen CLOSEn. */
3601 if (ln == PL_regoffs[n].end)
3602 break;
3603
3604 s = PL_bostr + ln;
3605 if (do_utf8 && type != REF) { /* REF can do byte comparison */
3606 char *l = locinput;
3607 const char *e = PL_bostr + PL_regoffs[n].end;
3608 /*
3609 * Note that we can't do the "other character" lookup trick as
3610 * in the 8-bit case (no pun intended) because in Unicode we
3611 * have to map both upper and title case to lower case.
3612 */
3613 if (type == REFF) {
3614 while (s < e) {
3615 STRLEN ulen1, ulen2;
3616 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3617 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3618
3619 if (l >= PL_regeol)
3620 sayNO;
3621 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3622 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3623 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3624 sayNO;
3625 s += ulen1;
3626 l += ulen2;
3627 }
3628 }
3629 locinput = l;
3630 nextchr = UCHARAT(locinput);
3631 break;
3632 }
3633
3634 /* Inline the first character, for speed. */
3635 if (UCHARAT(s) != nextchr &&
3636 (type == REF ||
3637 (UCHARAT(s) != (type == REFF
3638 ? PL_fold : PL_fold_locale)[nextchr])))
3639 sayNO;
3640 ln = PL_regoffs[n].end - ln;
3641 if (locinput + ln > PL_regeol)
3642 sayNO;
3643 if (ln > 1 && (type == REF
3644 ? memNE(s, locinput, ln)
3645 : (type == REFF
3646 ? ibcmp(s, locinput, ln)
3647 : ibcmp_locale(s, locinput, ln))))
3648 sayNO;
3649 locinput += ln;
3650 nextchr = UCHARAT(locinput);
3651 break;
3652 }
3653 case NOTHING:
3654 case TAIL:
3655 break;
3656 case BACK:
3657 break;
3658
3659#undef ST
3660#define ST st->u.eval
3661 {
3662 SV *ret;
3663 REGEXP *re_sv;
3664 regexp *re;
3665 regexp_internal *rei;
3666 regnode *startpoint;
3667
3668 case GOSTART:
3669 case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
3670 if (cur_eval && cur_eval->locinput==locinput) {
3671 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3672 Perl_croak(aTHX_ "Infinite recursion in regex");
3673 if ( ++nochange_depth > max_nochange_depth )
3674 Perl_croak(aTHX_
3675 "Pattern subroutine nesting without pos change"
3676 " exceeded limit in regex");
3677 } else {
3678 nochange_depth = 0;
3679 }
3680 re_sv = rex_sv;
3681 re = rex;
3682 rei = rexi;
3683 (void)ReREFCNT_inc(rex_sv);
3684 if (OP(scan)==GOSUB) {
3685 startpoint = scan + ARG2L(scan);
3686 ST.close_paren = ARG(scan);
3687 } else {
3688 startpoint = rei->program+1;
3689 ST.close_paren = 0;
3690 }
3691 goto eval_recurse_doit;
3692 /* NOTREACHED */
3693 case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
3694 if (cur_eval && cur_eval->locinput==locinput) {
3695 if ( ++nochange_depth > max_nochange_depth )
3696 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3697 } else {
3698 nochange_depth = 0;
3699 }
3700 {
3701 /* execute the code in the {...} */
3702 dSP;
3703 SV ** const before = SP;
3704 OP_4tree * const oop = PL_op;
3705 COP * const ocurcop = PL_curcop;
3706 PAD *old_comppad;
3707 char *saved_regeol = PL_regeol;
3708
3709 n = ARG(scan);
3710 PL_op = (OP_4tree*)rexi->data->data[n];
3711 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3712 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3713 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3714 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3715
3716 if (sv_yes_mark) {
3717 SV *sv_mrk = get_sv("REGMARK", 1);
3718 sv_setsv(sv_mrk, sv_yes_mark);
3719 }
3720
3721 CALLRUNOPS(aTHX); /* Scalar context. */
3722 SPAGAIN;
3723 if (SP == before)
3724 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
3725 else {
3726 ret = POPs;
3727 PUTBACK;
3728 }
3729
3730 PL_op = oop;
3731 PAD_RESTORE_LOCAL(old_comppad);
3732 PL_curcop = ocurcop;
3733 PL_regeol = saved_regeol;
3734 if (!logical) {
3735 /* /(?{...})/ */
3736 sv_setsv(save_scalar(PL_replgv), ret);
3737 break;
3738 }
3739 }
3740 if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3741 logical = 0;
3742 {
3743 /* extract RE object from returned value; compiling if
3744 * necessary */
3745 MAGIC *mg = NULL;
3746 REGEXP *rx = NULL;
3747
3748 if (SvROK(ret)) {
3749 SV *const sv = SvRV(ret);
3750
3751 if (SvTYPE(sv) == SVt_REGEXP) {
3752 rx = (REGEXP*) sv;
3753 } else if (SvSMAGICAL(sv)) {
3754 mg = mg_find(sv, PERL_MAGIC_qr);
3755 assert(mg);
3756 }
3757 } else if (SvTYPE(ret) == SVt_REGEXP) {
3758 rx = (REGEXP*) ret;
3759 } else if (SvSMAGICAL(ret)) {
3760 if (SvGMAGICAL(ret)) {
3761 /* I don't believe that there is ever qr magic
3762 here. */
3763 assert(!mg_find(ret, PERL_MAGIC_qr));
3764 sv_unmagic(ret, PERL_MAGIC_qr);
3765 }
3766 else {
3767 mg = mg_find(ret, PERL_MAGIC_qr);
3768 /* testing suggests mg only ends up non-NULL for
3769 scalars who were upgraded and compiled in the
3770 else block below. In turn, this is only
3771 triggered in the "postponed utf8 string" tests
3772 in t/op/pat.t */
3773 }
3774 }
3775
3776 if (mg) {
3777 rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3778 assert(rx);
3779 }
3780 if (rx) {
3781 rx = reg_temp_copy(rx);
3782 }
3783 else {
3784 U32 pm_flags = 0;
3785 const I32 osize = PL_regsize;
3786
3787 if (DO_UTF8(ret)) {
3788 assert (SvUTF8(ret));
3789 } else if (SvUTF8(ret)) {
3790 /* Not doing UTF-8, despite what the SV says. Is
3791 this only if we're trapped in use 'bytes'? */
3792 /* Make a copy of the octet sequence, but without
3793 the flag on, as the compiler now honours the
3794 SvUTF8 flag on ret. */
3795 STRLEN len;
3796 const char *const p = SvPV(ret, len);
3797 ret = newSVpvn_flags(p, len, SVs_TEMP);
3798 }
3799 rx = CALLREGCOMP(ret, pm_flags);
3800 if (!(SvFLAGS(ret)
3801 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3802 | SVs_GMG))) {
3803 /* This isn't a first class regexp. Instead, it's
3804 caching a regexp onto an existing, Perl visible
3805 scalar. */
3806 sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
3807 }
3808 PL_regsize = osize;
3809 }
3810 re_sv = rx;
3811 re = (struct regexp *)SvANY(rx);
3812 }
3813 RXp_MATCH_COPIED_off(re);
3814 re->subbeg = rex->subbeg;
3815 re->sublen = rex->sublen;
3816 rei = RXi_GET(re);
3817 DEBUG_EXECUTE_r(
3818 debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3819 "Matching embedded");
3820 );
3821 startpoint = rei->program + 1;
3822 ST.close_paren = 0; /* only used for GOSUB */
3823 /* borrowed from regtry */
3824 if (PL_reg_start_tmpl <= re->nparens) {
3825 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3826 if(PL_reg_start_tmp)
3827 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3828 else
3829 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3830 }
3831
3832 eval_recurse_doit: /* Share code with GOSUB below this line */
3833 /* run the pattern returned from (??{...}) */
3834 ST.cp = regcppush(0); /* Save *all* the positions. */
3835 REGCP_SET(ST.lastcp);
3836
3837 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3838
3839 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3840 PL_reglastparen = &re->lastparen;
3841 PL_reglastcloseparen = &re->lastcloseparen;
3842 re->lastparen = 0;
3843 re->lastcloseparen = 0;
3844
3845 PL_reginput = locinput;
3846 PL_regsize = 0;
3847
3848 /* XXXX This is too dramatic a measure... */
3849 PL_reg_maxiter = 0;
3850
3851 ST.toggle_reg_flags = PL_reg_flags;
3852 if (RX_UTF8(re_sv))
3853 PL_reg_flags |= RF_utf8;
3854 else
3855 PL_reg_flags &= ~RF_utf8;
3856 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3857
3858 ST.prev_rex = rex_sv;
3859 ST.prev_curlyx = cur_curlyx;
3860 SETREX(rex_sv,re_sv);
3861 rex = re;
3862 rexi = rei;
3863 cur_curlyx = NULL;
3864 ST.B = next;
3865 ST.prev_eval = cur_eval;
3866 cur_eval = st;
3867 /* now continue from first node in postoned RE */
3868 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3869 /* NOTREACHED */
3870 }
3871 /* logical is 1, /(?(?{...})X|Y)/ */
3872 sw = (bool)SvTRUE(ret);
3873 logical = 0;
3874 break;
3875 }
3876
3877 case EVAL_AB: /* cleanup after a successful (??{A})B */
3878 /* note: this is called twice; first after popping B, then A */
3879 PL_reg_flags ^= ST.toggle_reg_flags;
3880 ReREFCNT_dec(rex_sv);
3881 SETREX(rex_sv,ST.prev_rex);
3882 rex = (struct regexp *)SvANY(rex_sv);
3883 rexi = RXi_GET(rex);
3884 regcpblow(ST.cp);
3885 cur_eval = ST.prev_eval;
3886 cur_curlyx = ST.prev_curlyx;
3887
3888 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3889 PL_reglastparen = &rex->lastparen;
3890 PL_reglastcloseparen = &rex->lastcloseparen;
3891 /* also update PL_regoffs */
3892 PL_regoffs = rex->offs;
3893
3894 /* XXXX This is too dramatic a measure... */
3895 PL_reg_maxiter = 0;
3896 if ( nochange_depth )
3897 nochange_depth--;
3898 sayYES;
3899
3900
3901 case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3902 /* note: this is called twice; first after popping B, then A */
3903 PL_reg_flags ^= ST.toggle_reg_flags;
3904 ReREFCNT_dec(rex_sv);
3905 SETREX(rex_sv,ST.prev_rex);
3906 rex = (struct regexp *)SvANY(rex_sv);
3907 rexi = RXi_GET(rex);
3908 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3909 PL_reglastparen = &rex->lastparen;
3910 PL_reglastcloseparen = &rex->lastcloseparen;
3911
3912 PL_reginput = locinput;
3913 REGCP_UNWIND(ST.lastcp);
3914 regcppop(rex);
3915 cur_eval = ST.prev_eval;
3916 cur_curlyx = ST.prev_curlyx;
3917 /* XXXX This is too dramatic a measure... */
3918 PL_reg_maxiter = 0;
3919 if ( nochange_depth )
3920 nochange_depth--;
3921 sayNO_SILENT;
3922#undef ST
3923
3924 case OPEN:
3925 n = ARG(scan); /* which paren pair */
3926 PL_reg_start_tmp[n] = locinput;
3927 if (n > PL_regsize)
3928 PL_regsize = n;
3929 lastopen = n;
3930 break;
3931 case CLOSE:
3932 n = ARG(scan); /* which paren pair */
3933 PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3934 PL_regoffs[n].end = locinput - PL_bostr;
3935 /*if (n > PL_regsize)
3936 PL_regsize = n;*/
3937 if (n > *PL_reglastparen)
3938 *PL_reglastparen = n;
3939 *PL_reglastcloseparen = n;
3940 if (cur_eval && cur_eval->u.eval.close_paren == n) {
3941 goto fake_end;
3942 }
3943 break;
3944 case ACCEPT:
3945 if (ARG(scan)){
3946 regnode *cursor;
3947 for (cursor=scan;
3948 cursor && OP(cursor)!=END;
3949 cursor=regnext(cursor))
3950 {
3951 if ( OP(cursor)==CLOSE ){
3952 n = ARG(cursor);
3953 if ( n <= lastopen ) {
3954 PL_regoffs[n].start
3955 = PL_reg_start_tmp[n] - PL_bostr;
3956 PL_regoffs[n].end = locinput - PL_bostr;
3957 /*if (n > PL_regsize)
3958 PL_regsize = n;*/
3959 if (n > *PL_reglastparen)
3960 *PL_reglastparen = n;
3961 *PL_reglastcloseparen = n;
3962 if ( n == ARG(scan) || (cur_eval &&
3963 cur_eval->u.eval.close_paren == n))
3964 break;
3965 }
3966 }
3967 }
3968 }
3969 goto fake_end;
3970 /*NOTREACHED*/
3971 case GROUPP:
3972 n = ARG(scan); /* which paren pair */
3973 sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3974 break;
3975 case NGROUPP:
3976 /* reg_check_named_buff_matched returns 0 for no match */
3977 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3978 break;
3979 case INSUBP:
3980 n = ARG(scan);
3981 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3982 break;
3983 case DEFINEP:
3984 sw = 0;
3985 break;
3986 case IFTHEN:
3987 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3988 if (sw)
3989 next = NEXTOPER(NEXTOPER(scan));
3990 else {
3991 next = scan + ARG(scan);
3992 if (OP(next) == IFTHEN) /* Fake one. */
3993 next = NEXTOPER(NEXTOPER(next));
3994 }
3995 break;
3996 case LOGICAL:
3997 logical = scan->flags;
3998 break;
3999
4000/*******************************************************************
4001
4002The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
4003pattern, where A and B are subpatterns. (For simple A, CURLYM or
4004STAR/PLUS/CURLY/CURLYN are used instead.)
4005
4006A*B is compiled as <CURLYX><A><WHILEM><B>
4007
4008On entry to the subpattern, CURLYX is called. This pushes a CURLYX
4009state, which contains the current count, initialised to -1. It also sets
4010cur_curlyx to point to this state, with any previous value saved in the
4011state block.
4012
4013CURLYX then jumps straight to the WHILEM op, rather than executing A,
4014since the pattern may possibly match zero times (i.e. it's a while {} loop
4015rather than a do {} while loop).
4016
4017Each entry to WHILEM represents a successful match of A. The count in the
4018CURLYX block is incremented, another WHILEM state is pushed, and execution
4019passes to A or B depending on greediness and the current count.
4020
4021For example, if matching against the string a1a2a3b (where the aN are
4022substrings that match /A/), then the match progresses as follows: (the
4023pushed states are interspersed with the bits of strings matched so far):
4024
4025 <CURLYX cnt=-1>
4026 <CURLYX cnt=0><WHILEM>
4027 <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4028 <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4029 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4030 <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4031
4032(Contrast this with something like CURLYM, which maintains only a single
4033backtrack state:
4034
4035 <CURLYM cnt=0> a1
4036 a1 <CURLYM cnt=1> a2
4037 a1 a2 <CURLYM cnt=2> a3
4038 a1 a2 a3 <CURLYM cnt=3> b
4039)
4040
4041Each WHILEM state block marks a point to backtrack to upon partial failure
4042of A or B, and also contains some minor state data related to that
4043iteration. The CURLYX block, pointed to by cur_curlyx, contains the
4044overall state, such as the count, and pointers to the A and B ops.
4045
4046This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4047must always point to the *current* CURLYX block, the rules are:
4048
4049When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4050and set cur_curlyx to point the new block.
4051
4052When popping the CURLYX block after a successful or unsuccessful match,
4053restore the previous cur_curlyx.
4054
4055When WHILEM is about to execute B, save the current cur_curlyx, and set it
4056to the outer one saved in the CURLYX block.
4057
4058When popping the WHILEM block after a successful or unsuccessful B match,
4059restore the previous cur_curlyx.
4060
4061Here's an example for the pattern (AI* BI)*BO
4062I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4063
4064cur_
4065curlyx backtrack stack
4066------ ---------------
4067NULL
4068CO <CO prev=NULL> <WO>
4069CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4070CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4071NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4072
4073At this point the pattern succeeds, and we work back down the stack to
4074clean up, restoring as we go:
4075
4076CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4077CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4078CO <CO prev=NULL> <WO>
4079NULL
4080
4081*******************************************************************/
4082
4083#define ST st->u.curlyx
4084
4085 case CURLYX: /* start of /A*B/ (for complex A) */
4086 {
4087 /* No need to save/restore up to this paren */
4088 I32 parenfloor = scan->flags;
4089
4090 assert(next); /* keep Coverity happy */
4091 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4092 next += ARG(next);
4093
4094 /* XXXX Probably it is better to teach regpush to support
4095 parenfloor > PL_regsize... */
4096 if (parenfloor > (I32)*PL_reglastparen)
4097 parenfloor = *PL_reglastparen; /* Pessimization... */
4098
4099 ST.prev_curlyx= cur_curlyx;
4100 cur_curlyx = st;
4101 ST.cp = PL_savestack_ix;
4102
4103 /* these fields contain the state of the current curly.
4104 * they are accessed by subsequent WHILEMs */
4105 ST.parenfloor = parenfloor;
4106 ST.min = ARG1(scan);
4107 ST.max = ARG2(scan);
4108 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4109 ST.B = next;
4110 ST.minmod = minmod;
4111 minmod = 0;
4112 ST.count = -1; /* this will be updated by WHILEM */
4113 ST.lastloc = NULL; /* this will be updated by WHILEM */
4114
4115 PL_reginput = locinput;
4116 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4117 /* NOTREACHED */
4118 }
4119
4120 case CURLYX_end: /* just finished matching all of A*B */
4121 cur_curlyx = ST.prev_curlyx;
4122 sayYES;
4123 /* NOTREACHED */
4124
4125 case CURLYX_end_fail: /* just failed to match all of A*B */
4126 regcpblow(ST.cp);
4127 cur_curlyx = ST.prev_curlyx;
4128 sayNO;
4129 /* NOTREACHED */
4130
4131
4132#undef ST
4133#define ST st->u.whilem
4134
4135 case WHILEM: /* just matched an A in /A*B/ (for complex A) */
4136 {
4137 /* see the discussion above about CURLYX/WHILEM */
4138 I32 n;
4139 assert(cur_curlyx); /* keep Coverity happy */
4140 n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4141 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4142 ST.cache_offset = 0;
4143 ST.cache_mask = 0;
4144
4145 PL_reginput = locinput;
4146
4147 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4148 "%*s whilem: matched %ld out of %ld..%ld\n",
4149 REPORT_CODE_OFF+depth*2, "", (long)n,
4150 (long)cur_curlyx->u.curlyx.min,
4151 (long)cur_curlyx->u.curlyx.max)
4152 );
4153
4154 /* First just match a string of min A's. */
4155
4156 if (n < cur_curlyx->u.curlyx.min) {
4157 cur_curlyx->u.curlyx.lastloc = locinput;
4158 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4159 /* NOTREACHED */
4160 }
4161
4162 /* If degenerate A matches "", assume A done. */
4163
4164 if (locinput == cur_curlyx->u.curlyx.lastloc) {
4165 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4166 "%*s whilem: empty match detected, trying continuation...\n",
4167 REPORT_CODE_OFF+depth*2, "")
4168 );
4169 goto do_whilem_B_max;
4170 }
4171
4172 /* super-linear cache processing */
4173
4174 if (scan->flags) {
4175
4176 if (!PL_reg_maxiter) {
4177 /* start the countdown: Postpone detection until we
4178 * know the match is not *that* much linear. */
4179 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4180 /* possible overflow for long strings and many CURLYX's */
4181 if (PL_reg_maxiter < 0)
4182 PL_reg_maxiter = I32_MAX;
4183 PL_reg_leftiter = PL_reg_maxiter;
4184 }
4185
4186 if (PL_reg_leftiter-- == 0) {
4187 /* initialise cache */
4188 const I32 size = (PL_reg_maxiter + 7)/8;
4189 if (PL_reg_poscache) {
4190 if ((I32)PL_reg_poscache_size < size) {
4191 Renew(PL_reg_poscache, size, char);
4192 PL_reg_poscache_size = size;
4193 }
4194 Zero(PL_reg_poscache, size, char);
4195 }
4196 else {
4197 PL_reg_poscache_size = size;
4198 Newxz(PL_reg_poscache, size, char);
4199 }
4200 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4201 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4202 PL_colors[4], PL_colors[5])
4203 );
4204 }
4205
4206 if (PL_reg_leftiter < 0) {
4207 /* have we already failed at this position? */
4208 I32 offset, mask;
4209 offset = (scan->flags & 0xf) - 1
4210 + (locinput - PL_bostr) * (scan->flags>>4);
4211 mask = 1 << (offset % 8);
4212 offset /= 8;
4213 if (PL_reg_poscache[offset] & mask) {
4214 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4215 "%*s whilem: (cache) already tried at this position...\n",
4216 REPORT_CODE_OFF+depth*2, "")
4217 );
4218 sayNO; /* cache records failure */
4219 }
4220 ST.cache_offset = offset;
4221 ST.cache_mask = mask;
4222 }
4223 }
4224
4225 /* Prefer B over A for minimal matching. */
4226
4227 if (cur_curlyx->u.curlyx.minmod) {
4228 ST.save_curlyx = cur_curlyx;
4229 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4230 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4231 REGCP_SET(ST.lastcp);
4232 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4233 /* NOTREACHED */
4234 }
4235
4236 /* Prefer A over B for maximal matching. */
4237
4238 if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4239 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4240 cur_curlyx->u.curlyx.lastloc = locinput;
4241 REGCP_SET(ST.lastcp);
4242 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4243 /* NOTREACHED */
4244 }
4245 goto do_whilem_B_max;
4246 }
4247 /* NOTREACHED */
4248
4249 case WHILEM_B_min: /* just matched B in a minimal match */
4250 case WHILEM_B_max: /* just matched B in a maximal match */
4251 cur_curlyx = ST.save_curlyx;
4252 sayYES;
4253 /* NOTREACHED */
4254
4255 case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4256 cur_curlyx = ST.save_curlyx;
4257 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4258 cur_curlyx->u.curlyx.count--;
4259 CACHEsayNO;
4260 /* NOTREACHED */
4261
4262 case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4263 REGCP_UNWIND(ST.lastcp);
4264 regcppop(rex);
4265 /* FALL THROUGH */
4266 case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4267 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4268 cur_curlyx->u.curlyx.count--;
4269 CACHEsayNO;
4270 /* NOTREACHED */
4271
4272 case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4273 REGCP_UNWIND(ST.lastcp);
4274 regcppop(rex); /* Restore some previous $<digit>s? */
4275 PL_reginput = locinput;
4276 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4277 "%*s whilem: failed, trying continuation...\n",
4278 REPORT_CODE_OFF+depth*2, "")
4279 );
4280 do_whilem_B_max:
4281 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4282 && ckWARN(WARN_REGEXP)
4283 && !(PL_reg_flags & RF_warned))
4284 {
4285 PL_reg_flags |= RF_warned;
4286 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4287 "Complex regular subexpression recursion",
4288 REG_INFTY - 1);
4289 }
4290
4291 /* now try B */
4292 ST.save_curlyx = cur_curlyx;
4293 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4294 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4295 /* NOTREACHED */
4296
4297 case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4298 cur_curlyx = ST.save_curlyx;
4299 REGCP_UNWIND(ST.lastcp);
4300 regcppop(rex);
4301
4302 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4303 /* Maximum greed exceeded */
4304 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4305 && ckWARN(WARN_REGEXP)
4306 && !(PL_reg_flags & RF_warned))
4307 {
4308 PL_reg_flags |= RF_warned;
4309 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4310 "%s limit (%d) exceeded",
4311 "Complex regular subexpression recursion",
4312 REG_INFTY - 1);
4313 }
4314 cur_curlyx->u.curlyx.count--;
4315 CACHEsayNO;
4316 }
4317
4318 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4319 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4320 );
4321 /* Try grabbing another A and see if it helps. */
4322 PL_reginput = locinput;
4323 cur_curlyx->u.curlyx.lastloc = locinput;
4324 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4325 REGCP_SET(ST.lastcp);
4326 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4327 /* NOTREACHED */
4328
4329#undef ST
4330#define ST st->u.branch
4331
4332 case BRANCHJ: /* /(...|A|...)/ with long next pointer */
4333 next = scan + ARG(scan);
4334 if (next == scan)
4335 next = NULL;
4336 scan = NEXTOPER(scan);
4337 /* FALL THROUGH */
4338
4339 case BRANCH: /* /(...|A|...)/ */
4340 scan = NEXTOPER(scan); /* scan now points to inner node */
4341 ST.lastparen = *PL_reglastparen;
4342 ST.next_branch = next;
4343 REGCP_SET(ST.cp);
4344 PL_reginput = locinput;
4345
4346 /* Now go into the branch */
4347 if (has_cutgroup) {
4348 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4349 } else {
4350 PUSH_STATE_GOTO(BRANCH_next, scan);
4351 }
4352 /* NOTREACHED */
4353 case CUTGROUP:
4354 PL_reginput = locinput;
4355 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4356 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4357 PUSH_STATE_GOTO(CUTGROUP_next,next);
4358 /* NOTREACHED */
4359 case CUTGROUP_next_fail:
4360 do_cutgroup = 1;
4361 no_final = 1;
4362 if (st->u.mark.mark_name)
4363 sv_commit = st->u.mark.mark_name;
4364 sayNO;
4365 /* NOTREACHED */
4366 case BRANCH_next:
4367 sayYES;
4368 /* NOTREACHED */
4369 case BRANCH_next_fail: /* that branch failed; try the next, if any */
4370 if (do_cutgroup) {
4371 do_cutgroup = 0;
4372 no_final = 0;
4373 }
4374 REGCP_UNWIND(ST.cp);
4375 for (n = *PL_reglastparen; n > ST.lastparen; n--)
4376 PL_regoffs[n].end = -1;
4377 *PL_reglastparen = n;
4378 /*dmq: *PL_reglastcloseparen = n; */
4379 scan = ST.next_branch;
4380 /* no more branches? */
4381 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4382 DEBUG_EXECUTE_r({
4383 PerlIO_printf( Perl_debug_log,
4384 "%*s %sBRANCH failed...%s\n",
4385 REPORT_CODE_OFF+depth*2, "",
4386 PL_colors[4],
4387 PL_colors[5] );
4388 });
4389 sayNO_SILENT;
4390 }
4391 continue; /* execute next BRANCH[J] op */
4392 /* NOTREACHED */
4393
4394 case MINMOD:
4395 minmod = 1;
4396 break;
4397
4398#undef ST
4399#define ST st->u.curlym
4400
4401 case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4402
4403 /* This is an optimisation of CURLYX that enables us to push
4404 * only a single backtracking state, no matter how many matches
4405 * there are in {m,n}. It relies on the pattern being constant
4406 * length, with no parens to influence future backrefs
4407 */
4408
4409 ST.me = scan;
4410 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4411
4412 /* if paren positive, emulate an OPEN/CLOSE around A */
4413 if (ST.me->flags) {
4414 U32 paren = ST.me->flags;
4415 if (paren > PL_regsize)
4416 PL_regsize = paren;
4417 if (paren > *PL_reglastparen)
4418 *PL_reglastparen = paren;
4419 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4420 }
4421 ST.A = scan;
4422 ST.B = next;
4423 ST.alen = 0;
4424 ST.count = 0;
4425 ST.minmod = minmod;
4426 minmod = 0;
4427 ST.c1 = CHRTEST_UNINIT;
4428 REGCP_SET(ST.cp);
4429
4430 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4431 goto curlym_do_B;
4432
4433 curlym_do_A: /* execute the A in /A{m,n}B/ */
4434 PL_reginput = locinput;
4435 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4436 /* NOTREACHED */
4437
4438 case CURLYM_A: /* we've just matched an A */
4439 locinput = st->locinput;
4440 nextchr = UCHARAT(locinput);
4441
4442 ST.count++;
4443 /* after first match, determine A's length: u.curlym.alen */
4444 if (ST.count == 1) {
4445 if (PL_reg_match_utf8) {
4446 char *s = locinput;
4447 while (s < PL_reginput) {
4448 ST.alen++;
4449 s += UTF8SKIP(s);
4450 }
4451 }
4452 else {
4453 ST.alen = PL_reginput - locinput;
4454 }
4455 if (ST.alen == 0)
4456 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4457 }
4458 DEBUG_EXECUTE_r(
4459 PerlIO_printf(Perl_debug_log,
4460 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4461 (int)(REPORT_CODE_OFF+(depth*2)), "",
4462 (IV) ST.count, (IV)ST.alen)
4463 );
4464
4465 locinput = PL_reginput;
4466
4467 if (cur_eval && cur_eval->u.eval.close_paren &&
4468 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4469 goto fake_end;
4470
4471 {
4472 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4473 if ( max == REG_INFTY || ST.count < max )
4474 goto curlym_do_A; /* try to match another A */
4475 }
4476 goto curlym_do_B; /* try to match B */
4477
4478 case CURLYM_A_fail: /* just failed to match an A */
4479 REGCP_UNWIND(ST.cp);
4480
4481 if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4482 || (cur_eval && cur_eval->u.eval.close_paren &&
4483 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4484 sayNO;
4485
4486 curlym_do_B: /* execute the B in /A{m,n}B/ */
4487 PL_reginput = locinput;
4488 if (ST.c1 == CHRTEST_UNINIT) {
4489 /* calculate c1 and c2 for possible match of 1st char
4490 * following curly */
4491 ST.c1 = ST.c2 = CHRTEST_VOID;
4492 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4493 regnode *text_node = ST.B;
4494 if (! HAS_TEXT(text_node))
4495 FIND_NEXT_IMPT(text_node);
4496 /* this used to be
4497
4498 (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4499
4500 But the former is redundant in light of the latter.
4501
4502 if this changes back then the macro for
4503 IS_TEXT and friends need to change.
4504 */
4505 if (PL_regkind[OP(text_node)] == EXACT)
4506 {
4507
4508 ST.c1 = (U8)*STRING(text_node);
4509 ST.c2 =
4510 (IS_TEXTF(text_node))
4511 ? PL_fold[ST.c1]
4512 : (IS_TEXTFL(text_node))
4513 ? PL_fold_locale[ST.c1]
4514 : ST.c1;
4515 }
4516 }
4517 }
4518
4519 DEBUG_EXECUTE_r(
4520 PerlIO_printf(Perl_debug_log,
4521 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
4522 (int)(REPORT_CODE_OFF+(depth*2)),
4523 "", (IV)ST.count)
4524 );
4525 if (ST.c1 != CHRTEST_VOID
4526 && UCHARAT(PL_reginput) != ST.c1
4527 && UCHARAT(PL_reginput) != ST.c2)
4528 {
4529 /* simulate B failing */
4530 DEBUG_OPTIMISE_r(
4531 PerlIO_printf(Perl_debug_log,
4532 "%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4533 (int)(REPORT_CODE_OFF+(depth*2)),"",
4534 (IV)ST.c1,(IV)ST.c2
4535 ));
4536 state_num = CURLYM_B_fail;
4537 goto reenter_switch;
4538 }
4539
4540 if (ST.me->flags) {
4541 /* mark current A as captured */
4542 I32 paren = ST.me->flags;
4543 if (ST.count) {
4544 PL_regoffs[paren].start
4545 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4546 PL_regoffs[paren].end = PL_reginput - PL_bostr;
4547 /*dmq: *PL_reglastcloseparen = paren; */
4548 }
4549 else
4550 PL_regoffs[paren].end = -1;
4551 if (cur_eval && cur_eval->u.eval.close_paren &&
4552 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4553 {
4554 if (ST.count)
4555 goto fake_end;
4556 else
4557 sayNO;
4558 }
4559 }
4560
4561 PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4562 /* NOTREACHED */
4563
4564 case CURLYM_B_fail: /* just failed to match a B */
4565 REGCP_UNWIND(ST.cp);
4566 if (ST.minmod) {
4567 I32 max = ARG2(ST.me);
4568 if (max != REG_INFTY && ST.count == max)
4569 sayNO;
4570 goto curlym_do_A; /* try to match a further A */
4571 }
4572 /* backtrack one A */
4573 if (ST.count == ARG1(ST.me) /* min */)
4574 sayNO;
4575 ST.count--;
4576 locinput = HOPc(locinput, -ST.alen);
4577 goto curlym_do_B; /* try to match B */
4578
4579#undef ST
4580#define ST st->u.curly
4581
4582#define CURLY_SETPAREN(paren, success) \
4583 if (paren) { \
4584 if (success) { \
4585 PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4586 PL_regoffs[paren].end = locinput - PL_bostr; \
4587 *PL_reglastcloseparen = paren; \
4588 } \
4589 else \
4590 PL_regoffs[paren].end = -1; \
4591 }
4592
4593 case STAR: /* /A*B/ where A is width 1 */
4594 ST.paren = 0;
4595 ST.min = 0;
4596 ST.max = REG_INFTY;
4597 scan = NEXTOPER(scan);
4598 goto repeat;
4599 case PLUS: /* /A+B/ where A is width 1 */
4600 ST.paren = 0;
4601 ST.min = 1;
4602 ST.max = REG_INFTY;
4603 scan = NEXTOPER(scan);
4604 goto repeat;
4605 case CURLYN: /* /(A){m,n}B/ where A is width 1 */
4606 ST.paren = scan->flags; /* Which paren to set */
4607 if (ST.paren > PL_regsize)
4608 PL_regsize = ST.paren;
4609 if (ST.paren > *PL_reglastparen)
4610 *PL_reglastparen = ST.paren;
4611 ST.min = ARG1(scan); /* min to match */
4612 ST.max = ARG2(scan); /* max to match */
4613 if (cur_eval && cur_eval->u.eval.close_paren &&
4614 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4615 ST.min=1;
4616 ST.max=1;
4617 }
4618 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4619 goto repeat;
4620 case CURLY: /* /A{m,n}B/ where A is width 1 */
4621 ST.paren = 0;
4622 ST.min = ARG1(scan); /* min to match */
4623 ST.max = ARG2(scan); /* max to match */
4624 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4625 repeat:
4626 /*
4627 * Lookahead to avoid useless match attempts
4628 * when we know what character comes next.
4629 *
4630 * Used to only do .*x and .*?x, but now it allows
4631 * for )'s, ('s and (?{ ... })'s to be in the way
4632 * of the quantifier and the EXACT-like node. -- japhy
4633 */
4634
4635 if (ST.min > ST.max) /* XXX make this a compile-time check? */
4636 sayNO;
4637 if (HAS_TEXT(next) || JUMPABLE(next)) {
4638 U8 *s;
4639 regnode *text_node = next;
4640
4641 if (! HAS_TEXT(text_node))
4642 FIND_NEXT_IMPT(text_node);
4643
4644 if (! HAS_TEXT(text_node))
4645 ST.c1 = ST.c2 = CHRTEST_VOID;
4646 else {
4647 if ( PL_regkind[OP(text_node)] != EXACT ) {
4648 ST.c1 = ST.c2 = CHRTEST_VOID;
4649 goto assume_ok_easy;
4650 }
4651 else
4652 s = (U8*)STRING(text_node);
4653
4654 /* Currently we only get here when
4655
4656 PL_rekind[OP(text_node)] == EXACT
4657
4658 if this changes back then the macro for IS_TEXT and
4659 friends need to change. */
4660 if (!UTF) {
4661 ST.c2 = ST.c1 = *s;
4662 if (IS_TEXTF(text_node))
4663 ST.c2 = PL_fold[ST.c1];
4664 else if (IS_TEXTFL(text_node))
4665 ST.c2 = PL_fold_locale[ST.c1];
4666 }
4667 else { /* UTF */
4668 if (IS_TEXTF(text_node)) {
4669 STRLEN ulen1, ulen2;
4670 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4671 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4672
4673 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4674 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4675#ifdef EBCDIC
4676 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4677 ckWARN(WARN_UTF8) ?
4678 0 : UTF8_ALLOW_ANY);
4679 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4680 ckWARN(WARN_UTF8) ?
4681 0 : UTF8_ALLOW_ANY);
4682#else
4683 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4684 uniflags);
4685 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4686 uniflags);
4687#endif
4688 }
4689 else {
4690 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4691 uniflags);
4692 }
4693 }
4694 }
4695 }
4696 else
4697 ST.c1 = ST.c2 = CHRTEST_VOID;
4698 assume_ok_easy:
4699
4700 ST.A = scan;
4701 ST.B = next;
4702 PL_reginput = locinput;
4703 if (minmod) {
4704 minmod = 0;
4705 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4706 sayNO;
4707 ST.count = ST.min;
4708 locinput = PL_reginput;
4709 REGCP_SET(ST.cp);
4710 if (ST.c1 == CHRTEST_VOID)
4711 goto curly_try_B_min;
4712
4713 ST.oldloc = locinput;
4714
4715 /* set ST.maxpos to the furthest point along the
4716 * string that could possibly match */
4717 if (ST.max == REG_INFTY) {
4718 ST.maxpos = PL_regeol - 1;
4719 if (do_utf8)
4720 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4721 ST.maxpos--;
4722 }
4723 else if (do_utf8) {
4724 int m = ST.max - ST.min;
4725 for (ST.maxpos = locinput;
4726 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4727 ST.maxpos += UTF8SKIP(ST.maxpos);
4728 }
4729 else {
4730 ST.maxpos = locinput + ST.max - ST.min;
4731 if (ST.maxpos >= PL_regeol)
4732 ST.maxpos = PL_regeol - 1;
4733 }
4734 goto curly_try_B_min_known;
4735
4736 }
4737 else {
4738 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4739 locinput = PL_reginput;
4740 if (ST.count < ST.min)
4741 sayNO;
4742 if ((ST.count > ST.min)
4743 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4744 {
4745 /* A{m,n} must come at the end of the string, there's
4746 * no point in backing off ... */
4747 ST.min = ST.count;
4748 /* ...except that $ and \Z can match before *and* after
4749 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4750 We may back off by one in this case. */
4751 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4752 ST.min--;
4753 }
4754 REGCP_SET(ST.cp);
4755 goto curly_try_B_max;
4756 }
4757 /* NOTREACHED */
4758
4759
4760 case CURLY_B_min_known_fail:
4761 /* failed to find B in a non-greedy match where c1,c2 valid */
4762 if (ST.paren && ST.count)
4763 PL_regoffs[ST.paren].end = -1;
4764
4765 PL_reginput = locinput; /* Could be reset... */
4766 REGCP_UNWIND(ST.cp);
4767 /* Couldn't or didn't -- move forward. */
4768 ST.oldloc = locinput;
4769 if (do_utf8)
4770 locinput += UTF8SKIP(locinput);
4771 else
4772 locinput++;
4773 ST.count++;
4774 curly_try_B_min_known:
4775 /* find the next place where 'B' could work, then call B */
4776 {
4777 int n;
4778 if (do_utf8) {
4779 n = (ST.oldloc == locinput) ? 0 : 1;
4780 if (ST.c1 == ST.c2) {
4781 STRLEN len;
4782 /* set n to utf8_distance(oldloc, locinput) */
4783 while (locinput <= ST.maxpos &&
4784 utf8n_to_uvchr((U8*)locinput,
4785 UTF8_MAXBYTES, &len,
4786 uniflags) != (UV)ST.c1) {
4787 locinput += len;
4788 n++;
4789 }
4790 }
4791 else {
4792 /* set n to utf8_distance(oldloc, locinput) */
4793 while (locinput <= ST.maxpos) {
4794 STRLEN len;
4795 const UV c = utf8n_to_uvchr((U8*)locinput,
4796 UTF8_MAXBYTES, &len,
4797 uniflags);
4798 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4799 break;
4800 locinput += len;
4801 n++;
4802 }
4803 }
4804 }
4805 else {
4806 if (ST.c1 == ST.c2) {
4807 while (locinput <= ST.maxpos &&
4808 UCHARAT(locinput) != ST.c1)
4809 locinput++;
4810 }
4811 else {
4812 while (locinput <= ST.maxpos
4813 && UCHARAT(locinput) != ST.c1
4814 && UCHARAT(locinput) != ST.c2)
4815 locinput++;
4816 }
4817 n = locinput - ST.oldloc;
4818 }
4819 if (locinput > ST.maxpos)
4820 sayNO;
4821 /* PL_reginput == oldloc now */
4822 if (n) {
4823 ST.count += n;
4824 if (regrepeat(rex, ST.A, n, depth) < n)
4825 sayNO;
4826 }
4827 PL_reginput = locinput;
4828 CURLY_SETPAREN(ST.paren, ST.count);
4829 if (cur_eval && cur_eval->u.eval.close_paren &&
4830 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4831 goto fake_end;
4832 }
4833 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4834 }
4835 /* NOTREACHED */
4836
4837
4838 case CURLY_B_min_fail:
4839 /* failed to find B in a non-greedy match where c1,c2 invalid */
4840 if (ST.paren && ST.count)
4841 PL_regoffs[ST.paren].end = -1;
4842
4843 REGCP_UNWIND(ST.cp);
4844 /* failed -- move forward one */
4845 PL_reginput = locinput;
4846 if (regrepeat(rex, ST.A, 1, depth)) {
4847 ST.count++;
4848 locinput = PL_reginput;
4849 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4850 ST.count > 0)) /* count overflow ? */
4851 {
4852 curly_try_B_min:
4853 CURLY_SETPAREN(ST.paren, ST.count);
4854 if (cur_eval && cur_eval->u.eval.close_paren &&
4855 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4856 goto fake_end;
4857 }
4858 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4859 }
4860 }
4861 sayNO;
4862 /* NOTREACHED */
4863
4864
4865 curly_try_B_max:
4866 /* a successful greedy match: now try to match B */
4867 if (cur_eval && cur_eval->u.eval.close_paren &&
4868 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4869 goto fake_end;
4870 }
4871 {
4872 UV c = 0;
4873 if (ST.c1 != CHRTEST_VOID)
4874 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4875 UTF8_MAXBYTES, 0, uniflags)
4876 : (UV) UCHARAT(PL_reginput);
4877 /* If it could work, try it. */
4878 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4879 CURLY_SETPAREN(ST.paren, ST.count);
4880 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4881 /* NOTREACHED */
4882 }
4883 }
4884 /* FALL THROUGH */
4885 case CURLY_B_max_fail:
4886 /* failed to find B in a greedy match */
4887 if (ST.paren && ST.count)
4888 PL_regoffs[ST.paren].end = -1;
4889
4890 REGCP_UNWIND(ST.cp);
4891 /* back up. */
4892 if (--ST.count < ST.min)
4893 sayNO;
4894 PL_reginput = locinput = HOPc(locinput, -1);
4895 goto curly_try_B_max;
4896
4897#undef ST
4898
4899 case END:
4900 fake_end:
4901 if (cur_eval) {
4902 /* we've just finished A in /(??{A})B/; now continue with B */
4903 I32 tmpix;
4904 st->u.eval.toggle_reg_flags
4905 = cur_eval->u.eval.toggle_reg_flags;
4906 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4907
4908 st->u.eval.prev_rex = rex_sv; /* inner */
4909 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4910 rex = (struct regexp *)SvANY(rex_sv);
4911 rexi = RXi_GET(rex);
4912 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4913 ReREFCNT_inc(rex_sv);
4914 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4915
4916 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4917 PL_reglastparen = &rex->lastparen;
4918 PL_reglastcloseparen = &rex->lastcloseparen;
4919
4920 REGCP_SET(st->u.eval.lastcp);
4921 PL_reginput = locinput;
4922
4923 /* Restore parens of the outer rex without popping the
4924 * savestack */
4925 tmpix = PL_savestack_ix;
4926 PL_savestack_ix = cur_eval->u.eval.lastcp;
4927 regcppop(rex);
4928 PL_savestack_ix = tmpix;
4929
4930 st->u.eval.prev_eval = cur_eval;
4931 cur_eval = cur_eval->u.eval.prev_eval;
4932 DEBUG_EXECUTE_r(
4933 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4934 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4935 if ( nochange_depth )
4936 nochange_depth--;
4937
4938 PUSH_YES_STATE_GOTO(EVAL_AB,
4939 st->u.eval.prev_eval->u.eval.B); /* match B */
4940 }
4941
4942 if (locinput < reginfo->till) {
4943 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4944 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4945 PL_colors[4],
4946 (long)(locinput - PL_reg_starttry),
4947 (long)(reginfo->till - PL_reg_starttry),
4948 PL_colors[5]));
4949
4950 sayNO_SILENT; /* Cannot match: too short. */
4951 }
4952 PL_reginput = locinput; /* put where regtry can find it */
4953 sayYES; /* Success! */
4954
4955 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4956 DEBUG_EXECUTE_r(
4957 PerlIO_printf(Perl_debug_log,
4958 "%*s %ssubpattern success...%s\n",
4959 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4960 PL_reginput = locinput; /* put where regtry can find it */
4961 sayYES; /* Success! */
4962
4963#undef ST
4964#define ST st->u.ifmatch
4965
4966 case SUSPEND: /* (?>A) */
4967 ST.wanted = 1;
4968 PL_reginput = locinput;
4969 goto do_ifmatch;
4970
4971 case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4972 ST.wanted = 0;
4973 goto ifmatch_trivial_fail_test;
4974
4975 case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4976 ST.wanted = 1;
4977 ifmatch_trivial_fail_test:
4978 if (scan->flags) {
4979 char * const s = HOPBACKc(locinput, scan->flags);
4980 if (!s) {
4981 /* trivial fail */
4982 if (logical) {
4983 logical = 0;
4984 sw = 1 - (bool)ST.wanted;
4985 }
4986 else if (ST.wanted)
4987 sayNO;
4988 next = scan + ARG(scan);
4989 if (next == scan)
4990 next = NULL;
4991 break;
4992 }
4993 PL_reginput = s;
4994 }
4995 else
4996 PL_reginput = locinput;
4997
4998 do_ifmatch:
4999 ST.me = scan;
5000 ST.logical = logical;
5001 logical = 0; /* XXX: reset state of logical once it has been saved into ST */
5002
5003 /* execute body of (?...A) */
5004 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
5005 /* NOTREACHED */
5006
5007 case IFMATCH_A_fail: /* body of (?...A) failed */
5008 ST.wanted = !ST.wanted;
5009 /* FALL THROUGH */
5010
5011 case IFMATCH_A: /* body of (?...A) succeeded */
5012 if (ST.logical) {
5013 sw = (bool)ST.wanted;
5014 }
5015 else if (!ST.wanted)
5016 sayNO;
5017
5018 if (OP(ST.me) == SUSPEND)
5019 locinput = PL_reginput;
5020 else {
5021 locinput = PL_reginput = st->locinput;
5022 nextchr = UCHARAT(locinput);
5023 }
5024 scan = ST.me + ARG(ST.me);
5025 if (scan == ST.me)
5026 scan = NULL;
5027 continue; /* execute B */
5028
5029#undef ST
5030
5031 case LONGJMP:
5032 next = scan + ARG(scan);
5033 if (next == scan)
5034 next = NULL;
5035 break;
5036 case COMMIT:
5037 reginfo->cutpoint = PL_regeol;
5038 /* FALLTHROUGH */
5039 case PRUNE:
5040 PL_reginput = locinput;
5041 if (!scan->flags)
5042 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5043 PUSH_STATE_GOTO(COMMIT_next,next);
5044 /* NOTREACHED */
5045 case COMMIT_next_fail:
5046 no_final = 1;
5047 /* FALLTHROUGH */
5048 case OPFAIL:
5049 sayNO;
5050 /* NOTREACHED */
5051
5052#define ST st->u.mark
5053 case MARKPOINT:
5054 ST.prev_mark = mark_state;
5055 ST.mark_name = sv_commit = sv_yes_mark
5056 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5057 mark_state = st;
5058 ST.mark_loc = PL_reginput = locinput;
5059 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5060 /* NOTREACHED */
5061 case MARKPOINT_next:
5062 mark_state = ST.prev_mark;
5063 sayYES;
5064 /* NOTREACHED */
5065 case MARKPOINT_next_fail:
5066 if (popmark && sv_eq(ST.mark_name,popmark))
5067 {
5068 if (ST.mark_loc > startpoint)
5069 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5070 popmark = NULL; /* we found our mark */
5071 sv_commit = ST.mark_name;
5072
5073 DEBUG_EXECUTE_r({
5074 PerlIO_printf(Perl_debug_log,
5075 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
5076 REPORT_CODE_OFF+depth*2, "",
5077 PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5078 });
5079 }
5080 mark_state = ST.prev_mark;
5081 sv_yes_mark = mark_state ?
5082 mark_state->u.mark.mark_name : NULL;
5083 sayNO;
5084 /* NOTREACHED */
5085 case SKIP:
5086 PL_reginput = locinput;
5087 if (scan->flags) {
5088 /* (*SKIP) : if we fail we cut here*/
5089 ST.mark_name = NULL;
5090 ST.mark_loc = locinput;
5091 PUSH_STATE_GOTO(SKIP_next,next);
5092 } else {
5093 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5094 otherwise do nothing. Meaning we need to scan
5095 */
5096 regmatch_state *cur = mark_state;
5097 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5098
5099 while (cur) {
5100 if ( sv_eq( cur->u.mark.mark_name,
5101 find ) )
5102 {
5103 ST.mark_name = find;
5104 PUSH_STATE_GOTO( SKIP_next, next );
5105 }
5106 cur = cur->u.mark.prev_mark;
5107 }
5108 }
5109 /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5110 break;
5111 case SKIP_next_fail:
5112 if (ST.mark_name) {
5113 /* (*CUT:NAME) - Set up to search for the name as we
5114 collapse the stack*/
5115 popmark = ST.mark_name;
5116 } else {
5117 /* (*CUT) - No name, we cut here.*/
5118 if (ST.mark_loc > startpoint)
5119 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5120 /* but we set sv_commit to latest mark_name if there
5121 is one so they can test to see how things lead to this
5122 cut */
5123 if (mark_state)
5124 sv_commit=mark_state->u.mark.mark_name;
5125 }
5126 no_final = 1;
5127 sayNO;
5128 /* NOTREACHED */
5129#undef ST
5130 case FOLDCHAR:
5131 n = ARG(scan);
5132 if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5133 locinput += ln;
5134 } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5135 sayNO;
5136 } else {
5137 U8 folded[UTF8_MAXBYTES_CASE+1];
5138 STRLEN foldlen;
5139 const char * const l = locinput;
5140 char *e = PL_regeol;
5141 to_uni_fold(n, folded, &foldlen);
5142
5143 if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
5144 l, &e, 0, do_utf8)) {
5145 sayNO;
5146 }
5147 locinput = e;
5148 }
5149 nextchr = UCHARAT(locinput);
5150 break;
5151 case LNBREAK:
5152 if ((n=is_LNBREAK(locinput,do_utf8))) {
5153 locinput += n;
5154 nextchr = UCHARAT(locinput);
5155 } else
5156 sayNO;
5157 break;
5158
5159#define CASE_CLASS(nAmE) \
5160 case nAmE: \
5161 if ((n=is_##nAmE(locinput,do_utf8))) { \
5162 locinput += n; \
5163 nextchr = UCHARAT(locinput); \
5164 } else \
5165 sayNO; \
5166 break; \
5167 case N##nAmE: \
5168 if ((n=is_##nAmE(locinput,do_utf8))) { \
5169 sayNO; \
5170 } else { \
5171 locinput += UTF8SKIP(locinput); \
5172 nextchr = UCHARAT(locinput); \
5173 } \
5174 break
5175
5176 CASE_CLASS(VERTWS);
5177 CASE_CLASS(HORIZWS);
5178#undef CASE_CLASS
5179
5180 default:
5181 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5182 PTR2UV(scan), OP(scan));
5183 Perl_croak(aTHX_ "regexp memory corruption");
5184
5185 } /* end switch */
5186
5187 /* switch break jumps here */
5188 scan = next; /* prepare to execute the next op and ... */
5189 continue; /* ... jump back to the top, reusing st */
5190 /* NOTREACHED */
5191
5192 push_yes_state:
5193 /* push a state that backtracks on success */
5194 st->u.yes.prev_yes_state = yes_state;
5195 yes_state = st;
5196 /* FALL THROUGH */
5197 push_state:
5198 /* push a new regex state, then continue at scan */
5199 {
5200 regmatch_state *newst;
5201
5202 DEBUG_STACK_r({
5203 regmatch_state *cur = st;
5204 regmatch_state *curyes = yes_state;
5205 int curd = depth;
5206 regmatch_slab *slab = PL_regmatch_slab;
5207 for (;curd > -1;cur--,curd--) {
5208 if (cur < SLAB_FIRST(slab)) {
5209 slab = slab->prev;
5210 cur = SLAB_LAST(slab);
5211 }
5212 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5213 REPORT_CODE_OFF + 2 + depth * 2,"",
5214 curd, PL_reg_name[cur->resume_state],
5215 (curyes == cur) ? "yes" : ""
5216 );
5217 if (curyes == cur)
5218 curyes = cur->u.yes.prev_yes_state;
5219 }
5220 } else
5221 DEBUG_STATE_pp("push")
5222 );
5223 depth++;
5224 st->locinput = locinput;
5225 newst = st+1;
5226 if (newst > SLAB_LAST(PL_regmatch_slab))
5227 newst = S_push_slab(aTHX);
5228 PL_regmatch_state = newst;
5229
5230 locinput = PL_reginput;
5231 nextchr = UCHARAT(locinput);
5232 st = newst;
5233 continue;
5234 /* NOTREACHED */
5235 }
5236 }
5237
5238 /*
5239 * We get here only if there's trouble -- normally "case END" is
5240 * the terminating point.
5241 */
5242 Perl_croak(aTHX_ "corrupted regexp pointers");
5243 /*NOTREACHED*/
5244 sayNO;
5245
5246yes:
5247 if (yes_state) {
5248 /* we have successfully completed a subexpression, but we must now
5249 * pop to the state marked by yes_state and continue from there */
5250 assert(st != yes_state);
5251#ifdef DEBUGGING
5252 while (st != yes_state) {
5253 st--;
5254 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5255 PL_regmatch_slab = PL_regmatch_slab->prev;
5256 st = SLAB_LAST(PL_regmatch_slab);
5257 }
5258 DEBUG_STATE_r({
5259 if (no_final) {
5260 DEBUG_STATE_pp("pop (no final)");
5261 } else {
5262 DEBUG_STATE_pp("pop (yes)");
5263 }
5264 });
5265 depth--;
5266 }
5267#else
5268 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5269 || yes_state > SLAB_LAST(PL_regmatch_slab))
5270 {
5271 /* not in this slab, pop slab */
5272 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5273 PL_regmatch_slab = PL_regmatch_slab->prev;
5274 st = SLAB_LAST(PL_regmatch_slab);
5275 }
5276 depth -= (st - yes_state);
5277#endif
5278 st = yes_state;
5279 yes_state = st->u.yes.prev_yes_state;
5280 PL_regmatch_state = st;
5281
5282 if (no_final) {
5283 locinput= st->locinput;
5284 nextchr = UCHARAT(locinput);
5285 }
5286 state_num = st->resume_state + no_final;
5287 goto reenter_switch;
5288 }
5289
5290 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5291 PL_colors[4], PL_colors[5]));
5292
5293 if (PL_reg_eval_set) {
5294 /* each successfully executed (?{...}) block does the equivalent of
5295 * local $^R = do {...}
5296 * When popping the save stack, all these locals would be undone;
5297 * bypass this by setting the outermost saved $^R to the latest
5298 * value */
5299 if (oreplsv != GvSV(PL_replgv))
5300 sv_setsv(oreplsv, GvSV(PL_replgv));
5301 }
5302 result = 1;
5303 goto final_exit;
5304
5305no:
5306 DEBUG_EXECUTE_r(
5307 PerlIO_printf(Perl_debug_log,
5308 "%*s %sfailed...%s\n",
5309 REPORT_CODE_OFF+depth*2, "",
5310 PL_colors[4], PL_colors[5])
5311 );
5312
5313no_silent:
5314 if (no_final) {
5315 if (yes_state) {
5316 goto yes;
5317 } else {
5318 goto final_exit;
5319 }
5320 }
5321 if (depth) {
5322 /* there's a previous state to backtrack to */
5323 st--;
5324 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5325 PL_regmatch_slab = PL_regmatch_slab->prev;
5326 st = SLAB_LAST(PL_regmatch_slab);
5327 }
5328 PL_regmatch_state = st;
5329 locinput= st->locinput;
5330 nextchr = UCHARAT(locinput);
5331
5332 DEBUG_STATE_pp("pop");
5333 depth--;
5334 if (yes_state == st)
5335 yes_state = st->u.yes.prev_yes_state;
5336
5337 state_num = st->resume_state + 1; /* failure = success + 1 */
5338 goto reenter_switch;
5339 }
5340 result = 0;
5341
5342 final_exit:
5343 if (rex->intflags & PREGf_VERBARG_SEEN) {
5344 SV *sv_err = get_sv("REGERROR", 1);
5345 SV *sv_mrk = get_sv("REGMARK", 1);
5346 if (result) {
5347 sv_commit = &PL_sv_no;
5348 if (!sv_yes_mark)
5349 sv_yes_mark = &PL_sv_yes;
5350 } else {
5351 if (!sv_commit)
5352 sv_commit = &PL_sv_yes;
5353 sv_yes_mark = &PL_sv_no;
5354 }
5355 sv_setsv(sv_err, sv_commit);
5356 sv_setsv(sv_mrk, sv_yes_mark);
5357 }
5358
5359 /* clean up; in particular, free all slabs above current one */
5360 LEAVE_SCOPE(oldsave);
5361
5362 return result;
5363}
5364
5365/*
5366 - regrepeat - repeatedly match something simple, report how many
5367 */
5368/*
5369 * [This routine now assumes that it will only match on things of length 1.
5370 * That was true before, but now we assume scan - reginput is the count,
5371 * rather than incrementing count on every character. [Er, except utf8.]]
5372 */
5373STATIC I32
5374S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5375{
5376 dVAR;
5377 register char *scan;
5378 register I32 c;
5379 register char *loceol = PL_regeol;
5380 register I32 hardcount = 0;
5381 register bool do_utf8 = PL_reg_match_utf8;
5382#ifndef DEBUGGING
5383 PERL_UNUSED_ARG(depth);
5384#endif
5385
5386 PERL_ARGS_ASSERT_REGREPEAT;
5387
5388 scan = PL_reginput;
5389 if (max == REG_INFTY)
5390 max = I32_MAX;
5391 else if (max < loceol - scan)
5392 loceol = scan + max;
5393 switch (OP(p)) {
5394 case REG_ANY:
5395 if (do_utf8) {
5396 loceol = PL_regeol;
5397 while (scan < loceol && hardcount < max && *scan != '\n') {
5398 scan += UTF8SKIP(scan);
5399 hardcount++;
5400 }
5401 } else {
5402 while (scan < loceol && *scan != '\n')
5403 scan++;
5404 }
5405 break;
5406 case SANY:
5407 if (do_utf8) {
5408 loceol = PL_regeol;
5409 while (scan < loceol && hardcount < max) {
5410 scan += UTF8SKIP(scan);
5411 hardcount++;
5412 }
5413 }
5414 else
5415 scan = loceol;
5416 break;
5417 case CANY:
5418 scan = loceol;
5419 break;
5420 case EXACT: /* length of string is 1 */
5421 c = (U8)*STRING(p);
5422 while (scan < loceol && UCHARAT(scan) == c)
5423 scan++;
5424 break;
5425 case EXACTF: /* length of string is 1 */
5426 c = (U8)*STRING(p);
5427 while (scan < loceol &&
5428 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5429 scan++;
5430 break;
5431 case EXACTFL: /* length of string is 1 */
5432 PL_reg_flags |= RF_tainted;
5433 c = (U8)*STRING(p);
5434 while (scan < loceol &&
5435 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5436 scan++;
5437 break;
5438 case ANYOF:
5439 if (do_utf8) {
5440 loceol = PL_regeol;
5441 while (hardcount < max && scan < loceol &&
5442 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5443 scan += UTF8SKIP(scan);
5444 hardcount++;
5445 }
5446 } else {
5447 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5448 scan++;
5449 }
5450 break;
5451 case ALNUM:
5452 if (do_utf8) {
5453 loceol = PL_regeol;
5454 LOAD_UTF8_CHARCLASS_ALNUM();
5455 while (hardcount < max && scan < loceol &&
5456 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5457 scan += UTF8SKIP(scan);
5458 hardcount++;
5459 }
5460 } else {
5461 while (scan < loceol && isALNUM(*scan))
5462 scan++;
5463 }
5464 break;
5465 case ALNUML:
5466 PL_reg_flags |= RF_tainted;
5467 if (do_utf8) {
5468 loceol = PL_regeol;
5469 while (hardcount < max && scan < loceol &&
5470 isALNUM_LC_utf8((U8*)scan)) {
5471 scan += UTF8SKIP(scan);
5472 hardcount++;
5473 }
5474 } else {
5475 while (scan < loceol && isALNUM_LC(*scan))
5476 scan++;
5477 }
5478 break;
5479 case NALNUM:
5480 if (do_utf8) {
5481 loceol = PL_regeol;
5482 LOAD_UTF8_CHARCLASS_ALNUM();
5483 while (hardcount < max && scan < loceol &&
5484 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5485 scan += UTF8SKIP(scan);
5486 hardcount++;
5487 }
5488 } else {
5489 while (scan < loceol && !isALNUM(*scan))
5490 scan++;
5491 }
5492 break;
5493 case NALNUML:
5494 PL_reg_flags |= RF_tainted;
5495 if (do_utf8) {
5496 loceol = PL_regeol;
5497 while (hardcount < max && scan < loceol &&
5498 !isALNUM_LC_utf8((U8*)scan)) {
5499 scan += UTF8SKIP(scan);
5500 hardcount++;
5501 }
5502 } else {
5503 while (scan < loceol && !isALNUM_LC(*scan))
5504 scan++;
5505 }
5506 break;
5507 case SPACE:
5508 if (do_utf8) {
5509 loceol = PL_regeol;
5510 LOAD_UTF8_CHARCLASS_SPACE();
5511 while (hardcount < max && scan < loceol &&
5512 (*scan == ' ' ||
5513 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5514 scan += UTF8SKIP(scan);
5515 hardcount++;
5516 }
5517 } else {
5518 while (scan < loceol && isSPACE(*scan))
5519 scan++;
5520 }
5521 break;
5522 case SPACEL:
5523 PL_reg_flags |= RF_tainted;
5524 if (do_utf8) {
5525 loceol = PL_regeol;
5526 while (hardcount < max && scan < loceol &&
5527 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5528 scan += UTF8SKIP(scan);
5529 hardcount++;
5530 }
5531 } else {
5532 while (scan < loceol && isSPACE_LC(*scan))
5533 scan++;
5534 }
5535 break;
5536 case NSPACE:
5537 if (do_utf8) {
5538 loceol = PL_regeol;
5539 LOAD_UTF8_CHARCLASS_SPACE();
5540 while (hardcount < max && scan < loceol &&
5541 !(*scan == ' ' ||
5542 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5543 scan += UTF8SKIP(scan);
5544 hardcount++;
5545 }
5546 } else {
5547 while (scan < loceol && !isSPACE(*scan))
5548 scan++;
5549 }
5550 break;
5551 case NSPACEL:
5552 PL_reg_flags |= RF_tainted;
5553 if (do_utf8) {
5554 loceol = PL_regeol;
5555 while (hardcount < max && scan < loceol &&
5556 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5557 scan += UTF8SKIP(scan);
5558 hardcount++;
5559 }
5560 } else {
5561 while (scan < loceol && !isSPACE_LC(*scan))
5562 scan++;
5563 }
5564 break;
5565 case DIGIT:
5566 if (do_utf8) {
5567 loceol = PL_regeol;
5568 LOAD_UTF8_CHARCLASS_DIGIT();
5569 while (hardcount < max && scan < loceol &&
5570 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5571 scan += UTF8SKIP(scan);
5572 hardcount++;
5573 }
5574 } else {
5575 while (scan < loceol && isDIGIT(*scan))
5576 scan++;
5577 }
5578 break;
5579 case NDIGIT:
5580 if (do_utf8) {
5581 loceol = PL_regeol;
5582 LOAD_UTF8_CHARCLASS_DIGIT();
5583 while (hardcount < max && scan < loceol &&
5584 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5585 scan += UTF8SKIP(scan);
5586 hardcount++;
5587 }
5588 } else {
5589 while (scan < loceol && !isDIGIT(*scan))
5590 scan++;
5591 }
5592 case LNBREAK:
5593 if (do_utf8) {
5594 loceol = PL_regeol;
5595 while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5596 scan += c;
5597 hardcount++;
5598 }
5599 } else {
5600 /*
5601 LNBREAK can match two latin chars, which is ok,
5602 because we have a null terminated string, but we
5603 have to use hardcount in this situation
5604 */
5605 while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
5606 scan+=c;
5607 hardcount++;
5608 }
5609 }
5610 break;
5611 case HORIZWS:
5612 if (do_utf8) {
5613 loceol = PL_regeol;
5614 while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5615 scan += c;
5616 hardcount++;
5617 }
5618 } else {
5619 while (scan < loceol && is_HORIZWS_latin1(scan))
5620 scan++;
5621 }
5622 break;
5623 case NHORIZWS:
5624 if (do_utf8) {
5625 loceol = PL_regeol;
5626 while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5627 scan += UTF8SKIP(scan);
5628 hardcount++;
5629 }
5630 } else {
5631 while (scan < loceol && !is_HORIZWS_latin1(scan))
5632 scan++;
5633
5634 }
5635 break;
5636 case VERTWS:
5637 if (do_utf8) {
5638 loceol = PL_regeol;
5639 while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5640 scan += c;
5641 hardcount++;
5642 }
5643 } else {
5644 while (scan < loceol && is_VERTWS_latin1(scan))
5645 scan++;
5646
5647 }
5648 break;
5649 case NVERTWS:
5650 if (do_utf8) {
5651 loceol = PL_regeol;
5652 while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5653 scan += UTF8SKIP(scan);
5654 hardcount++;
5655 }
5656 } else {
5657 while (scan < loceol && !is_VERTWS_latin1(scan))
5658 scan++;
5659
5660 }
5661 break;
5662
5663 default: /* Called on something of 0 width. */
5664 break; /* So match right here or not at all. */
5665 }
5666
5667 if (hardcount)
5668 c = hardcount;
5669 else
5670 c = scan - PL_reginput;
5671 PL_reginput = scan;
5672
5673 DEBUG_r({
5674 GET_RE_DEBUG_FLAGS_DECL;
5675 DEBUG_EXECUTE_r({
5676 SV * const prop = sv_newmortal();
5677 regprop(prog, prop, p);
5678 PerlIO_printf(Perl_debug_log,
5679 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
5680 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5681 });
5682 });
5683
5684 return(c);
5685}
5686
5687
5688#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5689/*
5690- regclass_swash - prepare the utf8 swash
5691*/
5692
5693SV *
5694Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5695{
5696 dVAR;
5697 SV *sw = NULL;
5698 SV *si = NULL;
5699 SV *alt = NULL;
5700 RXi_GET_DECL(prog,progi);
5701 const struct reg_data * const data = prog ? progi->data : NULL;
5702
5703 PERL_ARGS_ASSERT_REGCLASS_SWASH;
5704
5705 if (data && data->count) {
5706 const U32 n = ARG(node);
5707
5708 if (data->what[n] == 's') {
5709 SV * const rv = MUTABLE_SV(data->data[n]);
5710 AV * const av = MUTABLE_AV(SvRV(rv));
5711 SV **const ary = AvARRAY(av);
5712 SV **a, **b;
5713
5714 /* See the end of regcomp.c:S_regclass() for
5715 * documentation of these array elements. */
5716
5717 si = *ary;
5718 a = SvROK(ary[1]) ? &ary[1] : NULL;
5719 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5720
5721 if (a)
5722 sw = *a;
5723 else if (si && doinit) {
5724 sw = swash_init("utf8", "", si, 1, 0);
5725 (void)av_store(av, 1, sw);
5726 }
5727 if (b)
5728 alt = *b;
5729 }
5730 }
5731
5732 if (listsvp)
5733 *listsvp = si;
5734 if (altsvp)
5735 *altsvp = alt;
5736
5737 return sw;
5738}
5739#endif
5740
5741/*
5742 - reginclass - determine if a character falls into a character class
5743
5744 The n is the ANYOF regnode, the p is the target string, lenp
5745 is pointer to the maximum length of how far to go in the p
5746 (if the lenp is zero, UTF8SKIP(p) is used),
5747 do_utf8 tells whether the target string is in UTF-8.
5748
5749 */
5750
5751STATIC bool
5752S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5753{
5754 dVAR;
5755 const char flags = ANYOF_FLAGS(n);
5756 bool match = FALSE;
5757 UV c = *p;
5758 STRLEN len = 0;
5759 STRLEN plen;
5760
5761 PERL_ARGS_ASSERT_REGINCLASS;
5762
5763 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5764 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5765 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5766 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5767 if (len == (STRLEN)-1)
5768 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5769 }
5770
5771 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5772 if (do_utf8 || (flags & ANYOF_UNICODE)) {
5773 if (lenp)
5774 *lenp = 0;
5775 if (do_utf8 && !ANYOF_RUNTIME(n)) {
5776 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5777 match = TRUE;
5778 }
5779 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5780 match = TRUE;
5781 if (!match) {
5782 AV *av;
5783 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5784
5785 if (sw) {
5786 U8 * utf8_p;
5787 if (do_utf8) {
5788 utf8_p = (U8 *) p;
5789 } else {
5790 STRLEN len = 1;
5791 utf8_p = bytes_to_utf8(p, &len);
5792 }
5793 if (swash_fetch(sw, utf8_p, 1))
5794 match = TRUE;
5795 else if (flags & ANYOF_FOLD) {
5796 if (!match && lenp && av) {
5797 I32 i;
5798 for (i = 0; i <= av_len(av); i++) {
5799 SV* const sv = *av_fetch(av, i, FALSE);
5800 STRLEN len;
5801 const char * const s = SvPV_const(sv, len);
5802 if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5803 *lenp = len;
5804 match = TRUE;
5805 break;
5806 }
5807 }
5808 }
5809 if (!match) {
5810 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5811
5812 STRLEN tmplen;
5813 to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5814 if (swash_fetch(sw, tmpbuf, 1))
5815 match = TRUE;
5816 }
5817 }
5818
5819 /* If we allocated a string above, free it */
5820 if (! do_utf8) Safefree(utf8_p);
5821 }
5822 }
5823 if (match && lenp && *lenp == 0)
5824 *lenp = UNISKIP(NATIVE_TO_UNI(c));
5825 }
5826 if (!match && c < 256) {
5827 if (ANYOF_BITMAP_TEST(n, c))
5828 match = TRUE;
5829 else if (flags & ANYOF_FOLD) {
5830 U8 f;
5831
5832 if (flags & ANYOF_LOCALE) {
5833 PL_reg_flags |= RF_tainted;
5834 f = PL_fold_locale[c];
5835 }
5836 else
5837 f = PL_fold[c];
5838 if (f != c && ANYOF_BITMAP_TEST(n, f))
5839 match = TRUE;
5840 }
5841
5842 if (!match && (flags & ANYOF_CLASS)) {
5843 PL_reg_flags |= RF_tainted;
5844 if (
5845 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5846 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5847 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5848 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5849 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5850 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5851 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5852 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5853 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5854 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5855 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5856 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5857 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5858 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5859 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5860 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5861 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5862 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5863 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5864 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5865 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5866 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5867 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5868 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5869 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5870 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5871 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5872 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5873 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5874 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5875 ) /* How's that for a conditional? */
5876 {
5877 match = TRUE;
5878 }
5879 }
5880 }
5881
5882 return (flags & ANYOF_INVERT) ? !match : match;
5883}
5884
5885STATIC U8 *
5886S_reghop3(U8 *s, I32 off, const U8* lim)
5887{
5888 dVAR;
5889
5890 PERL_ARGS_ASSERT_REGHOP3;
5891
5892 if (off >= 0) {
5893 while (off-- && s < lim) {
5894 /* XXX could check well-formedness here */
5895 s += UTF8SKIP(s);
5896 }
5897 }
5898 else {
5899 while (off++ && s > lim) {
5900 s--;
5901 if (UTF8_IS_CONTINUED(*s)) {
5902 while (s > lim && UTF8_IS_CONTINUATION(*s))
5903 s--;
5904 }
5905 /* XXX could check well-formedness here */
5906 }
5907 }
5908 return s;
5909}
5910
5911#ifdef XXX_dmq
5912/* there are a bunch of places where we use two reghop3's that should
5913 be replaced with this routine. but since thats not done yet
5914 we ifdef it out - dmq
5915*/
5916STATIC U8 *
5917S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5918{
5919 dVAR;
5920
5921 PERL_ARGS_ASSERT_REGHOP4;
5922
5923 if (off >= 0) {
5924 while (off-- && s < rlim) {
5925 /* XXX could check well-formedness here */
5926 s += UTF8SKIP(s);
5927 }
5928 }
5929 else {
5930 while (off++ && s > llim) {
5931 s--;
5932 if (UTF8_IS_CONTINUED(*s)) {
5933 while (s > llim && UTF8_IS_CONTINUATION(*s))
5934 s--;
5935 }
5936 /* XXX could check well-formedness here */
5937 }
5938 }
5939 return s;
5940}
5941#endif
5942
5943STATIC U8 *
5944S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5945{
5946 dVAR;
5947
5948 PERL_ARGS_ASSERT_REGHOPMAYBE3;
5949
5950 if (off >= 0) {
5951 while (off-- && s < lim) {
5952 /* XXX could check well-formedness here */
5953 s += UTF8SKIP(s);
5954 }
5955 if (off >= 0)
5956 return NULL;
5957 }
5958 else {
5959 while (off++ && s > lim) {
5960 s--;
5961 if (UTF8_IS_CONTINUED(*s)) {
5962 while (s > lim && UTF8_IS_CONTINUATION(*s))
5963 s--;
5964 }
5965 /* XXX could check well-formedness here */
5966 }
5967 if (off <= 0)
5968 return NULL;
5969 }
5970 return s;
5971}
5972
5973static void
5974restore_pos(pTHX_ void *arg)
5975{
5976 dVAR;
5977 regexp * const rex = (regexp *)arg;
5978 if (PL_reg_eval_set) {
5979 if (PL_reg_oldsaved) {
5980 rex->subbeg = PL_reg_oldsaved;
5981 rex->sublen = PL_reg_oldsavedlen;
5982#ifdef PERL_OLD_COPY_ON_WRITE
5983 rex->saved_copy = PL_nrs;
5984#endif
5985 RXp_MATCH_COPIED_on(rex);
5986 }
5987 PL_reg_magic->mg_len = PL_reg_oldpos;
5988 PL_reg_eval_set = 0;
5989 PL_curpm = PL_reg_oldcurpm;
5990 }
5991}
5992
5993STATIC void
5994S_to_utf8_substr(pTHX_ register regexp *prog)
5995{
5996 int i = 1;
5997
5998 PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5999
6000 do {
6001 if (prog->substrs->data[i].substr
6002 && !prog->substrs->data[i].utf8_substr) {
6003 SV* const sv = newSVsv(prog->substrs->data[i].substr);
6004 prog->substrs->data[i].utf8_substr = sv;
6005 sv_utf8_upgrade(sv);
6006 if (SvVALID(prog->substrs->data[i].substr)) {
6007 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
6008 if (flags & FBMcf_TAIL) {
6009 /* Trim the trailing \n that fbm_compile added last
6010 time. */
6011 SvCUR_set(sv, SvCUR(sv) - 1);
6012 /* Whilst this makes the SV technically "invalid" (as its
6013 buffer is no longer followed by "\0") when fbm_compile()
6014 adds the "\n" back, a "\0" is restored. */
6015 }
6016 fbm_compile(sv, flags);
6017 }
6018 if (prog->substrs->data[i].substr == prog->check_substr)
6019 prog->check_utf8 = sv;
6020 }
6021 } while (i--);
6022}
6023
6024STATIC void
6025S_to_byte_substr(pTHX_ register regexp *prog)
6026{
6027 dVAR;
6028 int i = 1;
6029
6030 PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6031
6032 do {
6033 if (prog->substrs->data[i].utf8_substr
6034 && !prog->substrs->data[i].substr) {
6035 SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6036 if (sv_utf8_downgrade(sv, TRUE)) {
6037 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6038 const U8 flags
6039 = BmFLAGS(prog->substrs->data[i].utf8_substr);
6040 if (flags & FBMcf_TAIL) {
6041 /* Trim the trailing \n that fbm_compile added last
6042 time. */
6043 SvCUR_set(sv, SvCUR(sv) - 1);
6044 }
6045 fbm_compile(sv, flags);
6046 }
6047 } else {
6048 SvREFCNT_dec(sv);
6049 sv = &PL_sv_undef;
6050 }
6051 prog->substrs->data[i].substr = sv;
6052 if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6053 prog->check_substr = sv;
6054 }
6055 } while (i--);
6056}
6057
6058/*
6059 * Local variables:
6060 * c-indentation-style: bsd
6061 * c-basic-offset: 4
6062 * indent-tabs-mode: t
6063 * End:
6064 *
6065 * ex: set ts=8 sts=4 sw=4 noet:
6066 */