This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
subsume CURCUR (current curly) struct within the main regmatch_state
[perl5.git] / regexp.h
index 9e86a1e..b3634d8 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -1,4 +1,11 @@
 /*    regexp.h
+ *
+ *    Copyright (C) 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2003,
+ *    by Larry Wall and others
+ *
+ *    You may distribute under the terms of either the GNU General Public
+ *    License or the Artistic License, as specified in the README file.
+ *
  */
 
 /*
@@ -19,6 +26,8 @@ typedef struct regnode regnode;
 
 struct reg_substr_data;
 
+struct reg_data;
+
 typedef struct regexp {
        I32 *startp;
        I32 *endp;
@@ -28,12 +37,17 @@ typedef struct regexp {
         struct reg_data *data; /* Additional data. */
        char *subbeg;           /* saved or original string 
                                   so \digit works forever. */
+#ifdef PERL_OLD_COPY_ON_WRITE
+        SV *saved_copy;         /* If non-NULL, SV which is COW from original */
+#endif
+        U32 *offsets;           /* offset annotations 20001228 MJD */
        I32 sublen;             /* Length of string pointed by subbeg */
        I32 refcnt;
        I32 minlen;             /* mininum possible length of $& */
        I32 prelen;             /* length of precomp */
        U32 nparens;            /* number of parentheses */
        U32 lastparen;          /* last paren matched */
+       U32 lastcloseparen;     /* last close paren matched */
        U32 reganch;            /* Internal use only +
                                   Tainted information used by regexec? */
        regnode program[1];     /* Unwarranted chumminess with compiler. */
@@ -52,6 +66,8 @@ typedef struct regexp {
 #define ROPT_CHECK_ALL         0x00100
 #define ROPT_LOOKBEHIND_SEEN   0x00200
 #define ROPT_EVAL_SEEN         0x00400
+#define ROPT_CANY_SEEN         0x00800
+#define ROPT_SANY_SEEN         ROPT_CANY_SEEN /* alias kept for source backward compatibility */
 
 /* 0xf800 of reganch is used by PMf_COMPILETIME */
 
@@ -59,6 +75,7 @@ typedef struct regexp {
 #define ROPT_NAUGHTY           0x20000 /* how exponential is this pattern? */
 #define ROPT_COPY_DONE         0x40000 /* subbeg is a copy of the string */
 #define ROPT_TAINTED_SEEN      0x80000
+#define ROPT_MATCH_UTF8                0x10000000 /* subbeg is utf-8 */
 
 #define RE_USE_INTUIT_NOML     0x0100000 /* Best to intuit before matching */
 #define RE_USE_INTUIT_ML       0x0200000
@@ -87,14 +104,37 @@ typedef struct regexp {
                                         ? RX_MATCH_COPIED_on(prog) \
                                         : RX_MATCH_COPIED_off(prog))
 
+#ifdef PERL_OLD_COPY_ON_WRITE
+#define RX_MATCH_COPY_FREE(rx) /* release what the regexp kept of the matched string: saved_copy, then a copied subbeg */ \
+       STMT_START {if (rx->saved_copy) { /* NOTE(review): rx is used as rx->member without parentheses; pass a simple expression */ \
+           SV_CHECK_THINKFIRST_COW_DROP(rx->saved_copy); \
+       } \
+       if (RX_MATCH_COPIED(rx)) { \
+           Safefree(rx->subbeg); \
+           RX_MATCH_COPIED_off(rx); /* flag is cleared once subbeg has been freed */ \
+       }} STMT_END
+#else /* !PERL_OLD_COPY_ON_WRITE: struct regexp has no saved_copy member */
+#define RX_MATCH_COPY_FREE(rx) /* free subbeg if RX_MATCH_COPIED is set, then clear that flag */ \
+       STMT_START {if (RX_MATCH_COPIED(rx)) { \
+           Safefree(rx->subbeg); \
+           RX_MATCH_COPIED_off(rx); \
+       }} STMT_END
+#endif /* PERL_OLD_COPY_ON_WRITE */
+
+#define RX_MATCH_UTF8(prog)            ((prog)->reganch & ROPT_MATCH_UTF8) /* non-zero if the ROPT_MATCH_UTF8 bit is set in reganch */
+#define RX_MATCH_UTF8_on(prog)         ((prog)->reganch |= ROPT_MATCH_UTF8) /* set the bit */
+#define RX_MATCH_UTF8_off(prog)                ((prog)->reganch &= ~ROPT_MATCH_UTF8) /* clear the bit */
+#define RX_MATCH_UTF8_set(prog, t)     ((t) /* set or clear the bit according to t, and mirror it into PL_reg_match_utf8 */ \
+                       ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
+                       : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+    
 #define REXEC_COPY_STR 0x01            /* Need to copy the string. */
 #define REXEC_CHECKED  0x02            /* check_substr already checked. */
 #define REXEC_SCREAM   0x04            /* use scream table. */
 #define REXEC_IGNOREPOS        0x08            /* \G matches at start. */
 #define REXEC_NOT_FIRST        0x10            /* This is another iteration of //g. */
-#define REXEC_ML       0x20            /* $* was set. */
 
-#define ReREFCNT_inc(re) ((re && re->refcnt++), re)
+#define ReREFCNT_inc(re) ((void)(re && re->refcnt++), re) /* bump refcnt when re is non-NULL; the expression yields re. NOTE(review): re is expanded three times and unparenthesized, so pass a side-effect-free simple expression */
 #define ReREFCNT_dec(re) CALLREGFREE(aTHX_ re)
 
 #define FBMcf_TAIL_DOLLAR      1
@@ -106,3 +146,144 @@ typedef struct regexp {
 #define FBMrf_MULTILINE        1
 
 struct re_scream_pos_data_s;
+
+/* an accepting state/position */
+struct _reg_trie_accepted {
+    U8   *endpos;   /* NOTE(review): presumably where in the input the accepted word ends -- confirm in regexec.c */
+    U16  wordnum;   /* NOTE(review): presumably the number of the accepted word -- confirm in regexec.c */
+};
+typedef struct _reg_trie_accepted reg_trie_accepted; /* element type of regmatch_state's trie.accept_buff */
+
+
+/* structures for holding and saving the state maintained by regmatch() */
+
+typedef I32 CHECKPOINT; /* type of the cp/lastcp members below, which remember savestack indexes */
+
+typedef struct re_cc_state /* type of PL_reg_call_cc values (see regmatch_state.reg_call_cc); entries link via prev */
+{
+    I32 ss;                    /* NOTE(review): undocumented; presumably a savestack index -- confirm in regexec.c */
+    regnode *node;             /* NOTE(review): undocumented; presumably the node to continue from -- confirm */
+    struct re_cc_state *prev;  /* link to another re_cc_state (the previous one, going by the name) */
+    struct regmatch_state *cc; /* state corresponding to the current curly */
+    regexp *re;                /* the regexp this entry refers to */
+} re_cc_state;
+
+
+typedef enum { /* values for regmatch_state.resume_state, i.e. "where to jump to on return" */
+    resume_TRIE1,
+    resume_TRIE2,
+    resume_CURLYX,
+    resume_WHILEM1,
+    resume_WHILEM2,
+    resume_WHILEM3,
+    resume_WHILEM4,
+    resume_WHILEM5,
+    resume_WHILEM6,
+    resume_CURLYM1,
+    resume_CURLYM2,
+    resume_CURLYM3,
+    resume_CURLYM4,
+    resume_IFMATCH,
+    resume_PLUS1,
+    resume_PLUS2,
+    resume_PLUS3,
+    resume_PLUS4,
+    resume_END
+} regmatch_resume_states; /* NOTE(review): numeric suffixes presumably distinguish several resume points of one operation -- confirm in regexec.c */
+
+
+typedef struct regmatch_state { /* one saved set of the state maintained by regmatch() */
+
+    /* these vars contain state that needs to be maintained
+     * across the main while loop ... */
+
+    regmatch_resume_states resume_state; /* where to jump to on return */
+    regnode *scan;             /* Current node. */
+    regnode *next;             /* Next node. */
+    bool minmod;               /* the next "{n,m}" is a "{n,m}?" */
+    bool sw;                   /* the condition value in (?(cond)a|b) */
+    int logical;               /* NOTE(review): undocumented; meaning not evident from this header */
+    I32 unwind;                        /* savestack index of current unwind block */
+    struct regmatch_state  *cc;        /* current innermost curly state */
+    char *locinput;            /* NOTE(review): presumably the current position in the input -- confirm in regexec.c */
+
+    /* ... while the rest of these are local to an individual branch */
+
+    I32 n;                     /* no or next */
+    I32 ln;                    /* len or last */
+
+    union { /* per-operation state; the members share storage, so only one is valid at a time */
+       struct {
+           reg_trie_accepted *accept_buff;
+           U32 accepted;       /* how many accepting states we have seen */
+       } trie;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+       } eval;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           struct regmatch_state *outercc; /* outer CURLYX state if any */
+
+           /* these contain the current curly state, and are accessed
+            * by subsequent WHILEMs */
+           int         parenfloor;/* how far back to strip paren data */
+           int         cur;    /* how many instances of scan we've matched */
+           int         min;    /* the minimal number of scans to match */
+           int         max;    /* the maximal number of scans to match */
+           regnode *   scan;   /* the thing to match */
+           char *      lastloc;/* where we started matching this scan */
+       } curlyx;
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+           struct regmatch_state *savecc;
+           char *lastloc;      /* Detection of 0-len. */
+           I32 cache_offset;
+           I32 cache_bit;
+       } whilem;
+
+       struct {
+           I32 paren;
+           I32 c1, c2;         /* case fold search */
+           CHECKPOINT lastcp;
+           I32 l;
+           I32 matches;
+           I32 maxwanted;
+       } curlym;
+
+       struct {
+           I32 paren;
+           CHECKPOINT lastcp;
+           I32 c1, c2;         /* case fold search */
+           char *e;
+           char *old;
+           int count;
+       } plus; /* and CURLYN/CURLY/STAR */
+
+       struct {
+           CHECKPOINT cp;      /* remember current savestack indexes */
+           CHECKPOINT lastcp;
+           struct regmatch_state *savecc;
+           re_cc_state *cur_call_cc;
+           regexp *end_re;
+       } end;
+    }; /* NOTE(review): the union is anonymous, which is C11 (or a compiler extension in older dialects); naming it would change how every member is accessed */
+
+    re_cc_state *reg_call_cc;  /* saved value of PL_reg_call_cc */
+} regmatch_state;
+
+/* Number of regmatch_state structs allocated together as one slab, sized so
+ * a slab fits a 4K block: 4096 bytes less 3 pointer-sized words (2 for the
+ * slab's next/prev pointers, 1 allowed for malloc overhead), rounded down. */
+#define PERL_REGMATCH_SLAB_SLOTS \
+    ((4096 - 3 * sizeof (void*)) / sizeof(regmatch_state))
+
+typedef struct regmatch_slab { /* a batch of regmatch_state slots allocated together */
+    regmatch_state states[PERL_REGMATCH_SLAB_SLOTS]; /* the slots; the count keeps one slab within about 4K */
+    struct regmatch_slab *prev, *next; /* links to the neighbouring slabs */
+} regmatch_slab;