regcomp.c: Add function to skip pattern white space
author Karl Williamson <public@khwilliamson.com>
Thu, 10 Jan 2013 22:42:36 +0000 (15:42 -0700)
committer Karl Williamson <public@khwilliamson.com>
Fri, 11 Jan 2013 18:50:37 +0000 (11:50 -0700)
The plan is to eventually convert all of regcomp to use this for white
space ignoring under /x, but this will be used for now in just the new
syntax for (?[ ]), coming in a few commits.  Until then, this function
is unused.

embed.fnc
embed.h
proto.h
regcomp.c

diff --git a/embed.fnc b/embed.fnc
index c781b75..f51e37c 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1982,6 +1982,8 @@ Es        |U32    |join_exact     |NN struct RExC_state_t *pRExC_state \
                                |U32 flags|NULLOK regnode *val|U32 depth
 EsRn   |char * |regwhite       |NN struct RExC_state_t *pRExC_state \
                                |NN char *p
+EsRn   |char * |regpatws       |NN struct RExC_state_t *pRExC_state \
+                               |NN char *p|const bool recognize_comment
 Ei     |void   |alloc_maybe_populate_EXACT|NN struct RExC_state_t *pRExC_state \
                                |NN regnode *node|NN I32 *flagp|STRLEN len \
                                |UV code_point
diff --git a/embed.h b/embed.h
index 5bae00e..57e4219 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define regbranch(a,b,c,d)     S_regbranch(aTHX_ a,b,c,d)
 #define regclass(a,b,c,d,e,f)  S_regclass(aTHX_ a,b,c,d,e,f)
 #define reginsert(a,b,c,d)     S_reginsert(aTHX_ a,b,c,d)
+#define regpatws               S_regpatws
 #define regpiece(a,b,c)                S_regpiece(aTHX_ a,b,c)
 #define regpposixcc(a,b,c)     S_regpposixcc(aTHX_ a,b,c)
 #define regtail(a,b,c,d)       S_regtail(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 37ceea3..ae55c51 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6651,6 +6651,13 @@ STATIC void      S_reginsert(pTHX_ struct RExC_state_t *pRExC_state, U8 op, regnode *
 #define PERL_ARGS_ASSERT_REGINSERT     \
        assert(pRExC_state); assert(opnd)
 
+STATIC char *  S_regpatws(struct RExC_state_t *pRExC_state, char *p, const bool recognize_comment)
+                       __attribute__warn_unused_result__
+                       __attribute__nonnull__(1)
+                       __attribute__nonnull__(2);
+#define PERL_ARGS_ASSERT_REGPATWS      \
+       assert(pRExC_state); assert(p)
+
 STATIC regnode*        S_regpiece(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
diff --git a/regcomp.c b/regcomp.c
index 3c1c06d..36db53d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -11093,6 +11093,40 @@ S_regwhite( RExC_state_t *pRExC_state, char *p )
     return p;
 }
 
+STATIC char *
+S_regpatws( RExC_state_t *pRExC_state, char *p , const bool recognize_comment )
+{
+    /* Returns a pointer to the first character at or after p (or RExC_end if
+     * there is none) that is neither pattern white space nor, when
+     * 'recognize_comment' is true, part of a '#' comment.  A comment not ended
+     * by a line break sets REG_SEEN_RUN_ON_COMMENT in RExC_seen. */
+    const char *e = RExC_end;
+
+    PERL_ARGS_ASSERT_REGPATWS;
+
+    while (p < e) {
+        STRLEN len;
+       if ((len = is_PATWS_safe(p, e, UTF))) {
+           p += len;   /* step over the white space just matched */
+        }
+       else if (recognize_comment && *p == '#') {
+            bool ended = 0;
+           do {
+                p++;    /* the first pass steps over the '#' itself */
+                if (is_LNBREAK_safe(p, e, UTF)) {
+                   ended = 1;
+                   break;      /* p stays on the line break */
+               }
+           } while (p < e);
+           if (!ended)
+               RExC_seen |= REG_SEEN_RUN_ON_COMMENT;
+       }
+       else
+           break;      /* neither white space nor a recognized comment */
+    }
+    return p;
+}
+
 /* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
    Character classes ([:foo:]) can also be negated ([:^foo:]).
    Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.