This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Extract code to inline function
author Karl Williamson <public@khwilliamson.com>
Sat, 7 Jul 2012 03:41:07 +0000 (21:41 -0600)
committer Karl Williamson <public@khwilliamson.com>
Wed, 25 Jul 2012 03:13:45 +0000 (21:13 -0600)
Future commits will use this paradigm in additional places, so extract
it to a function, so they all do things right.  This isn't a great API,
but it works for the few places this will be called.

embed.fnc
embed.h
proto.h
regcomp.c

diff --git a/embed.fnc b/embed.fnc
index e302122..3b7fd77 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1947,6 +1947,8 @@ Es        |U32    |join_exact     |NN struct RExC_state_t *pRExC_state \
                                |U32 flags|NULLOK regnode *val|U32 depth
 EsRn   |char * |regwhite       |NN struct RExC_state_t *pRExC_state \
                                |NN char *p
+Ei     |void   |alloc_maybe_populate_EXACT|NN struct RExC_state_t *pRExC_state \
+                               |NN regnode *node|STRLEN len|UV code_point
 Es     |char * |nextchar       |NN struct RExC_state_t *pRExC_state
 Es     |bool   |reg_skipcomment|NN struct RExC_state_t *pRExC_state
 Es     |void   |scan_commit    |NN const struct RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index 9bfab3f..6a10e72 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define add_alternate(a,b,c)   S_add_alternate(aTHX_ a,b,c)
 #define add_cp_to_invlist(a,b) S_add_cp_to_invlist(aTHX_ a,b)
 #define add_data               S_add_data
+#define alloc_maybe_populate_EXACT(a,b,c,d)    S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d)
 #define checkposixcc(a)                S_checkposixcc(aTHX_ a)
 #define cl_and                 S_cl_and
 #define cl_anything            S_cl_anything
diff --git a/proto.h b/proto.h
index b7c0258..91db7c0 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6398,6 +6398,12 @@ STATIC U32       S_add_data(struct RExC_state_t *pRExC_state, U32 n, const char *s)
 #define PERL_ARGS_ASSERT_ADD_DATA      \
        assert(pRExC_state); assert(s)
 
+PERL_STATIC_INLINE void        S_alloc_maybe_populate_EXACT(pTHX_ struct RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+                       __attribute__nonnull__(pTHX_1)
+                       __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT    \
+       assert(pRExC_state); assert(node)
+
 STATIC void    S_checkposixcc(pTHX_ struct RExC_state_t *pRExC_state)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_CHECKPOSIXCC  \
diff --git a/regcomp.c b/regcomp.c
index 7460680..b57be3f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -9738,6 +9738,61 @@ S_reg_recode(pTHX_ const char value, SV **encp)
     return uv;
 }
 
+PERL_STATIC_INLINE void
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+{
+    /* Size an EXACTish node and, optionally, fill it with one character.
+     * If <len> is non-zero, the node's STRING is assumed to be populated
+     * already: only the sizing is done and <code_point> is ignored.  If
+     * <len> is 0, the length is computed from <code_point>.  In pass 1
+     * (SIZE_ONLY) just RExC_size is increased.  In pass 2, RExC_emit is
+     * advanced, STR_LEN(node) is set and, if <len> was 0, the bytes for
+     * <code_point> are copied into the node's STRING.
+     *
+     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
+     * folded (the latter only when the rules indicate it can match 'ss') */
+
+    bool len_passed_in = cBOOL(len != 0); /* true: caller supplied <len> */
+    U8 character[UTF8_MAXBYTES_CASE+1]; /* bytes to store when <len> is 0 */
+
+    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
+
+    if (! len_passed_in) { /* work out the bytes and <len> ourselves */
+        if (UTF) {
+            if (FOLD) { /* to_uni_fold() gets &len; presumably sets it */
+                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            }
+            else { /* no fold: uvchr_to_utf8() output, UTF8SKIP length */
+                uvchr_to_utf8( character, code_point);
+                len = UTF8SKIP(character);
+            }
+        }
+        else if (! FOLD
+                 || code_point != LATIN_SMALL_LETTER_SHARP_S
+                 || MORE_ASCII_RESTRICTED
+                 || ! AT_LEAST_UNI_SEMANTICS)
+        { /* non-UTF and not the sharp s fold case: one byte, as-is */
+            *character = (U8) code_point;
+            len = 1;
+        }
+        else { /* non-UTF sharp s under FOLD and uni rules: store "ss" */
+            *character = 's';
+            *(character + 1) = 's';
+            len = 2;
+        }
+    }
+
+    if (SIZE_ONLY) { /* only accumulate the space needed */
+        RExC_size += STR_SZ(len);
+    }
+    else { /* advance the emit position and record the length */
+        RExC_emit += STR_SZ(len);
+        STR_LEN(node) = len;
+        if (! len_passed_in) { /* STRING not yet filled by the caller */
+            Copy((char *) character, STRING(node), len, char);
+        }
+    }
+}
 
 /*
  - regatom - the lowest level
@@ -10637,12 +10692,7 @@ tryagain:
            if (len == 1 && UNI_IS_INVARIANT(ender))
                *flagp |= SIMPLE;
 
-           if (SIZE_ONLY)
-               RExC_size += STR_SZ(len);
-           else {
-               STR_LEN(ret) = len;
-               RExC_emit += STR_SZ(len);
-            }
+            alloc_maybe_populate_EXACT(pRExC_state, ret, len, 0);
        }
        break;
     }