regcomp.c: Add parameter to regclass()
author Karl Williamson <public@khwilliamson.com>
Thu, 10 Jan 2013 22:03:39 +0000 (15:03 -0700)
committer Karl Williamson <public@khwilliamson.com>
Fri, 11 Jan 2013 18:50:37 +0000 (11:50 -0700)
This parameter allows the caller to specify whether multi-character
folds should be allowed or not.  In general they should be; in the case
where this commit says they shouldn't, they are never returned from
Unicode properties anyway.

This capability will be put to real use by future commits.

embed.fnc
embed.h
proto.h
regcomp.c

index e6bb9bc..4d1d81f 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1960,7 +1960,8 @@ Es        |regnode*|regbranch     |NN struct RExC_state_t *pRExC_state \
 Es     |STRLEN |reguni         |NN const struct RExC_state_t *pRExC_state \
                                |UV uv|NN char *s
 Es     |regnode*|regclass      |NN struct RExC_state_t *pRExC_state \
-                               |NN I32 *flagp|U32 depth|const bool stop_at_1
+                               |NN I32 *flagp|U32 depth|const bool stop_at_1 \
+                               |bool allow_multi_fold
 Es     |regnode*|reg_node      |NN struct RExC_state_t *pRExC_state|U8 op
 Es     |UV     |reg_recode     |const char value|NN SV **encp
 Es     |regnode*|regpiece      |NN struct RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index 86d9006..3f4034e 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define reganode(a,b,c)                S_reganode(aTHX_ a,b,c)
 #define regatom(a,b,c)         S_regatom(aTHX_ a,b,c)
 #define regbranch(a,b,c,d)     S_regbranch(aTHX_ a,b,c,d)
-#define regclass(a,b,c,d)      S_regclass(aTHX_ a,b,c,d)
+#define regclass(a,b,c,d,e)    S_regclass(aTHX_ a,b,c,d,e)
 #define reginsert(a,b,c,d)     S_reginsert(aTHX_ a,b,c,d)
 #define regpiece(a,b,c)                S_regpiece(aTHX_ a,b,c)
 #define regpposixcc(a,b,c)     S_regpposixcc(aTHX_ a,b,c)
diff --git a/proto.h b/proto.h
index 40784c2..9bb1228 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6639,7 +6639,7 @@ STATIC regnode*   S_regbranch(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp,
 #define PERL_ARGS_ASSERT_REGBRANCH     \
        assert(pRExC_state); assert(flagp)
 
-STATIC regnode*        S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1)
+STATIC regnode*        S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
 #define PERL_ARGS_ASSERT_REGCLASS      \
index 1d411cb..5c52f77 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -10106,7 +10106,8 @@ tryagain:
     {
        char * const oregcomp_parse = ++RExC_parse;
         ret = regclass(pRExC_state, flagp,depth+1,
-                       FALSE /* means parse the whole char class */ );
+                       FALSE, /* means parse the whole char class */
+                       TRUE); /* allow multi-char folds */
        if (*RExC_parse != ']') {
            RExC_parse = oregcomp_parse;
            vFAIL("Unmatched [");
@@ -10301,7 +10302,8 @@ tryagain:
                RExC_parse--;
 
                 ret = regclass(pRExC_state, flagp,depth+1,
-                               TRUE /* means just parse this element */ );
+                               TRUE, /* means just parse this element */
+                               FALSE); /* don't allow multi-char folds */
 
                RExC_parse--;
 
@@ -11229,7 +11231,8 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me)
 #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
 
 STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
+                 const bool stop_at_1, bool allow_multi_folds)
 {
     /* parse a bracketed class specification.  Most of these will produce an ANYOF node;
      * but something like [a] will produce an EXACT node; [aA], an EXACTFish
@@ -11325,6 +11328,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool st
     if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
        RExC_parse++;
         invert = TRUE;
+        allow_multi_folds = FALSE;
         RExC_naughty++;
     }
 
@@ -12009,7 +12013,7 @@ parseit:
          *  "ss"  =~ /^[^\xDF]+$/i => N
          *
          * See [perl #89750] */
-        if (FOLD && ! invert && value == prevvalue) {
+        if (FOLD && allow_multi_folds && value == prevvalue) {
             if (value == LATIN_SMALL_LETTER_SHARP_S
                 || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
                                                         value)))