regcomp.c: Keep separate list for [:word:] from \p{Word}

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 8889ef1..2499435 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4404,6 +4404,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             data->flags |= (OP(scan) == MEOL
                             ? SF_BEFORE_MEOL
                             : SF_BEFORE_SEOL);
             data->flags |= (OP(scan) == MEOL
                             ? SF_BEFORE_MEOL
                             : SF_BEFORE_SEOL);
+           SCAN_COMMIT(pRExC_state, data, minlenp);
+
         }
         else if (  PL_regkind[OP(scan)] == BRANCHJ
                  /* Lookbehind, or need to calculate parens/evals/stclass: */
         }
         else if (  PL_regkind[OP(scan)] == BRANCHJ
                  /* Lookbehind, or need to calculate parens/evals/stclass: */
@@ -10966,7 +10968,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
      register regnode *ret;
      STRLEN numlen;
      UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
      register regnode *ret;
      STRLEN numlen;
-    IV namedclass;
+    IV namedclass = OOB_NAMEDCLASS;
      char *rangebegin = NULL;
      bool need_class = 0;
      bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
      char *rangebegin = NULL;
      bool need_class = 0;
      bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
@@ -10974,6 +10976,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                       than just initialized.  */
      SV* properties = NULL;    /* Code points that match \p{} \P{} */
      STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                       than just initialized.  */
      SV* properties = NULL;    /* Code points that match \p{} \P{} */
+    SV* posixes = NULL;     /* Code points that match classes like [:word:],
+                               extended beyond the Latin1 range */
      UV element_count = 0;   /* Number of distinct elements in the class.
                                Optimizations may be possible if this is tiny */
      UV n;
      UV element_count = 0;   /* Number of distinct elements in the class.
                                Optimizations may be possible if this is tiny */
      UV n;
@@ -10995,7 +10999,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
  
      /* Set if a component of this character class is user-defined; just passed
       * on to the engine */
  
      /* Set if a component of this character class is user-defined; just passed
       * on to the engine */
-    UV has_user_defined_property = 0;
+    bool has_user_defined_property = FALSE;
  
      /* inversion list of code points this node matches only when the target
       * string is in UTF-8.  (Because is under /d) */
  
      /* inversion list of code points this node matches only when the target
       * string is in UTF-8.  (Because is under /d) */
@@ -11218,7 +11222,7 @@ parseit:
                          Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          name);
                          Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          name);
-                        has_user_defined_property = 1;
+                        has_user_defined_property = TRUE;
  
                          /* We don't know yet, so have to assume that the
                           * property could match something in the Latin1 range,
  
                          /* We don't know yet, so have to assume that the
                           * property could match something in the Latin1 range,
@@ -11230,17 +11234,14 @@ parseit:
                          /* Here, did get the swash and its inversion list.  If
                           * the swash is from a user-defined property, then this
                           * whole character class should be regarded as such */
                          /* Here, did get the swash and its inversion list.  If
                           * the swash is from a user-defined property, then this
                           * whole character class should be regarded as such */
-                        SV** user_defined_svp =
-                                            hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                                        "USER_DEFINED", FALSE);
-                        if (user_defined_svp) {
-                            has_user_defined_property
-                                                    |= SvUV(*user_defined_svp);
-                        }
+                        has_user_defined_property =
+                                                _is_swash_user_defined(swash);
  
                          /* Invert if asking for the complement */
                          if (value == 'P') {
  
                          /* Invert if asking for the complement */
                          if (value == 'P') {
-                           _invlist_union_complement_2nd(properties, invlist, &properties);
+                           _invlist_union_complement_2nd(properties,
+                                                          invlist,
+                                                          &properties);
  
                              /* The swash can't be used as-is, because we've
                              * inverted things; delay removing it to here after
  
                              /* The swash can't be used as-is, because we've
                              * inverted things; delay removing it to here after
@@ -11342,15 +11343,20 @@ parseit:
             literal_endpoint++;
  #endif
  
             literal_endpoint++;
  #endif
  
-       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
-
-           /* What matches in a locale is not known until runtime, so need to
-            * (one time per class) allocate extra space to pass to regexec.
-            * The space will contain a bit for each named class that is to be
-            * matched against.  This isn't needed for \p{} and pseudo-classes,
-            * as they are not affected by locale, and hence are dealt with
-            * separately */
-           if (LOC && namedclass < ANYOF_MAX && ! need_class) {
+            /* What matches in a locale is not known until runtime.  This
+             * includes what the Posix classes (like \w, [:space:]) match.
+             * Room must be reserved (one time per class) to store such
+             * classes, either if Perl is compiled so that locale nodes always
+             * should have this space, or if there is such class info to be
+             * stored.  The space will contain a bit for each named class that
+             * is to be matched against.  This isn't needed for \p{} and
+             * pseudo-classes, as they are not affected by locale, and hence
+             * are dealt with separately */
+           if (LOC
+                && ! need_class
+                && (ANYOF_LOCALE == ANYOF_CLASS
+                    || (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_MAX)))
+            {
                 need_class = 1;
                 if (SIZE_ONLY) {
                     RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
                 need_class = 1;
                 if (SIZE_ONLY) {
                     RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
@@ -11362,6 +11368,8 @@ parseit:
                 ANYOF_FLAGS(ret) |= ANYOF_CLASS;
             }
  
                 ANYOF_FLAGS(ret) |= ANYOF_CLASS;
             }
  
+       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
             /* a bad range like a-\d, a-[:digit:].  The '-' is taken as a
              * literal, as is the character that began the false range, i.e.
              * the 'a' in the examples */
             /* a bad range like a-\d, a-[:digit:].  The '-' is taken as a
              * literal, as is the character that began the false range, i.e.
              * the 'a' in the examples */
@@ -11419,19 +11427,19 @@ parseit:
                 switch ((I32)namedclass) {
  
                 case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
                 switch ((I32)namedclass) {
  
                 case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                     break;
                 case ANYOF_NALNUMC:
                          PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                     break;
                 case ANYOF_NALNUMC:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                     break;
                 case ANYOF_ALPHA:
                          PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                     break;
                 case ANYOF_ALPHA:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                     break;
                 case ANYOF_NALPHA:
                          PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                     break;
                 case ANYOF_NALPHA:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                     break;
                 case ANYOF_ASCII:
                          PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                     break;
                 case ANYOF_ASCII:
@@ -11439,7 +11447,7 @@ parseit:
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
-                        _invlist_union(properties, PL_ASCII, &properties);
+                        _invlist_union(posixes, PL_ASCII, &posixes);
                      }
                     break;
                 case ANYOF_NASCII:
                      }
                     break;
                 case ANYOF_NASCII:
@@ -11447,48 +11455,48 @@ parseit:
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
-                        _invlist_union_complement_2nd(properties,
-                                                    PL_ASCII, &properties);
+                        _invlist_union_complement_2nd(posixes,
+                                                    PL_ASCII, &posixes);
                          if (DEPENDS_SEMANTICS) {
                              ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                          }
                      }
                     break;
                 case ANYOF_BLANK:
                          if (DEPENDS_SEMANTICS) {
                              ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                          }
                      }
                     break;
                 case ANYOF_BLANK:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_NBLANK:
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_NBLANK:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_CNTRL:
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_CNTRL:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_NCNTRL:
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_NCNTRL:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_DIGIT:
                     /* There are no digits in the Latin1 range outside of
                      * ASCII, so call the macro that doesn't have to resolve
                      * them */
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_DIGIT:
                     /* There are no digits in the Latin1 range outside of
                      * ASCII, so call the macro that doesn't have to resolve
                      * them */
-                   DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
                          PL_PosixDigit, "XPosixDigit", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NDIGIT:
                          PL_PosixDigit, "XPosixDigit", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NDIGIT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_GRAPH:
                          PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_GRAPH:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                     break;
                 case ANYOF_NGRAPH:
                          PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                     break;
                 case ANYOF_NGRAPH:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                     break;
                 case ANYOF_HORIZWS:
                          PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                     break;
                 case ANYOF_HORIZWS:
@@ -11526,46 +11534,46 @@ parseit:
                         Xname = "XPosixLower";
                     }
                     if (namedclass == ANYOF_LOWER) {
                         Xname = "XPosixLower";
                     }
                     if (namedclass == ANYOF_LOWER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                            properties, ascii_source, l1_source, Xname, listsv);
+                            posixes, ascii_source, l1_source, Xname, listsv);
                     }
                     break;
                 }
                 case ANYOF_PRINT:
                     }
                     break;
                 }
                 case ANYOF_PRINT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                     break;
                 case ANYOF_NPRINT:
                          PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                     break;
                 case ANYOF_NPRINT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                     break;
                 case ANYOF_PUNCT:
                          PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                     break;
                 case ANYOF_PUNCT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                     break;
                 case ANYOF_NPUNCT:
                          PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                     break;
                 case ANYOF_NPUNCT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                     break;
                 case ANYOF_PSXSPC:
                          PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                     break;
                 case ANYOF_PSXSPC:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_NPSXSPC:
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_NPSXSPC:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_SPACE:
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_SPACE:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NSPACE:
                                              PL_PerlSpace, PL_XPerlSpace);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NSPACE:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
                      has_special_charset_op = TRUE;
                     break;
                                              PL_PerlSpace, PL_XPerlSpace);
                      has_special_charset_op = TRUE;
                     break;
@@ -11587,22 +11595,22 @@ parseit:
                         Xname = "XPosixUpper";
                     }
                     if (namedclass == ANYOF_UPPER) {
                         Xname = "XPosixUpper";
                     }
                     if (namedclass == ANYOF_UPPER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                        properties, ascii_source, l1_source, Xname, listsv);
+                        posixes, ascii_source, l1_source, Xname, listsv);
                     }
                     break;
                 }
                 case ANYOF_ALNUM:   /* Really is 'Word' */
                     }
                     break;
                 }
                 case ANYOF_ALNUM:   /* Really is 'Word' */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NALNUM:
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
                      has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NALNUM:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
                      has_special_charset_op = TRUE;
                     break;
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
                      has_special_charset_op = TRUE;
                     break;
@@ -11620,11 +11628,11 @@ parseit:
                      has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_XDIGIT:
                      has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_XDIGIT:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_NXDIGIT:
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_NXDIGIT:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_MAX:
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_MAX:
@@ -12109,36 +12117,36 @@ parseit:
         SvREFCNT_dec(fold_intersection);
      }
  
         SvREFCNT_dec(fold_intersection);
      }
  
-    /* And combine the result (if any) with any inversion list from properties.
-     * The lists are kept separate up to now because we don't want to fold the
-     * properties */
-    if (properties) {
+    /* And combine the result (if any) with any inversion list from posix
+     * classes.  The lists are kept separate up to now because we don't want to
+     * fold the classes */
+    if (posixes) {
          if (AT_LEAST_UNI_SEMANTICS) {
              if (cp_list) {
          if (AT_LEAST_UNI_SEMANTICS) {
              if (cp_list) {
-                _invlist_union(cp_list, properties, &cp_list);
-                SvREFCNT_dec(properties);
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
              }
              else {
              }
              else {
-                cp_list = properties;
+                cp_list = posixes;
              }
          }
          else {
  
              }
          }
          else {
  
-            /* Under /d, we put the things that match only when the target
-             * string is utf8, into a separate list */
+            /* Under /d, we put into a separate list the Latin1 things that
+             * match only when the target string is utf8 */
              SV* nonascii_but_latin1_properties = NULL;
              SV* nonascii_but_latin1_properties = NULL;
-            _invlist_intersection(properties, PL_Latin1,
+            _invlist_intersection(posixes, PL_Latin1,
                                    &nonascii_but_latin1_properties);
              _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
                                &nonascii_but_latin1_properties);
                                    &nonascii_but_latin1_properties);
              _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
                                &nonascii_but_latin1_properties);
-            _invlist_subtract(properties, nonascii_but_latin1_properties,
-                              &properties);
+            _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                              &posixes);
              if (cp_list) {
              if (cp_list) {
-                _invlist_union(cp_list, properties, &cp_list);
-                SvREFCNT_dec(properties);
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
              }
              else {
              }
              else {
-                cp_list = properties;
+                cp_list = posixes;
              }
  
              if (depends_list) {
              }
  
              if (depends_list) {
@@ -12152,6 +12160,20 @@ parseit:
          }
      }
  
          }
      }
  
+    /* And combine the result (if any) with any inversion list from properties.
+     * (Note that in this case, unlike the Posix one above, there is no
+     * <depends_list>, because having a Unicode property forces Unicode
+     * semantics.) */
+    if (properties) {
+        if (cp_list) {
+            _invlist_union(cp_list, properties, &cp_list);
+            SvREFCNT_dec(properties);
+        }
+        else {
+            cp_list = properties;
+        }
+    }
+
      /* Here, we have calculated what code points should be in the character
       * class.
       *
      /* Here, we have calculated what code points should be in the character
       * class.
       *
@@ -12376,9 +12398,9 @@ parseit:
          * av[2] stores the multicharacter foldings, used later in
          *       regexec.c:S_reginclass().
          * av[3] stores the cp_list inversion list for use in addition or
          * av[2] stores the multicharacter foldings, used later in
          *       regexec.c:S_reginclass().
          * av[3] stores the cp_list inversion list for use in addition or
-        *       instead of av[0]; not used if av[1] isn't NULL
+        *       instead of av[0]; used only if av[1] is NULL
          * av[4] is set if any component of the class is from a user-defined
          * av[4] is set if any component of the class is from a user-defined
-        *       property; not used if av[1] isn't NULL */
+        *       property; used only if av[1] is NULL */
         AV * const av = newAV();
         SV *rv;
  
         AV * const av = newAV();
         SV *rv;