This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
concise.t: work with PERL_UNICODE=""
[perl5.git] / regcomp.c
index da01f05..bba5a2b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1283,8 +1283,8 @@ S_ssc_anything(pTHX_ regnode_ssc *ssc)
 
     assert(is_ANYOF_SYNTHETIC(ssc));
 
-    ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
-    _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
+    /* mortalize so won't leak */
+    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
     ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
 }
 
@@ -8376,6 +8376,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
 
 /* The header definitions are in F<invlist_inline.h> */
 
+#ifndef PERL_IN_XSUB_RE
+
 PERL_STATIC_INLINE UV*
 S__invlist_array_init(SV* const invlist, const bool will_have_0)
 {
@@ -8402,6 +8404,8 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
     return zero_addr + *offset;
 }
 
+#endif
+
 PERL_STATIC_INLINE void
 S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
 {
@@ -8538,6 +8542,8 @@ S_invlist_is_iterating(SV* const invlist)
     return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
 }
 
+#ifndef PERL_IN_XSUB_RE
+
 PERL_STATIC_INLINE UV
 S_invlist_max(SV* const invlist)
 {
@@ -8554,8 +8560,6 @@ S_invlist_max(SV* const invlist)
            ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
            : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
 }
-
-#ifndef PERL_IN_XSUB_RE
 SV*
 Perl__new_invlist(pTHX_ IV initial_size)
 {
@@ -8641,7 +8645,6 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
 
     return invlist;
 }
-#endif /* ifndef PERL_IN_XSUB_RE */
 
 STATIC void
 S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -8743,8 +8746,6 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
     }
 }
 
-#ifndef PERL_IN_XSUB_RE
-
 SSize_t
 Perl__invlist_search(SV* const invlist, const UV cp)
 {
@@ -8985,8 +8986,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
          * It's easiest to create a new inversion list that matches everything.
          * */
         if (complement_b) {
-            SV* everything = _new_invlist(1);
-            _append_range_to_invlist(everything, 0, UV_MAX);
+            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
 
             /* If the output didn't exist, just point it at the new list */
             if (*output == NULL) {
@@ -9788,7 +9788,7 @@ Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
 
     PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
 
-    _append_range_to_invlist(invlist, element0, element0);
+    invlist = add_cp_to_invlist(invlist, element0);
     offset = *get_invlist_offset_addr(invlist);
 
     invlist_set_len(invlist, size, offset);
@@ -10860,7 +10860,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 RExC_seen |= REG_LOOKBEHIND_SEEN;
                RExC_in_lookbehind++;
                RExC_parse++;
-                assert(RExC_parse < RExC_end);
+                if (RExC_parse >= RExC_end) {
+                    vFAIL("Sequence (?... not terminated");
+                }
+
                 /* FALLTHROUGH */
            case '=':           /* (?=...) */
                RExC_seen_zerolen++;
@@ -17472,22 +17475,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                       &nonascii_but_latin1_properties);
 
                 /* And add them to the final list of such characters. */
-                if (has_upper_latin1_only_utf8_matches) {
-                    _invlist_union(has_upper_latin1_only_utf8_matches,
-                                   nonascii_but_latin1_properties,
-                                   &has_upper_latin1_only_utf8_matches);
-                    SvREFCNT_dec_NN(nonascii_but_latin1_properties);
-                }
-                else {
-                    has_upper_latin1_only_utf8_matches
-                                                = nonascii_but_latin1_properties;
-                }
+                _invlist_union(has_upper_latin1_only_utf8_matches,
+                               nonascii_but_latin1_properties,
+                               &has_upper_latin1_only_utf8_matches);
 
                 /* Remove them from what now becomes the unconditional list */
                 _invlist_subtract(posixes, nonascii_but_latin1_properties,
                                   &posixes);
 
-                /* And the remainder are the unconditional ones */
+                /* And add those unconditional ones to the final list */
                 if (cp_list) {
                     _invlist_union(cp_list, posixes, &cp_list);
                     SvREFCNT_dec_NN(posixes);
@@ -17497,8 +17493,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     cp_list = posixes;
                 }
 
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+
                 /* Get rid of any characters that we now know are matched
-                 * unconditionally from the conditional list */
+                 * unconditionally from the conditional list, which may make
+                 * that list empty */
                 _invlist_subtract(has_upper_latin1_only_utf8_matches,
                                   cp_list,
                                   &has_upper_latin1_only_utf8_matches);