Reinstate "PATCH: {perl #127582] Over eager warning for /[.foo.]/""

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 7c2f419..b33cd7b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -105,6 +105,10 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif
  
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
  /* this is a chain of data about sub patterns we are processing that
     need to be handled separately/specially in study_chunk. Its so
     we can simulate recursion without losing state.  */
@@ -1356,7 +1360,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
-    SV* invlist = sv_2mortal(_new_invlist(0));
+    SV* invlist = NULL;
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
      const U32 n = ARG(node);
@@ -1378,6 +1382,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
  
              /* Here, no compile-time swash, and there are things that won't be
               * known until runtime -- we have to assume it could be anything */
+            invlist = sv_2mortal(_new_invlist(1));
              return _add_range_to_invlist(invlist, 0, UV_MAX);
          }
          else if (ary[3] && ary[3] != &PL_sv_undef) {
@@ -1395,6 +1400,10 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
          }
      }
  
+    if (! invlist) {
+        invlist = sv_2mortal(_new_invlist(0));
+    }
+
      /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
       * code points, and an inversion list for the others, but if there are code
       * points that should match only conditionally on the target string being
@@ -1414,7 +1423,12 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      /* Add in the points from the bit map */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            unsigned int start = i++;
+
+            for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
              new_node_has_latin1 = TRUE;
          }
      }
@@ -8348,7 +8362,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  #ifndef PERL_IN_XSUB_RE
  
  STATIC void
-S_invlist_replace_list(pTHX_ SV * dest, SV * src)
+S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
  {
      /* Replaces the inversion list in 'src' with the one in 'dest'.  It steals
       * the list from 'src', so 'src' is made to have a NULL list.  This is
@@ -8357,16 +8371,17 @@ S_invlist_replace_list(pTHX_ SV * dest, SV * src)
  
      const UV src_len          = _invlist_len(src);
      const bool src_offset     = *get_invlist_offset_addr(src);
-    const STRLEN src_byte_len = SvCUR(src);
+    const STRLEN src_byte_len = SvLEN(src);
      char * array              = SvPVX(src);
  
      const int oldtainted = TAINT_get;
  
-    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST;
+    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
  
      assert(SvTYPE(src) == SVt_INVLIST);
      assert(SvTYPE(dest) == SVt_INVLIST);
      assert(! invlist_is_iterating(src));
+    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
  
      /* Make sure it ends in the right place with a NUL, as our inversion list
       * manipulations aren't careful to keep this true, but sv_usepvn_flags()
@@ -8429,16 +8444,28 @@ S_invlist_set_previous_index(SV* const invlist, const IV index)
  PERL_STATIC_INLINE void
  S_invlist_trim(SV* invlist)
  {
+    /* Free the not currently-being-used space in an inversion list */
+
+    /* But don't free up the space needed for the 0 UV that is always at the
+     * beginning of the list, nor the trailing NUL */
+    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
+
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    /* Change the length of the inversion list to how many entries it currently
-     * has.  But don't shorten it so that it would free up the required
-     * initial 0 UV (and a trailing NUL byte) */
-    if (SvCUR(invlist) > TO_INTERNAL_SIZE(1) + 1) {
-        SvPV_shrink_to_cur(invlist);
-    }
+    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
+}
+
+PERL_STATIC_INLINE void
+S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
+{
+    PERL_ARGS_ASSERT_INVLIST_CLEAR;
+
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    invlist_set_len(invlist, 0, 0);
+    invlist_trim(invlist);
  }
  
  #endif /* ifndef PERL_IN_XSUB_RE */
@@ -8893,56 +8920,103 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
  
-    /* If either one is empty, the union is the other one */
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-        bool make_temp = FALSE; /* Should we mortalize the result? */
+    len_b = _invlist_len(b);
+    if (len_b == 0) {
  
-       if (*output == a) {
-            if (a != NULL) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+        /* Here, 'b' is empty.  If the output is the complement of 'b', the
+         * union is all possible code points, and we need not even look at 'a'.
+         * It's easiest to create a new inversion list that matches everything.
+         * */
+        if (complement_b) {
+            SV* everything = _new_invlist(1);
+            _append_range_to_invlist(everything, 0, UV_MAX);
+
+            /* If the output didn't exist, just point it at the new list */
+            if (*output == NULL) {
+                *output = everything;
+                return;
              }
-       }
-       if (*output != b) {
-           *output = invlist_clone(b);
-            if (complement_b) {
-                _invlist_invert(*output);
+
+            /* Otherwise, replace its contents with the new list */
+            invlist_replace_list_destroys_src(*output, everything);
+            SvREFCNT_dec_NN(everything);
+            return;
+        }
+
+        /* Here, we don't want the complement of 'b', and since it is empty,
+         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
+         * output will be empty */
+
+        if (a == NULL) {
+            *output = _new_invlist(0);
+            return;
+        }
+
+        if (_invlist_len(a) == 0) {
+            invlist_clear(*output);
+            return;
+        }
+
+        /* Here, 'a' is not empty, and entirely determines the union.  If the
+         * output is not to overwrite 'b', we can just return 'a'. */
+        if (*output != b) {
+
+            /* If the output is to overwrite 'a', we have a no-op, as it's
+             * already in 'a' */
+            if (*output == a) {
+                return;
              }
-       } /* else *output already = b; */
  
-        if (make_temp) {
-            sv_2mortal(*output);
+            /* But otherwise we have to copy 'a' to the output */
+            *output = invlist_clone(a);
+            return;
          }
+
+        /* Here, 'b' is to be overwritten by the output, which will be 'a' */
+        u = invlist_clone(a);
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
+
         return;
      }
-    else if ((len_b = _invlist_len(b)) == 0) {
-        bool make_temp = FALSE;
-       if (*output == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
+
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
+
+        /* Here, 'a' is empty.  That means the union will come entirely from
+         * 'b'.  If the output is not to overwrite 'a', we can just return
+         * what's in 'b'.  */
+        if (*output != a) {
+
+            /* If the output is to overwrite 'b', it's already in 'b', but
+             * otherwise we have to copy 'b' to the output */
+            if (*output != b) {
+                *output = invlist_clone(b);
              }
-       }
  
-        /* The complement of an empty list is a list that has everything in it,
-         * so the union with <a> includes everything too */
-        if (complement_b) {
-            if (a == *output) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+            /* And if the output is to be the inversion of 'b', do that */
+            if (complement_b) {
+                _invlist_invert(*output);
              }
-            *output = _new_invlist(1);
-            _append_range_to_invlist(*output, 0, UV_MAX);
+
+            return;
          }
-        else if (*output != a) {
-            *output = invlist_clone(a);
+
+        /* Here, 'a', which is empty or even NULL, is to be overwritten by the
+         * output, which will either be 'b' or the complement of 'b' */
+
+        if (a == NULL) {
+            *output = invlist_clone(b);
          }
-        /* else *output already = a; */
+        else {
+            u = invlist_clone(b);
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
+       }
  
-        if (make_temp) {
-            sv_2mortal(*output);
+        if (complement_b) {
+            _invlist_invert(*output);
          }
+
         return;
      }
  
@@ -9008,7 +9082,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* Here, have chosen which of the two inputs to look at.  Only output
          * if the running count changes to/from 0, which marks the
-        * beginning/end of a range in that's in the set */
+        * beginning/end of a range that's in the set */
         if (cp_in_set) {
             if (count == 0) {
                 array_u[i_u++] = cp;
@@ -9069,7 +9143,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* When 'count' is 0, the list that was exhausted (if one was shorter than
       * the other) ended with everything above it not in its set.  That means
       * that the remaining part of the union is precisely the same as the
-     * non-exhausted list, so can just copy it unchanged.  (If both list were
+     * non-exhausted list, so can just copy it unchanged.  (If both lists were
       * exhausted at the same time, then the operations below will be both 0.)
       */
      if (count == 0) {
@@ -9101,7 +9175,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
              *output = u;
          }
          else {
-            invlist_replace_list(*output, u);
+            invlist_replace_list_destroys_src(*output, u);
              SvREFCNT_dec_NN(u);
          }
      }
@@ -9158,50 +9232,38 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Special case if either one is empty */
      len_a = (a == NULL) ? 0 : _invlist_len(a);
      if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
-        bool make_temp = FALSE;
-
          if (len_a != 0 && complement_b) {
  
-            /* Here, 'a' is not empty, therefore from the above 'if', 'b' must
-             * be empty.  Here, also we are using 'b's complement, which hence
-             * must be every possible code point.  Thus the intersection is
-             * simply 'a'. */
-            if (*i != a) {
-                if (*i == b) {
-                    if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                        SvREFCNT_dec_NN(b);
-                    }
-                }
+            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
+             * must be empty.  Here, also we are using 'b's complement, which
+             * hence must be every possible code point.  Thus the intersection
+             * is simply 'a'. */
  
-                *i = invlist_clone(a);
+            if (*i == a) {  /* No-op */
+                return;
              }
-            /* else *i is already 'a' */
  
-            if (make_temp) {
-                sv_2mortal(*i);
+            /* If not overwriting either input, just make a copy of 'a' */
+            if (*i != b) {
+                *i = invlist_clone(a);
+                return;
              }
+
+            /* Here we are overwriting 'b' with 'a's contents */
+            r = invlist_clone(a);
+            invlist_replace_list_destroys_src(*i, r);
+            SvREFCNT_dec_NN(r);
              return;
          }
  
          /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
           * intersection must be empty */
-       if (*i == a) {
-            if (a != NULL) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
-            }
-       }
-       else if (*i == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
-            }
-       }
-       *i = _new_invlist(0);
-        if (make_temp) {
-            sv_2mortal(*i);
+        if (*i == NULL) {
+            *i = _new_invlist(0);
+            return;
          }
  
+        invlist_clear(*i);
         return;
      }
  
@@ -9353,11 +9415,10 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
          }
          else {
              if (len_r) {
-                invlist_replace_list(*i, r);
+                invlist_replace_list_destroys_src(*i, r);
              }
              else {
-                invlist_set_len(*i, 0, 0);
-                invlist_trim(*i);
+                invlist_clear(*i);
              }
              SvREFCNT_dec_NN(r);
          }
@@ -9598,7 +9659,7 @@ S_invlist_highest(SV* const invlist)
             : array[len - 1] - 1;
  }
  
-SV *
+STATIC SV *
  S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
  {
      /* Get the contents of an inversion list into a string SV so that they can
@@ -10257,7 +10318,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * indivisible */
          bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
  
-        assert(RExC_parse < RExC_end);
+        if (RExC_parse >= RExC_end) {
+           vFAIL("Unmatched (");
+        }
  
          if ( *RExC_parse == '*') { /* (*VERB:ARG) */
             char *start_verb = RExC_parse + 1;
@@ -13554,9 +13617,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
  /* 'posix_warnings' and 'warn_text' are names of variables in the following
   * routine. q.v. */
  #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
-        if (posix_warnings && (   posix_warnings != (AV **) -1              \
-                               || (PASS2 && ckWARN(WARN_REGEXP))))          \
-        {                                                                   \
+        if (posix_warnings) {                                               \
              if (! warn_text) warn_text = newAV();                           \
              av_push(warn_text, Perl_newSVpvf(aTHX_                          \
                                               WARNING_PREFIX                 \
@@ -13575,8 +13636,9 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                    besides RExC_parse. */
      char ** updated_parse_ptr, /* Where to set the updated parse pointer, or
                                    NULL */
-    AV ** posix_warnings       /* Where to place any generated warnings, or -1
-                                  if to output them, or NULL */
+    AV ** posix_warnings,      /* Where to place any generated warnings, or
+                                  NULL */
+    const bool check_only      /* Don't die if error */
  )
  {
      /* This parses what the caller thinks may be one of the three POSIX
@@ -13602,19 +13664,13 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
       *      'updated_parse_ptr' is not changed.  No warnings nor errors are
       *      raised.
       *
-     * In b) there may be warnings and even errors generated.  What to do about
-     * these is determined by the 'posix_warnings' parameter.  If it is NULL,
-     * this call is treated as a check-only, scouting-out-the-territory call,
-     * and no warnings nor errors are generated at all.  Otherwise, any errors
-     * are raised if found.  If 'posix_warnings' is -1 (appropriately cast),
-     * warnings are generated and displayed (in pass 2), just as they would be
-     * for any other message of the same type from this file.  If it isn't NULL
-     * and not -1, warnings aren't displayed, but instead an AV is generated
-     * with all the warning messages (that aren't to be ignored) stored into
-     * it, so that the caller can output them if it wants.  This is done in all
+     * In b) there may be errors or warnings generated.  If 'check_only' is
+     * TRUE, then any errors are discarded.  Warnings are returned to the
+     * caller via an AV* created into '*posix_warnings' if it is not NULL.  If
+     * instead it is NULL, warnings are suppressed.  This is done in all
       * passes.  The reason for this is that the rest of the parsing is heavily
       * dependent on whether this routine found a valid posix class or not.  If
-     * it did, the closing ']' is absorbed as part of the class.  If no class
+     * it did, the closing ']' is absorbed as part of the class.  If no class,
       * or an invalid one is found, any ']' will be considered the terminator of
       * the outer bracketed character class, leading to very different results.
       * In particular, a '(?[ ])' construct will likely have a syntax error if
@@ -13736,40 +13792,59 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
      /* For [. .] and [= =].  These are quite different internally from [: :],
       * so they are handled separately.  */
-    if (POSIXCC_NOTYET(*p)) {
+    if (POSIXCC_NOTYET(*p) && p < e - 3) /* 1 for the close, and 1 for the ']'
+                                            and 1 for at least one char in it
+                                          */
+    {
          const char open_char  = *p;
          const char * temp_ptr = p + 1;
-        unsigned int len      = 0;
  
          /* These two constructs are not handled by perl, and if we find a
-         * syntactically valid one, we croak.  It looks like just about any
-         * byte can be in them, but they are likely very short, like [.ch.] to
-         * denote a ligature 'ch' single character.  If we find something that
-         * started out to look like one of these constructs, but isn't, we
-         * break so that it can be checked for being a class name with a typo
-         * of '.' or '=' instead of a colon */
-        while (temp_ptr < e) {
-            len++;
-
-            /* qr/[[.].]]/, for example, is valid.  But otherwise we quit on an
-             * unexpected ']'.  It is possible, it appears, for such a ']' to
-             * be not in the final position, but that's so unlikely that that
-             * case is not handled. */
-            if (*temp_ptr == ']' && temp_ptr[1] != open_char) {
-                break;
-            }
-
-            /* XXX this could be cut down, but this value is certainly large
-             * enough */
-            if (len > 10) {
-                break;
-            }
+         * syntactically valid one, we croak.  khw, who wrote this code, finds
+         * this explanation of them very unclear:
+         * http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap09.html
+         * And searching the rest of the internet wasn't very helpful either.
+         * It looks like just about any byte can be in these constructs,
+         * depending on the locale.  But unless the pattern is being compiled
+         * under /l, which is very rare, Perl runs under the C or POSIX locale.
+         * In that case, it looks like [= =] isn't allowed at all, and that
+         * [. .] could be any single code point, but for longer strings the
+         * constituent characters would have to be the ASCII alphabetics plus
+         * the minus-hyphen.  Any sensible locale definition would limit itself
+         * to these.  And any portable one definitely should.  Trying to parse
+         * the general case is a nightmare (see [perl #127604]).  So, this code
+         * looks only for interiors of these constructs that match:
+         *      qr/.|[-\w]{2,}/
+         * Using \w relaxes the apparent rules a little, without adding much
+         * danger of mistaking something else for one of these constructs.
+         *
+         * [. .] in some implementations described on the internet is usable to
+         * escape a character that otherwise is special in bracketed character
+         * classes.  For example [.].] means a literal right bracket instead of
+         * the ending of the class
+         *
+         * [= =] can legitimately contain a [. .] construct, but we don't
+         * handle this case, as that [. .] construct will later get parsed
+         * itself and croak then.  And [= =] is checked for even when not under
+         * /l, as Perl has long done so.
+         *
+         * The code below relies on there being a trailing NUL, so it doesn't
+         * have to keep checking if the parse ptr < e.
+         */
+        if (temp_ptr[1] == open_char) {
+            temp_ptr++;
+        }
+        else while (    temp_ptr < e
+                    && (isWORDCHAR(*temp_ptr) || *temp_ptr == '-'))
+        {
+            temp_ptr++;
+        }
  
-            if (*temp_ptr == open_char) {
+        if (*temp_ptr == open_char) {
                  temp_ptr++;
                  if (*temp_ptr == ']') {
                      temp_ptr++;
-                    if (! found_problem && posix_warnings) {
+                    if (! found_problem && ! check_only) {
                          RExC_parse = (char *) temp_ptr;
                          vFAIL3("POSIX syntax [%c %c] is reserved for future "
                                 "extensions", open_char, open_char);
@@ -13783,38 +13858,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
                      return OOB_NAMEDCLASS;
                  }
-            }
-            else if (*temp_ptr == '\\') {
-
-                /* A backslash is treate as like any other character, unless it
-                 * precedes a comment starter.  XXX multiple backslashes in a
-                 * row are not handled specially here, nor would they ever
-                 * likely to be handled specially in one of these constructs */
-                if (temp_ptr[1] == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                    temp_ptr++;
-                }
-                temp_ptr++;
-            }
-            else if (*temp_ptr == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                break;  /* Under no circumstances can we look at the interior
-                           of a comment */
-            }
-            else if (*temp_ptr == '\n') {   /* And we don't allow newlines
-                                               either as it's extremely
-                                               unlikely that one could be in an
-                                               intended class */
-                break;
-            }
-            else if (UTF && ! UTF8_IS_INVARIANT(*temp_ptr)) {
-                /* XXX Since perl will never handle multi-byte locales, except
-                 * for UTF-8, we could break if we found a byte above latin1,
-                 * but perhaps the person intended to use one. */
-                temp_ptr += UTF8SKIP(temp_ptr);
-            }
-            else {
-                temp_ptr++;
-            }
          }
+        /* If we find something that started out to look like one of these
+         * constructs, but isn't, we continue below so that it can be checked
+         * for being a class name with a typo of '.' or '=' instead of a colon.
+         * */
      }
  
      /* Here, we think there is a possibility that a [: :] class was meant, and
@@ -14335,16 +14383,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              }
  
              if (warn_text) {
-                if (posix_warnings != (AV **) -1) {
-                    *posix_warnings = warn_text;
+                if (posix_warnings) {
+                    /* mortalize to avoid a leak with FATAL warnings */
+                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
                  }
                  else {
-                    SV * msg;
-                    while ((msg = av_shift(warn_text)) != &PL_sv_undef) {
-                        Perl_warner(aTHX_ packWARN(WARN_REGEXP),
-                                    "%s", SvPVX(msg));
-                        SvREFCNT_dec_NN(msg);
-                    }
                      SvREFCNT_dec_NN(warn_text);
                  }
              }
@@ -14355,7 +14398,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
               * one */
              return class_number + complement;
          }
-        else if (posix_warnings) {
+        else if (! check_only) {
  
              /* Here, it is an unrecognized class.  This is an error (unless the
              * call is to check only, which we've already handled above) */
@@ -14478,10 +14521,11 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  {
                      /* See if this is a [:posix:] class. */
                      bool is_posix_class = (OOB_NAMEDCLASS
-                                           < handle_possible_posix(pRExC_state,
-                                                                RExC_parse + 1,
-                                                                NULL,
-                                                                NULL));
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
                      /* If it is a posix class, leave the parse pointer at the
                       * '[' to fool regclass() into thinking it is part of a
                       * '[[:posix:]]'. */
@@ -14539,13 +14583,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
        no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings) {
-            SV * msg;
-            while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
-                SvREFCNT_dec_NN(msg);
-            }
-            SvREFCNT_dec_NN(posix_warnings);
+        if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
          FAIL("Syntax error in (?[...])");
@@ -14783,10 +14822,11 @@ redo_curchar:
              {
                  /* See if this is a [:posix:] class. */
                  bool is_posix_class = (OOB_NAMEDCLASS
-                                        < handle_possible_posix(pRExC_state,
-                                                            RExC_parse + 1,
-                                                            NULL,
-                                                            NULL));
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
                  /* If it is a posix class, leave the parse pointer at the '['
                   * to fool regclass() into thinking it is part of a
                   * '[[:posix:]]'. */
@@ -15261,6 +15301,43 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
      }
  }
  
+STATIC void
+S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+{
+    /* If the final parameter is NULL, output the elements of the array given
+     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
+     * pushed onto it, (creating if necessary) */
+
+    SV * msg;
+    const bool first_is_fatal =  ! return_posix_warnings
+                                && ckDEAD(packWARN(WARN_REGEXP));
+
+    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+
+    while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
+        if (return_posix_warnings) {
+            if (! *return_posix_warnings) { /* mortalize to not leak if
+                                               warnings are fatal */
+                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
+            }
+            av_push(*return_posix_warnings, msg);
+        }
+        else {
+            if (first_is_fatal) {           /* Avoid leaking this */
+                av_undef(posix_warnings);   /* This isn't necessary if the
+                                               array is mortal, but is a
+                                               fail-safe */
+                (void) sv_2mortal(msg);
+                if (PASS2) {
+                    SAVEFREESV(RExC_rx_sv);
+                }
+            }
+            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+            SvREFCNT_dec_NN(msg);
+        }
+    }
+}
+
  STATIC AV *
  S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
  {
@@ -15342,7 +15419,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   bool optimizable,                  /* ? Allow a non-ANYOF return
                                                         node */
                   SV** ret_invlist, /* Return an inversion list, not a node */
-                 AV** posix_warnings
+                 AV** return_posix_warnings
            )
  {
      /* parse a bracketed class specification.  Most of these will produce an
@@ -15456,12 +15533,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      const SSize_t orig_size = RExC_size;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
-    /* This variable is used to mark where in the input something that looks
-     * like a POSIX construct ends.  During the parse, when something looks
-     * like it could be such a construct is encountered, it is checked for
-     * being one, but not if we've already checked this area of the input.
-     * Only after this position is reached do we check again */
-    char *dont_check_for_posix_end = RExC_parse - 1;
+    /* This variable is used to mark where the end in the input is of something
+     * that looks like a POSIX construct but isn't.  During the parse, when
+     * something looks like it could be such a construct is encountered, it is
+     * checked for being one, but not if we've already checked this area of the
+     * input.  Only after this position is reached do we check again */
+    char *not_posix_region_end = RExC_parse - 1;
+
+    AV* posix_warnings = NULL;
+    const bool do_posix_warnings =     return_posix_warnings
+                                   || (PASS2 && ckWARN(WARN_REGEXP));
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -15478,10 +15559,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      allow_multi_folds = FALSE;
  #endif
  
-    if (posix_warnings == NULL) {
-        posix_warnings = (AV **) -1;
-    }
-
      /* Assume we are going to generate an ANYOF node. */
      ret = reganode(pRExC_state,
                     (LOC)
@@ -15516,25 +15593,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* Check that they didn't say [:posix:] instead of [[:posix:]] */
      if (! ret_invlist && MAYBE_POSIXCC(UCHARAT(RExC_parse))) {
-        char *class_end;
-        int maybe_class = handle_possible_posix(pRExC_state, RExC_parse,
-                                             &class_end, NULL);
-        if (maybe_class >= OOB_NAMEDCLASS) {
-            dont_check_for_posix_end = class_end;
-            if (PASS2 && posix_warnings == (AV **) -1) {
-                SAVEFREESV(RExC_rx_sv);
-                ckWARN4reg(class_end,
-                        "POSIX syntax [%c %c] belongs inside character classes%s",
-                        *RExC_parse, *RExC_parse,
-                        (maybe_class == OOB_NAMEDCLASS)
-                        ? ((POSIXCC_NOTYET(*RExC_parse))
-                            ? " (but this one isn't implemented)"
-                            : " (but this one isn't fully valid)")
-                        : ""
-                        );
-                (void)ReREFCNT_inc(RExC_rx_sv);
-            }
-       }
+        int maybe_class = handle_possible_posix(pRExC_state,
+                                                RExC_parse,
+                                                &not_posix_region_end,
+                                                NULL,
+                                                TRUE /* checking only */);
+        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+            SAVEFREESV(RExC_rx_sv);
+            ckWARN4reg(not_posix_region_end,
+                    "POSIX syntax [%c %c] belongs inside character classes%s",
+                    *RExC_parse, *RExC_parse,
+                    (maybe_class == OOB_NAMEDCLASS)
+                    ? ((POSIXCC_NOTYET(*RExC_parse))
+                        ? " (but this one isn't implemented)"
+                        : " (but this one isn't fully valid)")
+                    : ""
+                    );
+            (void)ReREFCNT_inc(RExC_rx_sv);
+        }
      }
  
      /* If the caller wants us to just parse a single element, accomplish this
@@ -15548,6 +15624,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         goto charclassloop;
  
      while (1) {
+
+        if (   posix_warnings
+            && av_tindex(posix_warnings) >= 0
+            && RExC_parse > not_posix_region_end)
+        {
+            /* Warnings about posix class issues are considered tentative until
+             * we are far enough along in the parse that we can no longer
+             * change our mind, at which point we either output them or add
+             * them, if it has so specified, to what gets returned to the
+             * caller.  This is done each time through the loop so that a later
+             * class won't zap them before they have been dealt with. */
+            output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                            return_posix_warnings);
+        }
+
          if  (RExC_parse >= stop_ptr) {
              break;
          }
@@ -15579,19 +15670,44 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             value = UCHARAT(RExC_parse++);
  
          if (value == '[') {
-            namedclass = handle_possible_posix(pRExC_state, RExC_parse, &dont_check_for_posix_end, posix_warnings);
+            char * posix_class_end;
+            namedclass = handle_possible_posix(pRExC_state,
+                                               RExC_parse,
+                                               &posix_class_end,
+                                               do_posix_warnings ? &posix_warnings : NULL,
+                                               FALSE    /* die if error */);
              if (namedclass > OOB_NAMEDCLASS) {
-                RExC_parse = dont_check_for_posix_end;
+
+                /* If there was an earlier attempt to parse this particular
+                 * posix class, and it failed, it was a false alarm, as this
+                 * successful one proves */
+                if (   posix_warnings
+                    && av_tindex(posix_warnings) >= 0
+                    && not_posix_region_end >= RExC_parse
+                    && not_posix_region_end <= posix_class_end)
+                {
+                    av_undef(posix_warnings);
+                }
+
+                RExC_parse = posix_class_end;
+            }
+            else if (namedclass == OOB_NAMEDCLASS) {
+                not_posix_region_end = posix_class_end;
              }
              else {
                  namedclass = OOB_NAMEDCLASS;
              }
          }
-        else if (   RExC_parse - 1 > dont_check_for_posix_end
+        else if (   RExC_parse - 1 > not_posix_region_end
                   && MAYBE_POSIXCC(value))
          {
-            (void) handle_possible_posix(pRExC_state, RExC_parse - 1,  /* -1 because parse has already been advanced */
-                    &dont_check_for_posix_end, posix_warnings);
+            (void) handle_possible_posix(
+                        pRExC_state,
+                        RExC_parse - 1,  /* -1 because parse has already been
+                                            advanced */
+                        &not_posix_region_end,
+                        do_posix_warnings ? &posix_warnings : NULL,
+                        TRUE /* checking only */);
          }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
@@ -15753,6 +15869,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      SV* invlist;
                      char* name;
                      char* base_name;    /* name after any packages are stripped */
+                    char* lookup_name = NULL;
                      const char * const colon_colon = "::";
  
                      /* Try to get the definition of the property into
@@ -15760,23 +15877,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    name = savepv(Perl_form(aTHX_
-                                          "%s%.*s%s\n",
-                                          (FOLD) ? "__" : "",
-                                          (int)n,
-                                          RExC_parse,
-                                          (FOLD) ? "_i" : ""
-                                ));
+                    name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+                    if (FOLD) {
+                        lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+                    }
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
                      SvREFCNT_dec(swash); /* Free any left-overs */
-                    swash = _core_swash_init("utf8", name, &PL_sv_undef,
+                    swash = _core_swash_init("utf8",
+                                             (lookup_name)
+                                              ? lookup_name
+                                              : name,
+                                             &PL_sv_undef,
                                               1, /* binary */
                                               0, /* not tr/// */
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
+                    if (lookup_name) {
+                        Safefree(lookup_name);
+                    }
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
@@ -15843,9 +15964,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  name = savepvn(full_name, n);
                              }
                          }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
                                          (value == 'p' ? '+' : '!'),
-                                        UTF8fARG(UTF, n, name));
+                                        (FOLD) ? "__" : "",
+                                        UTF8fARG(UTF, n, name),
+                                        (FOLD) ? "_i" : "");
                          has_user_defined_property = TRUE;
                          optimizable = FALSE;    /* Will have to leave this an
                                                     ANYOF node */
@@ -16499,6 +16622,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
+
+    if (   posix_warnings && av_tindex(posix_warnings) >= 0) {
+        output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                        return_posix_warnings);
+    }
+
      /* If anything in the class expands to more than one character, we have to
       * deal with them by building up a substitute parse string, and recursively
       * calling reg() on it, instead of proceeding */
@@ -18675,7 +18804,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          }
  
          /* And, finally, add the above-the-bitmap stuff */
-        if (nonbitmap_invlist) {
+        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
              SV* contents;
  
              /* See if truncation size is overridden */
@@ -19837,7 +19966,11 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      /* Accumulate the bit map into the unconditional match list */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (BITMAP_TEST(bitmap, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            int start = i++;
+            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
          }
      }