PATCH: [perl #127581] Spurious warning about posix class
authorKarl Williamson <khw@cpan.org>
Tue, 1 Mar 2016 01:39:20 +0000 (18:39 -0700)
committerKarl Williamson <khw@cpan.org>
Tue, 1 Mar 2016 16:24:25 +0000 (09:24 -0700)
embed.fnc
embed.h
proto.h
regcomp.c
t/re/reg_mesg.t

index a08ebdc..bd59024 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2143,6 +2143,10 @@ Es       |void    |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \
                                |NULLOK SV* const only_utf8_locale_list    \
                                |NULLOK SV* const swash                    \
                                |const bool has_user_defined_property
+Es     |void   |output_or_return_posix_warnings                            \
+                               |NN RExC_state_t *pRExC_state               \
+                               |NN AV* posix_warnings                      \
+                               |NULLOK AV** return_posix_warnings
 Es     |AV*     |add_multi_match|NULLOK AV* multi_char_matches             \
                                |NN SV* multi_string                        \
                                |const STRLEN cp_count
@@ -2245,7 +2249,8 @@ Es        |int    |handle_possible_posix                                      \
                                |NN RExC_state_t *pRExC_state               \
                                |NN const char* const s                     \
                                |NULLOK char ** updated_parse_ptr           \
-                               |NULLOK AV** posix_warnings
+                               |NULLOK AV** posix_warnings                 \
+                               |const bool check_only
 Es     |I32    |make_trie      |NN RExC_state_t *pRExC_state \
                                |NN regnode *startbranch|NN regnode *first \
                                |NN regnode *last|NN regnode *tail \
diff --git a/embed.h b/embed.h
index ede3520..ff3b57c 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define get_invlist_iter_addr  S_get_invlist_iter_addr
 #define grok_bslash_N(a,b,c,d,e,f,g)   S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
 #define handle_named_backref(a,b,c,d)  S_handle_named_backref(aTHX_ a,b,c,d)
-#define handle_possible_posix(a,b,c,d) S_handle_possible_posix(aTHX_ a,b,c,d)
+#define handle_possible_posix(a,b,c,d,e)       S_handle_possible_posix(aTHX_ a,b,c,d,e)
 #define handle_regex_sets(a,b,c,d,e)   S_handle_regex_sets(aTHX_ a,b,c,d,e)
 #define invlist_clear(a)       S_invlist_clear(aTHX_ a)
 #define invlist_clone(a)       S_invlist_clone(aTHX_ a)
 #define join_exact(a,b,c,d,e,f,g)      S_join_exact(aTHX_ a,b,c,d,e,f,g)
 #define make_trie(a,b,c,d,e,f,g,h)     S_make_trie(aTHX_ a,b,c,d,e,f,g,h)
 #define nextchar(a)            S_nextchar(aTHX_ a)
+#define output_or_return_posix_warnings(a,b,c) S_output_or_return_posix_warnings(aTHX_ a,b,c)
 #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
 #define populate_ANYOF_from_invlist(a,b)       S_populate_ANYOF_from_invlist(aTHX_ a,b)
 #define reg(a,b,c,d)           S_reg(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 0762b5c..72bed22 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4743,7 +4743,7 @@ STATIC bool       S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** nodep, UV
 PERL_STATIC_INLINE regnode*    S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
 #define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF  \
        assert(pRExC_state); assert(flagp); assert(parse_start)
-STATIC int     S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings);
+STATIC int     S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
 #define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
        assert(pRExC_state); assert(s)
 STATIC regnode*        S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
@@ -4806,6 +4806,9 @@ STATIC I32        S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, re
 STATIC void    S_nextchar(pTHX_ RExC_state_t *pRExC_state);
 #define PERL_ARGS_ASSERT_NEXTCHAR      \
        assert(pRExC_state)
+STATIC void    S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings);
+#define PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS       \
+       assert(pRExC_state); assert(posix_warnings)
 STATIC void    S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state);
 #define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS   \
        assert(pRExC_state)
index 83e2635..7a4a6d9 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -13610,9 +13610,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
 /* 'posix_warnings' and 'warn_text' are names of variables in the following
  * routine. q.v. */
 #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
-        if (posix_warnings && (   posix_warnings != (AV **) -1              \
-                               || (PASS2 && ckWARN(WARN_REGEXP))))          \
-        {                                                                   \
+        if (posix_warnings) {                                               \
             if (! warn_text) warn_text = newAV();                           \
             av_push(warn_text, Perl_newSVpvf(aTHX_                          \
                                              WARNING_PREFIX                 \
@@ -13631,8 +13629,9 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                   besides RExC_parse. */
     char ** updated_parse_ptr, /* Where to set the updated parse pointer, or
                                   NULL */
-    AV ** posix_warnings       /* Where to place any generated warnings, or -1
-                                  if to output them, or NULL */
+    AV ** posix_warnings,      /* Where to place any generated warnings, or
+                                  NULL */
+    const bool check_only      /* Don't die if error */
 )
 {
     /* This parses what the caller thinks may be one of the three POSIX
@@ -13658,16 +13657,10 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      *      'updated_parse_ptr' is not changed.  No warnings nor errors are
      *      raised.
      *
-     * In b) there may be warnings and even errors generated.  What to do about
-     * these is determined by the 'posix_warnings' parameter.  If it is NULL,
-     * this call is treated as a check-only, scouting-out-the-territory call,
-     * and no warnings nor errors are generated at all.  Otherwise, any errors
-     * are raised if found.  If 'posix_warnings' is -1 (appropriately cast),
-     * warnings are generated and displayed (in pass 2), just as they would be
-     * for any other message of the same type from this file.  If it isn't NULL
-     * and not -1, warnings aren't displayed, but instead an AV is generated
-     * with all the warning messages (that aren't to be ignored) stored into
-     * it, so that the caller can output them if it wants.  This is done in all
+     * In b) there may be errors or warnings generated.  If 'check_only' is
+     * TRUE, then any errors are discarded.  Warnings are returned to the
+     * caller via an AV* created into '*posix_warnings' if it is not NULL.  If
+     * instead it is NULL, warnings are suppressed.  This is done in all
      * passes.  The reason for this is that the rest of the parsing is heavily
      * dependent on whether this routine found a valid posix class or not.  If
      * it did, the closing ']' is absorbed as part of the class.  If no class,
@@ -13825,7 +13818,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                 temp_ptr++;
                 if (*temp_ptr == ']') {
                     temp_ptr++;
-                    if (! found_problem && posix_warnings) {
+                    if (! found_problem && ! check_only) {
                         RExC_parse = (char *) temp_ptr;
                         vFAIL3("POSIX syntax [%c %c] is reserved for future "
                                "extensions", open_char, open_char);
@@ -14391,16 +14384,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
             }
 
             if (warn_text) {
-                if (posix_warnings != (AV **) -1) {
-                    *posix_warnings = warn_text;
+                if (posix_warnings) {
+                    /* mortalize to avoid a leak with FATAL warnings */
+                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
                 }
                 else {
-                    SV * msg;
-                    while ((msg = av_shift(warn_text)) != &PL_sv_undef) {
-                        Perl_warner(aTHX_ packWARN(WARN_REGEXP),
-                                    "%s", SvPVX(msg));
-                        SvREFCNT_dec_NN(msg);
-                    }
                     SvREFCNT_dec_NN(warn_text);
                 }
             }
@@ -14411,7 +14399,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              * one */
             return class_number + complement;
         }
-        else if (posix_warnings) {
+        else if (! check_only) {
 
             /* Here, it is an unrecognized class.  This is an error (unless the
             * call is to check only, which we've already handled above) */
@@ -14537,7 +14525,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                             < handle_possible_posix(pRExC_state,
                                                 RExC_parse + 1,
                                                 NULL,
-                                                NULL));
+                                                NULL,
+                                                TRUE /* checking only */));
                     /* If it is a posix class, leave the parse pointer at the
                      * '[' to fool regclass() into thinking it is part of a
                      * '[[:posix:]]'. */
@@ -14595,13 +14584,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       no_close:
         /* We output the messages even if warnings are off, because we'll fail
          * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings) {
-            SV * msg;
-            while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
-                SvREFCNT_dec_NN(msg);
-            }
-            SvREFCNT_dec_NN(posix_warnings);
+        if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
         }
 
         FAIL("Syntax error in (?[...])");
@@ -14842,7 +14826,8 @@ redo_curchar:
                             < handle_possible_posix(pRExC_state,
                                                 RExC_parse + 1,
                                                 NULL,
-                                                NULL));
+                                                NULL,
+                                                TRUE /* checking only */));
                 /* If it is a posix class, leave the parse pointer at the '['
                  * to fool regclass() into thinking it is part of a
                  * '[[:posix:]]'. */
@@ -15317,6 +15302,43 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
     }
 }
 
+STATIC void
+S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+{
+    /* If the final parameter is NULL, output the elements of the array given
+     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
+     * pushed onto it, (creating if necessary) */
+
+    SV * msg;
+    const bool first_is_fatal =  ! return_posix_warnings
+                                && ckDEAD(packWARN(WARN_REGEXP));
+
+    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+
+    while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
+        if (return_posix_warnings) {
+            if (! *return_posix_warnings) { /* mortalize to not leak if
+                                               warnings are fatal */
+                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
+            }
+            av_push(*return_posix_warnings, msg);
+        }
+        else {
+            if (first_is_fatal) {           /* Avoid leaking this */
+                av_undef(posix_warnings);   /* This isn't necessary if the
+                                               array is mortal, but is a
+                                               fail-safe */
+                (void) sv_2mortal(msg);
+                if (PASS2) {
+                    SAVEFREESV(RExC_rx_sv);
+                }
+            }
+            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+            SvREFCNT_dec_NN(msg);
+        }
+    }
+}
+
 STATIC AV *
 S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
 {
@@ -15519,6 +15541,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * input.  Only after this position is reached do we check again */
     char *not_posix_region_end = RExC_parse - 1;
 
+    AV* posix_warnings = NULL;
+    const bool do_posix_warnings =     return_posix_warnings
+                                   || (PASS2 && ckWARN(WARN_REGEXP));
+
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGCLASS;
@@ -15534,10 +15560,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     allow_multi_folds = FALSE;
 #endif
 
-    if (return_posix_warnings == NULL) {
-        return_posix_warnings = (AV **) -1;
-    }
-
     /* Assume we are going to generate an ANYOF node. */
     ret = reganode(pRExC_state,
                    (LOC)
@@ -15572,27 +15594,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     /* Check that they didn't say [:posix:] instead of [[:posix:]] */
     if (! ret_invlist && MAYBE_POSIXCC(UCHARAT(RExC_parse))) {
-        char *class_end;
         int maybe_class = handle_possible_posix(pRExC_state,
                                                 RExC_parse,
-                                                &class_end,
-                                                NULL);
-        if (maybe_class >= OOB_NAMEDCLASS) {
-            not_posix_region_end = class_end;
-            if (PASS2 && return_posix_warnings == (AV **) -1) {
-                SAVEFREESV(RExC_rx_sv);
-                ckWARN4reg(class_end,
-                        "POSIX syntax [%c %c] belongs inside character classes%s",
-                        *RExC_parse, *RExC_parse,
-                        (maybe_class == OOB_NAMEDCLASS)
-                        ? ((POSIXCC_NOTYET(*RExC_parse))
-                            ? " (but this one isn't implemented)"
-                            : " (but this one isn't fully valid)")
-                        : ""
-                        );
-                (void)ReREFCNT_inc(RExC_rx_sv);
-            }
-       }
+                                                &not_posix_region_end,
+                                                NULL,
+                                                TRUE /* checking only */);
+        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+            SAVEFREESV(RExC_rx_sv);
+            ckWARN4reg(not_posix_region_end,
+                    "POSIX syntax [%c %c] belongs inside character classes%s",
+                    *RExC_parse, *RExC_parse,
+                    (maybe_class == OOB_NAMEDCLASS)
+                    ? ((POSIXCC_NOTYET(*RExC_parse))
+                        ? " (but this one isn't implemented)"
+                        : " (but this one isn't fully valid)")
+                    : ""
+                    );
+            (void)ReREFCNT_inc(RExC_rx_sv);
+        }
     }
 
     /* If the caller wants us to just parse a single element, accomplish this
@@ -15606,6 +15625,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        goto charclassloop;
 
     while (1) {
+
+        if (   posix_warnings
+            && av_tindex(posix_warnings) >= 0
+            && RExC_parse > not_posix_region_end)
+        {
+            /* Warnings about posix class issues are considered tentative until
+             * we are far enough along in the parse that we can no longer
+             * change our mind, at which point we either output them or add
+             * them, if it has so specified, to what gets returned to the
+             * caller.  This is done each time through the loop so that a later
+             * class won't zap them before they have been dealt with. */
+            output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                            return_posix_warnings);
+        }
+
         if  (RExC_parse >= stop_ptr) {
             break;
         }
@@ -15637,12 +15671,29 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            value = UCHARAT(RExC_parse++);
 
         if (value == '[') {
+            char * posix_class_end;
             namedclass = handle_possible_posix(pRExC_state,
                                                RExC_parse,
-                                               &not_posix_region_end,
-                                               return_posix_warnings);
+                                               &posix_class_end,
+                                               do_posix_warnings ? &posix_warnings : NULL,
+                                               FALSE    /* die if error */);
             if (namedclass > OOB_NAMEDCLASS) {
-                RExC_parse = not_posix_region_end;
+
+                /* If there was an earlier attempt to parse this particular
+                 * posix class, and it failed, it was a false alarm, as this
+                 * successful one proves */
+                if (   posix_warnings
+                    && av_tindex(posix_warnings) >= 0
+                    && not_posix_region_end >= RExC_parse
+                    && not_posix_region_end <= posix_class_end)
+                {
+                    av_undef(posix_warnings);
+                }
+
+                RExC_parse = posix_class_end;
+            }
+            else if (namedclass == OOB_NAMEDCLASS) {
+                not_posix_region_end = posix_class_end;
             }
             else {
                 namedclass = OOB_NAMEDCLASS;
@@ -15656,7 +15707,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         RExC_parse - 1,  /* -1 because parse has already been
                                             advanced */
                         &not_posix_region_end,
-                        return_posix_warnings);
+                        do_posix_warnings ? &posix_warnings : NULL,
+                        TRUE /* checking only */);
         }
         else if (value == '\\') {
             /* Is a backslash; get the code point of the char after it */
@@ -16571,6 +16623,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        range = 0; /* this range (if it was one) is done now */
     } /* End of loop through all the text within the brackets */
 
+
+    if (   posix_warnings && av_tindex(posix_warnings) >= 0) {
+        output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                        return_posix_warnings);
+    }
+
     /* If anything in the class expands to more than one character, we have to
      * deal with them by building up a substitute parse string, and recursively
      * calling reg() on it, instead of proceeding */
index d05922e..0763be0 100644 (file)
@@ -204,6 +204,7 @@ my @death =
  '/(?[[[::]]])/' => "Syntax error in (?[...]) in regex m/(?[[[::]]])/",
  '/(?[[[:w:]]])/' => "Syntax error in (?[...]) in regex m/(?[[[:w:]]])/",
  '/(?[[:w:]])/' => "",
+ '/[][[:alpha:]]' => "",    # [perl #127581]
  '/(?[a])/' =>  'Unexpected character {#} m/(?[a{#}])/',
  '/(?[ + \t ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ +{#} \t ])/',
  '/(?[ \cK - ( + \t ) ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ \cK - ( +{#} \t ) ])/',