This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #124280] don't warn for 'my $foo, *bar'
[perl5.git] / regcomp.c
index 3455814..da6eb16 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -669,6 +669,14 @@ static const scan_data_t zero_scan_data =
             REPORT_LOCATION_ARGS(offset));         \
 } STMT_END
 
+#define vFAIL3utf8f(m, a1, a2) STMT_START {             \
+    const IV offset = RExC_parse - RExC_precomp;        \
+    if (!SIZE_ONLY)                                     \
+        SAVEFREESV(RExC_rx_sv);                         \
+    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
+            REPORT_LOCATION_ARGS(offset));              \
+} STMT_END
+
 /* These have asserts in them because of [perl #122671] Many warnings in
  * regcomp.c can occur twice.  If they get output in pass1 and later in that
  * pass, the pattern has to be converted to UTF-8 and the pass restarted, they
@@ -6671,7 +6679,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_pm_flags = pm_flags;
 
     if (runtime_code) {
-       if (TAINTING_get && TAINT_get)
+        assert(TAINTING_get || !TAINT_get);
+       if (TAINT_get)
            Perl_croak(aTHX_ "Eval-group in insecure regular expression");
 
        if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
@@ -9789,9 +9798,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
         ++RExC_parse;
     }
 
-    if (PASS2) {
-        STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
-    }
+    vFAIL("Sequence (?... not terminated");
 }
 
 /*
@@ -10932,7 +10939,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * enough space for all the things we are about to throw
                      * away, but we can shrink it by the ammount we are about
                      * to re-use here */
-                    RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
+                    RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                 }
                 else {
                     ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
@@ -10948,8 +10955,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                "Useless use of greediness modifier '%c'",
                                *RExC_parse);
                 }
-                /* Absorb the modifier, so later code doesn't see nor use it */
-                nextchar(pRExC_state);
             }
 
          do_curly:
@@ -11419,10 +11424,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  * it returns U+FFFD (Replacement character) and sets *encp to NULL.
  */
 STATIC UV
-S_reg_recode(pTHX_ const char value, SV **encp)
+S_reg_recode(pTHX_ const U8 value, SV **encp)
 {
     STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
+    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
     const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
     const STRLEN newlen = SvCUR(sv);
     UV uv = UNICODE_REPLACEMENT;
@@ -12613,7 +12618,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      recode_encoding:
                        if (! RExC_override_recoding) {
                            SV* enc = _get_encoding();
-                           ender = reg_recode((const char)(U8)ender, &enc);
+                           ender = reg_recode((U8)ender, &enc);
                            if (!enc && PASS2)
                                ckWARNreg(p, "Invalid escape in the specified encoding");
                            REQUIRE_UTF8(flagp);
@@ -13436,6 +13441,10 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                      * default: case next time and keep on incrementing until
                      * we find one of the invariants we do handle. */
                     RExC_parse++;
+                    if (*RExC_parse == 'c') {
+                            /* Skip the \cX notation for control characters */
+                            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                    }
                     break;
                 case '[':
                 {
@@ -13799,8 +13808,12 @@ redo_curchar:
                 /* Having gotten rid of the fence, we pop the operand at the
                  * stack top and process it as a newly encountered operand */
                 current = av_pop(stack);
-                assert(IS_OPERAND(current));
-                goto handle_operand;
+                if (IS_OPERAND(current)) {
+                    goto handle_operand;
+                }
+
+                RExC_parse++;
+                goto bad_syntax;
 
             case '&':
             case '|':
@@ -13876,11 +13889,23 @@ redo_curchar:
                 /* Here, the new operator has equal or lower precedence than
                  * what's already there.  This means the operation already
                  * there should be performed now, before the new one. */
+
                 rhs = av_pop(stack);
+                if (! IS_OPERAND(rhs)) {
+
+                    /* This can happen when a ! is not followed by an operand,
+                     * like in /(?[\t &!])/ */
+                    goto bad_syntax;
+                }
+
                 lhs = av_pop(stack);
 
-                assert(IS_OPERAND(rhs));
-                assert(IS_OPERAND(lhs));
+                if (! IS_OPERAND(lhs)) {
+
+                    /* This can happen when there is an empty (), like in
+                     * /(?[[0]+()+])/ */
+                    goto bad_syntax;
+                }
 
                 switch (stacked_operator) {
                     case '&':
@@ -13926,9 +13951,20 @@ redo_curchar:
                 av_push(stack, rhs);
                 goto redo_curchar;
 
-            case '!':   /* Highest priority, right associative, so just push
-                           onto stack */
-                av_push(stack, newSVuv(curchar));
+            case '!':   /* Highest priority, right associative */
+
+                /* If what's already at the top of the stack is another '!",
+                 * they just cancel each other out */
+                if (   (top_ptr = av_fetch(stack, top_index, FALSE))
+                    && (IS_OPERATOR(*top_ptr) && SvUV(*top_ptr) == '!'))
+                {
+                    only_to_avoid_leaks = av_pop(stack);
+                    SvREFCNT_dec(only_to_avoid_leaks);
+                }
+                else { /* Otherwise, since it's right associative, just push
+                          onto the stack */
+                    av_push(stack, newSVuv(curchar));
+                }
                 break;
 
             default:
@@ -14003,6 +14039,7 @@ redo_curchar:
         || SvTYPE(final) != SVt_INVLIST
         || av_tindex(stack) >= 0)  /* More left on stack */
     {
+      bad_syntax:
         SvREFCNT_dec(final);
         vFAIL("Incomplete expression within '(?[ ])'");
     }
@@ -14642,6 +14679,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                if (!SIZE_ONLY) {
                     SV* invlist;
                     char* name;
+                    char* base_name;    /* name after any packages are stripped */
+                    const char * const colon_colon = "::";
 
                     /* Try to get the definition of the property into
                      * <invlist>.  If /i is in effect, the effective property
@@ -14671,6 +14710,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         HV* curpkg = (IN_PERL_COMPILETIME)
                                       ? PL_curstash
                                       : CopSTASH(PL_curcop);
+                        UV final_n = n;
+                        bool has_pkg;
+
                         if (swash) {    /* Got a swash but no inversion list.
                                            Something is likely wrong that will
                                            be sorted-out later */
@@ -14682,25 +14724,43 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * typo) in specifying a Unicode property, or it could
                          * be a user-defined property that will be available at
                          * run-time.  The names of these must begin with 'In'
-                         * or 'Is'.  So
+                         * or 'Is' (after any packages are stripped off).  So
                          * if not one of those, or if we accept only
                          * compile-time properties, is an error; otherwise add
                          * it to the list for run-time look up. */
-                        if (   n < 3
-                            || name[0] != 'I'
-                            || (name[1] != 's' && name[1] != 'n')
+                        if ((base_name = rninstr(name, name + n,
+                                                 colon_colon, colon_colon + 2)))
+                        { /* Has ::.  We know this must be a user-defined
+                             property */
+                            base_name += 2;
+                            final_n -= base_name - name;
+                            has_pkg = TRUE;
+                        }
+                        else {
+                            base_name = name;
+                            has_pkg = FALSE;
+                        }
+
+                        if (   final_n < 3
+                            || base_name[0] != 'I'
+                            || (base_name[1] != 's' && base_name[1] != 'n')
                             || ret_invlist)
                         {
+                            const char * const msg
+                                = (has_pkg)
+                                  ? "Illegal user-defined property name"
+                                  : "Can't find Unicode property definition";
                             RExC_parse = e + 1;
-                            vFAIL2utf8f(
-                                "Can't find Unicode property definition \"%"UTF8f"\"",
-                                UTF8fARG(UTF, n, name));
+
+                            /* diag_listed_as: Can't find Unicode property definition "%s" */
+                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                                msg, UTF8fARG(UTF, n, name));
                         }
 
                         /* If the property name doesn't already have a package
                          * name, add the current one to it so that it can be
                          * referred to outside it. [perl #121777] */
-                        if (curpkg && ! instr(name, "::")) {
+                        if (! has_pkg && curpkg) {
                             char* pkgname = HvNAME(curpkg);
                             if (strNE(pkgname, "main")) {
                                 char* full_name = Perl_form(aTHX_
@@ -14863,7 +14923,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              recode_encoding:
                if (! RExC_override_recoding) {
                    SV* enc = _get_encoding();
-                   value = reg_recode((const char)(U8)value, &enc);
+                   value = reg_recode((U8)value, &enc);
                    if (!enc) {
                         if (strict) {
                             vFAIL("Invalid escape in the specified encoding");