This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(toke|regcomp).c: Use common fcn to handle \0 problems
authorKarl Williamson <khw@cpan.org>
Thu, 16 Jan 2020 21:42:35 +0000 (14:42 -0700)
committerKarl Williamson <khw@cpan.org>
Thu, 23 Jan 2020 22:46:56 +0000 (15:46 -0700)
This changes warning messages for too short \0 octal constants to use
the function introduced in the previous commit.  This function assures a
consistent and clear warning message, which is slightly different than
the one this commit replaces.  I know of no CPAN code which depends on
this warning's wording.

dquote_inline.h
embed.fnc
embed.h
perl.h
pod/perldelta.pod
pod/perldiag.pod
proto.h
regcomp.c
t/re/reg_mesg.t
toke.c

index f0ce9d6..59612ac 100644 (file)
@@ -33,35 +33,4 @@ S_regcurly(const char *s)
     return *s == '}';
 }
 
-/* This is inline not for speed, but because it is so tiny */
-
-PERL_STATIC_INLINE char*
-S_form_short_octal_warning(pTHX_
-                           const char * const s, /* Points to first non-octal */
-                           const STRLEN len      /* Length of octals string, so
-                                                    (s-len) points to first
-                                                    octal */
-)
-{
-    /* Return a character string consisting of a warning message for when a
-     * string constant in octal is weird, like "\078".  */
-
-    const char * sans_leading_zeros = s - len;
-
-    PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
-
-    assert(*s == '8' || *s == '9');
-
-    /* Remove the leading zeros, retaining one zero so won't be zero length */
-    while (*sans_leading_zeros == '0') sans_leading_zeros++;
-    if (sans_leading_zeros == s) {
-        sans_leading_zeros--;
-    }
-
-    return Perl_form(aTHX_
-                     "'%.*s' resolved to '\\o{%.*s}%c'",
-                     (int) (len + 2), s - len - 1,
-                     (int) (s - sans_leading_zeros), sans_leading_zeros,
-                     *s);
-}
 #endif  /* PERL_DQUOTE_INLINE_H_ */
index db38c9e..a4079b7 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1162,8 +1162,6 @@ EpRX      |const char *|form_alien_digit_msg|const U8 which       \
                                |NN const char * const send     \
                                |const bool UTF                 \
                                |const bool braced
-EiR    |char*|form_short_octal_warning|NN const char * const s  \
-                               |const STRLEN len
 EiRT   |I32    |regcurly       |NN const char *s
 #endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
diff --git a/embed.h b/embed.h
index 426ff1e..9b3f0ce 100644 (file)
--- a/embed.h
+++ b/embed.h
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
 #define form_alien_digit_msg(a,b,c,d,e,f)      Perl_form_alien_digit_msg(aTHX_ a,b,c,d,e,f)
 #define form_cp_too_large_msg(a,b,c,d) Perl_form_cp_too_large_msg(aTHX_ a,b,c,d)
-#define form_short_octal_warning(a,b)  S_form_short_octal_warning(aTHX_ a,b)
 #define grok_bslash_c(a,b,c,d) Perl_grok_bslash_c(aTHX_ a,b,c,d)
 #define grok_bslash_o(a,b,c,d,e,f,g,h) Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g,h)
 #define grok_bslash_x(a,b,c,d,e,f,g,h) Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g,h)
diff --git a/perl.h b/perl.h
index c75dded..003e4c9 100644 (file)
--- a/perl.h
+++ b/perl.h
@@ -7143,8 +7143,8 @@ A synonym for L</grok_numeric_radix>
 #define PERL_SCAN_TRAILING            0x10 /* grok_number_flags() allow trailing
                                               and set IS_NUMBER_TRAILING */
 
-#ifdef PERL_CORE    /* These are considered experimental, so not exposed
-                       publicly */
+/* These are considered experimental, so not exposed publicly */
+#if defined(PERL_CORE) || defined(PERL_EXT)
 /* grok_??? don't warn about very large numbers which are <= UV_MAX;
  * output: found such a number */
 #  define PERL_SCAN_SILENT_NON_PORTABLE 0x20
index 54ddebe..8611b58 100644 (file)
@@ -277,6 +277,14 @@ at the end, when raised during regular expression pattern compilation,
 marking where precisely in the pattern it occured.  In some instances
 the text of the resolution has been clarified.
 
+=item *
+
+L<'%s' resolved to '\o{%s}%d'|perldiag/'%s' resolved to '\o{%s}%d'>
+
+As of Perl 5.32, this message is no longer generated.  Instead,
+L<perldiag/Non-octal character '%c' terminates \o early.  Resolved as "%s">
+is.
+
 =back
 
 =head1 Utility Changes
index 11750fb..cef5716 100644 (file)
@@ -4174,6 +4174,13 @@ an octal one was expected, like
 unexpectedly encountered that isn't octal.  The resulting value
 is as indicated.
 
+When not using C<\o{...}>, you wrote something like C<\08>, or C<\179>
+in a double-quotish string.  The resolution is as indicated, with all
+but the last digit treated as a single character, specified in octal.
+The last digit is the next character in the string.  To tell Perl that
+this is indeed what you want, you can use the C<\o{ }> syntax, or use
+exactly three digits to specify the octal for the character.
+
 Note that, within braces, every character starting with the first
 non-octal up to the ending brace is ignored.
 
@@ -5453,6 +5460,8 @@ Supply these or check that you are using the right construct.
 
 =item '%s' resolved to '\o{%s}%d'
 
+As of Perl 5.32, this message is no longer generated.  Instead, see
+L</Non-octal character '%c' terminates \o early.  Resolved as "%s">.
 (W misc, regexp)  You wrote something like C<\08>, or C<\179> in a
 double-quotish string.  All but the last digit is treated as a single
 character, specified in octal.  The last digit is the next character in
diff --git a/proto.h b/proto.h
index e1f8864..bc939fd 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5914,13 +5914,6 @@ PERL_CALLCONV const char *       Perl_form_cp_too_large_msg(pTHX_ const U8 which, cons
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG
 
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE char*       S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN len)
-                       __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING      \
-       assert(s)
-#endif
-
 PERL_CALLCONV bool     Perl_grok_bslash_c(pTHX_ const char source, U8 * result, const char** message, U32 * packed_warn)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_C \
index a004217..118e40f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -14185,17 +14185,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         /* FALLTHROUGH */
                     case '0':
                        {
-                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                                      | PERL_SCAN_NOTIFY_ILLDIGIT;
                            STRLEN numlen = 3;
                            ender = grok_oct(p, &numlen, &flags, NULL);
                            p += numlen;
-                            if (   isDIGIT(*p)  /* like \08, \178 */
-                                && ckWARN(WARN_REGEXP)
-                                && numlen < 3)
+                            if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
+                                && isDIGIT(*p)  /* like \08, \178 */
+                                && ckWARN(WARN_REGEXP))
                             {
                                reg_warn_non_literal_string(
-                                         p + 1,
-                                         form_short_octal_warning(p, numlen));
+                                     p + 1,
+                                     form_alien_digit_msg(8, numlen, p,
+                                                        RExC_end, UTF, FALSE));
                             }
                        }
                        break;
@@ -17705,7 +17707,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            case '5': case '6': case '7':
                {
                    /* Take 1-3 octal digits */
-                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                              | PERL_SCAN_NOTIFY_ILLDIGIT;
                     numlen = (strict) ? 4 : 3;
                     value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
                    RExC_parse += numlen;
@@ -17716,14 +17719,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                           : 1;
                             vFAIL("Need exactly 3 octal digits");
                         }
-                        else if (   numlen < 3 /* like \08, \178 */
+                        else if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
                                  && RExC_parse < RExC_end
                                  && isDIGIT(*RExC_parse)
                                  && ckWARN(WARN_REGEXP))
                         {
                             reg_warn_non_literal_string(
                                  RExC_parse + 1,
-                                 form_short_octal_warning(RExC_parse, numlen));
+                                 form_alien_digit_msg(8, numlen, RExC_parse,
+                                                        RExC_end, UTF, FALSE));
                         }
                     }
                     if (value < 256) {
index 3376028..29d1ae9 100644 (file)
@@ -366,9 +366,9 @@ my @death_only_under_strict = (
                                             => '\N{} here is restricted to one character {#} m/[\x03-\N{U+100.300{#}}]/',
     'm/[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}-\x{10FFFF}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\N{U+100.300}{#}-\x{10FFFF}]/',
                                                   => '\N{} here is restricted to one character {#} m/[\N{U+100.300{#}}-\x{10FFFF}]/',
-    '/[\08]/'   => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/',
+    '/[\08]/'   => 'Non-octal character \'8\' terminates \0 early.  Resolved as "\0008" {#} m/[\08{#}]/',
                 => 'Need exactly 3 octal digits {#} m/[\08{#}]/',
-    '/[\018]/'  => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/',
+    '/[\018]/'  => 'Non-octal character \'8\' terminates \0 early.  Resolved as "\0018" {#} m/[\018{#}]/',
                 => 'Need exactly 3 octal digits {#} m/[\018{#}]/',
     '/[\_\0]/'  => "",
                 => 'Need exactly 3 octal digits {#} m/[\_\0]{#}/',
@@ -533,8 +533,9 @@ my @warning = (
     'm/(?[[:word]])\x{100}/' => "Assuming NOT a POSIX class since there is no terminating ':' {#} m/(?[[:word{#}]])\\x{100}/",
     "m'\\y\\x{100}'"     => 'Unrecognized escape \y passed through {#} m/\y{#}\x{100}/',
     '/x{3,1}/'   => 'Quantifier {n,m} with n > m can\'t match {#} m/x{3,1}{#}/',
-    '/\08/' => '\'\08\' resolved to \'\o{0}8\' {#} m/\08{#}/',
-    '/\018/' => '\'\018\' resolved to \'\o{1}8\' {#} m/\018{#}/',
+    '/\08/' => 'Non-octal character \'8\' terminates \0 early.  Resolved as "\0008" {#} m/\08{#}/',
+
+    '/\018/' => 'Non-octal character \'8\' terminates \0 early.  Resolved as "\0018" {#} m/\018{#}/',
     '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/',
     'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/',
     '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/',
diff --git a/toke.c b/toke.c
index d54e79e..d407268 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -3534,15 +3534,18 @@ S_scan_const(pTHX_ char *start)
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                {
-                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                              | PERL_SCAN_NOTIFY_ILLDIGIT;
                     STRLEN len = 3;
-                   uv = grok_oct(s, &len, &flags, NULL);
-                   s += len;
-                    if (len < 3 && s < send && isDIGIT(*s)
+                    uv = grok_oct(s, &len, &flags, NULL);
+                    s += len;
+                    if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
+                        && s < send
+                        && isDIGIT(*s)  /* like \08, \178 */
                         && ckWARN(WARN_MISC))
                     {
-                        Perl_warner(aTHX_ packWARN(WARN_MISC),
-                                    "%s", form_short_octal_warning(s, len));
+                        Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
+                            form_alien_digit_msg(8, len, s, send, UTF, FALSE));
                     }
                }
                goto NUM_ESCAPE_INSERT;