From fcc04d73946f50bda2ffb344bea778338ce39003 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 16 Jan 2020 14:42:35 -0700 Subject: [PATCH] (toke|regcomp).c: Use common fcn to handle \0 problems This changes warning messages for too short \0 octal constants to use the function introduced in the previous commit. This function assures a consistent and clear warning message, which is slightly different than the one this commit replaces. I know of no CPAN code which depends on this warning's wording. --- dquote_inline.h | 31 ------------------------------- embed.fnc | 2 -- embed.h | 1 - perl.h | 4 ++-- pod/perldelta.pod | 8 ++++++++ pod/perldiag.pod | 9 +++++++++ proto.h | 7 ------- regcomp.c | 22 +++++++++++++--------- t/re/reg_mesg.t | 9 +++++---- toke.c | 15 +++++++++------ 10 files changed, 46 insertions(+), 62 deletions(-) diff --git a/dquote_inline.h b/dquote_inline.h index f0ce9d6..59612ac 100644 --- a/dquote_inline.h +++ b/dquote_inline.h @@ -33,35 +33,4 @@ S_regcurly(const char *s) return *s == '}'; } -/* This is inline not for speed, but because it is so tiny */ - -PERL_STATIC_INLINE char* -S_form_short_octal_warning(pTHX_ - const char * const s, /* Points to first non-octal */ - const STRLEN len /* Length of octals string, so - (s-len) points to first - octal */ -) -{ - /* Return a character string consisting of a warning message for when a - * string constant in octal is weird, like "\078". */ - - const char * sans_leading_zeros = s - len; - - PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING; - - assert(*s == '8' || *s == '9'); - - /* Remove the leading zeros, retaining one zero so won't be zero length */ - while (*sans_leading_zeros == '0') sans_leading_zeros++; - if (sans_leading_zeros == s) { - sans_leading_zeros--; - } - - return Perl_form(aTHX_ - "'%.*s' resolved to '\\o{%.*s}%c'", - (int) (len + 2), s - len - 1, - (int) (s - sans_leading_zeros), sans_leading_zeros, - *s); -} #endif /* PERL_DQUOTE_INLINE_H_ */ diff --git a/embed.fnc b/embed.fnc index db38c9e..a4079b7 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1162,8 +1162,6 @@ EpRX |const char *|form_alien_digit_msg|const U8 which \ |NN const char * const send \ |const bool UTF \ |const bool braced -EiR |char*|form_short_octal_warning|NN const char * const s \ - |const STRLEN len EiRT |I32 |regcurly |NN const char *s #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C) diff --git a/embed.h b/embed.h index 426ff1e..9b3f0ce 100644 --- a/embed.h +++ b/embed.h @@ -1110,7 +1110,6 @@ # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C) #define form_alien_digit_msg(a,b,c,d,e,f) Perl_form_alien_digit_msg(aTHX_ a,b,c,d,e,f) #define form_cp_too_large_msg(a,b,c,d) Perl_form_cp_too_large_msg(aTHX_ a,b,c,d) -#define form_short_octal_warning(a,b) S_form_short_octal_warning(aTHX_ a,b) #define grok_bslash_c(a,b,c,d) Perl_grok_bslash_c(aTHX_ a,b,c,d) #define grok_bslash_o(a,b,c,d,e,f,g,h) Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g,h) #define grok_bslash_x(a,b,c,d,e,f,g,h) Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g,h) diff --git a/perl.h b/perl.h index c75dded..003e4c9 100644 --- a/perl.h +++ b/perl.h @@ -7143,8 +7143,8 @@ A synonym for L #define PERL_SCAN_TRAILING 0x10 /* grok_number_flags() allow trailing and set IS_NUMBER_TRAILING */ -#ifdef PERL_CORE /* These are considered experimental, so not exposed - publicly */ +/* These are considered experimental, so not exposed publicly */ +#if defined(PERL_CORE) || defined(PERL_EXT) /* grok_??? don't warn about very large numbers which are <= UV_MAX; * output: found such a number */ # define PERL_SCAN_SILENT_NON_PORTABLE 0x20 diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 54ddebe..8611b58 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -277,6 +277,14 @@ at the end, when raised during regular expression pattern compilation, marking where precisely in the pattern it occured. In some instances the text of the resolution has been clarified. +=item * + +L<'%s' resolved to '\o{%s}%d'|perldiag/'%s' resolved to '\o{%s}%d'> + +As of Perl 5.32, this message is no longer generated. Instead, +L +is. + =back =head1 Utility Changes diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 11750fb..cef5716 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4174,6 +4174,13 @@ an octal one was expected, like unexpectedly encountered that isn't octal. The resulting value is as indicated. +When not using C<\o{...}>, you wrote something like C<\08>, or C<\179> +in a double-quotish string. The resolution is as indicated, with all +but the last digit treated as a single character, specified in octal. +The last digit is the next character in the string. To tell Perl that +this is indeed what you want, you can use the C<\o{ }> syntax, or use +exactly three digits to specify the octal for the character. + Note that, within braces, every character starting with the first non-octal up to the ending brace is ignored. @@ -5453,6 +5460,8 @@ Supply these or check that you are using the right construct. =item '%s' resolved to '\o{%s}%d' +As of Perl 5.32, this message is no longer generated. Instead, see +L. (W misc, regexp) You wrote something like C<\08>, or C<\179> in a double-quotish string. All but the last digit is treated as a single character, specified in octal. The last digit is the next character in diff --git a/proto.h b/proto.h index e1f8864..bc939fd 100644 --- a/proto.h +++ b/proto.h @@ -5914,13 +5914,6 @@ PERL_CALLCONV const char * Perl_form_cp_too_large_msg(pTHX_ const U8 which, cons __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG -#ifndef PERL_NO_INLINE_FUNCTIONS -PERL_STATIC_INLINE char* S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN len) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING \ - assert(s) -#endif - PERL_CALLCONV bool Perl_grok_bslash_c(pTHX_ const char source, U8 * result, const char** message, U32 * packed_warn) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_GROK_BSLASH_C \ diff --git a/regcomp.c b/regcomp.c index a004217..118e40f 100644 --- a/regcomp.c +++ b/regcomp.c @@ -14185,17 +14185,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) /* FALLTHROUGH */ case '0': { - I32 flags = PERL_SCAN_SILENT_ILLDIGIT; + I32 flags = PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_NOTIFY_ILLDIGIT; STRLEN numlen = 3; ender = grok_oct(p, &numlen, &flags, NULL); p += numlen; - if ( isDIGIT(*p) /* like \08, \178 */ - && ckWARN(WARN_REGEXP) - && numlen < 3) + if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) + && isDIGIT(*p) /* like \08, \178 */ + && ckWARN(WARN_REGEXP)) { reg_warn_non_literal_string( - p + 1, - form_short_octal_warning(p, numlen)); + p + 1, + form_alien_digit_msg(8, numlen, p, + RExC_end, UTF, FALSE)); } } break; @@ -17705,7 +17707,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, case '5': case '6': case '7': { /* Take 1-3 octal digits */ - I32 flags = PERL_SCAN_SILENT_ILLDIGIT; + I32 flags = PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_NOTIFY_ILLDIGIT; numlen = (strict) ? 4 : 3; value = grok_oct(--RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; @@ -17716,14 +17719,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, : 1; vFAIL("Need exactly 3 octal digits"); } - else if ( numlen < 3 /* like \08, \178 */ + else if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) && RExC_parse < RExC_end && isDIGIT(*RExC_parse) && ckWARN(WARN_REGEXP)) { reg_warn_non_literal_string( RExC_parse + 1, - form_short_octal_warning(RExC_parse, numlen)); + form_alien_digit_msg(8, numlen, RExC_parse, + RExC_end, UTF, FALSE)); } } if (value < 256) { diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 3376028..29d1ae9 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -366,9 +366,9 @@ my @death_only_under_strict = ( => '\N{} here is restricted to one character {#} m/[\x03-\N{U+100.300{#}}]/', 'm/[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}-\x{10FFFF}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\N{U+100.300}{#}-\x{10FFFF}]/', => '\N{} here is restricted to one character {#} m/[\N{U+100.300{#}}-\x{10FFFF}]/', - '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/', + '/[\08]/' => 'Non-octal character \'8\' terminates \0 early. Resolved as "\0008" {#} m/[\08{#}]/', => 'Need exactly 3 octal digits {#} m/[\08{#}]/', - '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/', + '/[\018]/' => 'Non-octal character \'8\' terminates \0 early. Resolved as "\0018" {#} m/[\018{#}]/', => 'Need exactly 3 octal digits {#} m/[\018{#}]/', '/[\_\0]/' => "", => 'Need exactly 3 octal digits {#} m/[\_\0]{#}/', @@ -533,8 +533,9 @@ my @warning = ( 'm/(?[[:word]])\x{100}/' => "Assuming NOT a POSIX class since there is no terminating ':' {#} m/(?[[:word{#}]])\\x{100}/", "m'\\y\\x{100}'" => 'Unrecognized escape \y passed through {#} m/\y{#}\x{100}/', '/x{3,1}/' => 'Quantifier {n,m} with n > m can\'t match {#} m/x{3,1}{#}/', - '/\08/' => '\'\08\' resolved to \'\o{0}8\' {#} m/\08{#}/', - '/\018/' => '\'\018\' resolved to \'\o{1}8\' {#} m/\018{#}/', + '/\08/' => 'Non-octal character \'8\' terminates \0 early. Resolved as "\0008" {#} m/\08{#}/', + + '/\018/' => 'Non-octal character \'8\' terminates \0 early. Resolved as "\0018" {#} m/\018{#}/', '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/', 'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/', '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/', diff --git a/toke.c b/toke.c index d54e79e..d407268 100644 --- a/toke.c +++ b/toke.c @@ -3534,15 +3534,18 @@ S_scan_const(pTHX_ char *start) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - I32 flags = PERL_SCAN_SILENT_ILLDIGIT; + I32 flags = PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_NOTIFY_ILLDIGIT; STRLEN len = 3; - uv = grok_oct(s, &len, &flags, NULL); - s += len; - if (len < 3 && s < send && isDIGIT(*s) + uv = grok_oct(s, &len, &flags, NULL); + s += len; + if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) + && s < send + && isDIGIT(*s) /* like \08, \178 */ && ckWARN(WARN_MISC)) { - Perl_warner(aTHX_ packWARN(WARN_MISC), - "%s", form_short_octal_warning(s, len)); + Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", + form_alien_digit_msg(8, len, s, send, UTF, FALSE)); } } goto NUM_ESCAPE_INSERT; -- 1.8.3.1