This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Hoist code point portability warnings
authorKarl Williamson <khw@cpan.org>
Sun, 12 Jan 2020 11:05:59 +0000 (04:05 -0700)
committerKarl Williamson <khw@cpan.org>
Thu, 23 Jan 2020 22:46:56 +0000 (15:46 -0700)
dquote.c
embed.fnc
embed.h
pod/perldelta.pod
proto.h
regcomp.c
t/lib/warnings/regcomp
t/lib/warnings/toke
toke.c

index bf5cf90..4898b35 100644 (file)
--- a/dquote.c
+++ b/dquote.c
@@ -59,7 +59,6 @@ bool
 Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
                       const char** error_msg,
                       const bool output_warning, const bool strict,
 Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
                       const char** error_msg,
                       const bool output_warning, const bool strict,
-                      const bool silence_non_portable,
                       const bool UTF)
 {
 
                       const bool UTF)
 {
 
@@ -88,16 +87,13 @@ Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
  *         them
  *     strict is true if this should fail instead of warn if there are
  *         non-octal digits within the braces
  *         them
  *     strict is true if this should fail instead of warn if there are
  *         non-octal digits within the braces
- *      silence_non_portable is true if to suppress warnings about the code
- *          point returned being too large to fit on all platforms.
  *     UTF is true iff the string *s is encoded in UTF-8.
  */
     char* e;
     STRLEN numbers_len;
     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
                | PERL_SCAN_DISALLOW_PREFIX
  *     UTF is true iff the string *s is encoded in UTF-8.
  */
     char* e;
     STRLEN numbers_len;
     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
                | PERL_SCAN_DISALLOW_PREFIX
-               /* XXX Until the message is improved in grok_oct, handle errors
-                * ourselves */
+                | PERL_SCAN_SILENT_NON_PORTABLE
                | PERL_SCAN_SILENT_ILLDIGIT;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_O;
                | PERL_SCAN_SILENT_ILLDIGIT;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_O;
@@ -130,10 +126,6 @@ Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
        return FALSE;
     }
 
        return FALSE;
     }
 
-    if (silence_non_portable) {
-        flags |= PERL_SCAN_SILENT_NON_PORTABLE;
-    }
-
     *uv = grok_oct(*s, &numbers_len, &flags, NULL);
     /* Note that if has non-octal, will ignore everything starting with that up
      * to the '}' */
     *uv = grok_oct(*s, &numbers_len, &flags, NULL);
     /* Note that if has non-octal, will ignore everything starting with that up
      * to the '}' */
@@ -165,7 +157,6 @@ bool
 Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv,
                       const char** error_msg,
                       const bool output_warning, const bool strict,
 Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv,
                       const char** error_msg,
                       const bool output_warning, const bool strict,
-                      const bool silence_non_portable,
                       const bool UTF)
 {
 
                       const bool UTF)
 {
 
@@ -197,13 +188,12 @@ Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv,
  *         fail instead of warn or be silent.  For example, it requires
  *         exactly 2 digits following the \x (when there are no braces).
  *         3 digits could be a mistake, so is forbidden in this mode.
  *         fail instead of warn or be silent.  For example, it requires
  *         exactly 2 digits following the \x (when there are no braces).
  *         3 digits could be a mistake, so is forbidden in this mode.
- *      silence_non_portable is true if to suppress warnings about the code
- *          point returned being too large to fit on all platforms.
  *     UTF is true iff the string *s is encoded in UTF-8.
  */
     char* e;
     STRLEN numbers_len;
  *     UTF is true iff the string *s is encoded in UTF-8.
  */
     char* e;
     STRLEN numbers_len;
-    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
+    I32 flags = PERL_SCAN_DISALLOW_PREFIX
+              | PERL_SCAN_SILENT_NON_PORTABLE;
 
 
     PERL_ARGS_ASSERT_GROK_BSLASH_X;
 
 
     PERL_ARGS_ASSERT_GROK_BSLASH_X;
@@ -275,9 +265,6 @@ Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv,
     }
 
     flags |= PERL_SCAN_ALLOW_UNDERSCORES;
     }
 
     flags |= PERL_SCAN_ALLOW_UNDERSCORES;
-    if (silence_non_portable) {
-        flags |= PERL_SCAN_SILENT_NON_PORTABLE;
-    }
 
     *uv = grok_hex(*s, &numbers_len, &flags, NULL);
     /* Note that if has non-hex, will ignore everything starting with that up
 
     *uv = grok_hex(*s, &numbers_len, &flags, NULL);
     /* Note that if has non-hex, will ignore everything starting with that up
index 23fbc23..36472ad 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1142,7 +1142,6 @@ EpRX      |bool   |grok_bslash_x  |NN char** s             \
                                |NN const char** error_msg       \
                                |const bool output_warning       \
                                |const bool strict               \
                                |NN const char** error_msg       \
                                |const bool output_warning       \
                                |const bool strict               \
-                               |const bool silence_non_portable \
                                |const bool utf8
 EpRX   |char   |grok_bslash_c  |const char source|const bool output_warning
 EpRX   |bool   |grok_bslash_o  |NN char** s             \
                                |const bool utf8
 EpRX   |char   |grok_bslash_c  |const char source|const bool output_warning
 EpRX   |bool   |grok_bslash_o  |NN char** s             \
@@ -1151,7 +1150,6 @@ EpRX      |bool   |grok_bslash_o  |NN char** s             \
                                |NN const char** error_msg       \
                                |const bool output_warning       \
                                |const bool strict               \
                                |NN const char** error_msg       \
                                |const bool output_warning       \
                                |const bool strict               \
-                               |const bool silence_non_portable \
                                |const bool utf8
 EiR    |char*|form_short_octal_warning|NN const char * const s  \
                                |const STRLEN len
                                |const bool utf8
 EiR    |char*|form_short_octal_warning|NN const char * const s  \
                                |const STRLEN len
diff --git a/embed.h b/embed.h
index a2c0dc1..6fbbed7 100644 (file)
--- a/embed.h
+++ b/embed.h
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
 #define form_short_octal_warning(a,b)  S_form_short_octal_warning(aTHX_ a,b)
 #define grok_bslash_c(a,b)     Perl_grok_bslash_c(aTHX_ a,b)
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_DQUOTE_C)
 #define form_short_octal_warning(a,b)  S_form_short_octal_warning(aTHX_ a,b)
 #define grok_bslash_c(a,b)     Perl_grok_bslash_c(aTHX_ a,b)
-#define grok_bslash_o(a,b,c,d,e,f,g,h) Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g,h)
-#define grok_bslash_x(a,b,c,d,e,f,g,h) Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g,h)
+#define grok_bslash_o(a,b,c,d,e,f,g)   Perl_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
+#define grok_bslash_x(a,b,c,d,e,f,g)   Perl_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
 #define regcurly               S_regcurly
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
 #define regcurly               S_regcurly
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
index cac576a..0901ccc 100644 (file)
@@ -212,6 +212,10 @@ L<Code point 0x%X is not Unicode, and not portable|perldiag/"Code point 0x%X is
 This is actually not a new message, but it is now output when the
 warnings category C<portable> is enabled.
 
 This is actually not a new message, but it is now output when the
 warnings category C<portable> is enabled.
 
+When raised during regular expression pattern compilation, the warning
+has extra text added at the end marking where precisely in the pattern
+it occured.
+
 =item *
 
 L<message|perldiag/"message">
 =item *
 
 L<message|perldiag/"message">
diff --git a/proto.h b/proto.h
index deed243..0cf4442 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5916,12 +5916,12 @@ PERL_CALLCONV char      Perl_grok_bslash_c(pTHX_ const char source, const bool output
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_C
 
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_C
 
-PERL_CALLCONV bool     Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool silence_non_portable, const bool utf8)
+PERL_CALLCONV bool     Perl_grok_bslash_o(pTHX_ char** s, const char* const send, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool utf8)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_O \
        assert(s); assert(send); assert(uv); assert(error_msg)
 
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_O \
        assert(s); assert(send); assert(uv); assert(error_msg)
 
-PERL_CALLCONV bool     Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool silence_non_portable, const bool utf8)
+PERL_CALLCONV bool     Perl_grok_bslash_x(pTHX_ char** s, const char* const send, UV* uv, const char** error_msg, const bool output_warning, const bool strict, const bool utf8)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_X \
        assert(s); assert(send); assert(uv); assert(error_msg)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_GROK_BSLASH_X \
        assert(s); assert(send); assert(uv); assert(error_msg)
index 2fec913..cc44f98 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -894,6 +894,20 @@ static const scan_data_t zero_scan_data = {
                                        "%s" REPORT_LOCATION,            \
                                   m, REPORT_LOCATION_ARGS(loc)))
 
                                        "%s" REPORT_LOCATION,            \
                                   m, REPORT_LOCATION_ARGS(loc)))
 
+#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
+    STMT_START {                                                            \
+                char * format;                                              \
+                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
+                Newx(format, format_size, char);                            \
+                my_strlcpy(format, m, format_size);                         \
+                my_strlcat(format, REPORT_LOCATION, format_size);           \
+                SAVEFREEPV(format);                                         \
+                _WARN_HELPER(loc, packwarn,                                 \
+                      Perl_ck_warner(aTHX_ packwarn,                        \
+                                        format,                             \
+                                        a1, REPORT_LOCATION_ARGS(loc)));    \
+    } STMT_END
+
 #define        ckWARNreg(loc,m)                                                \
     _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
                       Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
 #define        ckWARNreg(loc,m)                                                \
     _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
                       Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),       \
@@ -14077,9 +14091,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                       &error_msg,
                                                       TO_OUTPUT_WARNINGS(p),
                                                        (bool) RExC_strict,
                                                       &error_msg,
                                                       TO_OUTPUT_WARNINGS(p),
                                                        (bool) RExC_strict,
-                                                       TRUE, /* Output warnings
-                                                                for non-
-                                                                portables */
                                                        UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
                                                        UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
@@ -14102,9 +14113,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                       &error_msg,
                                                        TO_OUTPUT_WARNINGS(p),
                                                        (bool) RExC_strict,
                                                       &error_msg,
                                                        TO_OUTPUT_WARNINGS(p),
                                                        (bool) RExC_strict,
-                                                       TRUE, /* Silence warnings
-                                                                for non-
-                                                                portables */
                                                        UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
                                                        UTF);
                            if (! valid) {
                                RExC_parse = p; /* going to die anyway; point
@@ -14249,6 +14257,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
                 if (ender > 255) {
                     REQUIRE_UTF8(flagp);
 
                 if (ender > 255) {
                     REQUIRE_UTF8(flagp);
+                    if (   UNICODE_IS_PERL_EXTENDED(ender)
+                        && TO_OUTPUT_WARNINGS(p))
+                    {
+                        ckWARN2_non_literal_string(p,
+                                                   packWARN(WARN_PORTABLE),
+                                                   PL_extended_cp_format,
+                                                   ender);
+                    }
                 }
 
                 /* We need to check if the next non-ignored thing is a
                 }
 
                 /* We need to check if the next non-ignored thing is a
@@ -17612,7 +17628,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                               &error_msg,
                                                TO_OUTPUT_WARNINGS(RExC_parse),
                                                strict,
                                               &error_msg,
                                                TO_OUTPUT_WARNINGS(RExC_parse),
                                                strict,
-                                               silence_non_portable,
                                                UTF);
                    if (! valid) {
                        vFAIL(error_msg);
                                                UTF);
                    if (! valid) {
                        vFAIL(error_msg);
@@ -17631,7 +17646,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                               &error_msg,
                                               TO_OUTPUT_WARNINGS(RExC_parse),
                                                strict,
                                               &error_msg,
                                               TO_OUTPUT_WARNINGS(RExC_parse),
                                                strict,
-                                               silence_non_portable,
                                                UTF);
                     if (! valid) {
                        vFAIL(error_msg);
                                                UTF);
                     if (! valid) {
                        vFAIL(error_msg);
@@ -17934,6 +17948,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        /* non-Latin1 code point implies unicode semantics. */
        if (value > 255) {
             REQUIRE_UNI_RULES(flagp, 0);
        /* non-Latin1 code point implies unicode semantics. */
        if (value > 255) {
             REQUIRE_UNI_RULES(flagp, 0);
+            if (  ! silence_non_portable
+                &&  UNICODE_IS_PERL_EXTENDED(value)
+                &&  TO_OUTPUT_WARNINGS(RExC_parse))
+            {
+                ckWARN2_non_literal_string(RExC_parse,
+                                           packWARN(WARN_PORTABLE),
+                                           PL_extended_cp_format,
+                                           value);
+            }
        }
 
         /* Ready to process either the single value, or the completed range.
        }
 
         /* Ready to process either the single value, or the completed range.
index eb827b6..6dcb789 100644 (file)
@@ -113,3 +113,15 @@ my $qr = qr {
 }x;
 EXPECT
 ########
 }x;
 EXPECT
 ########
+# NAME Warn on 32-bit code points
+# SKIP ? $Config{uvsize} < 8
+use warnings 'portable';
+qr/\x{8000_0000}/;
+qr/[\x{8000_0000}]/;
+qr/\o{20_000_000_000}/;
+qr/[\o{20_000_000_000}]/;
+EXPECT
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable in regex; marked by <-- HERE in m/\x{8000_0000} <-- HERE / at - line 2.
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable in regex; marked by <-- HERE in m/[\x{8000_0000} <-- HERE ]/ at - line 3.
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable in regex; marked by <-- HERE in m/\o{20_000_000_000} <-- HERE / at - line 4.
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable in regex; marked by <-- HERE in m/[\o{20_000_000_000} <-- HERE ]/ at - line 5.
index e6591fa..e66558a 100644 (file)
@@ -1718,3 +1718,12 @@ $_ = "";
 s/^/ @x {a} /e
 EXPECT
 Scalar value @x{"a"} better written as $x{"a"} at - line 4.
 s/^/ @x {a} /e
 EXPECT
 Scalar value @x{"a"} better written as $x{"a"} at - line 4.
+########
+# NAME Warn on 32-bit code points
+# SKIP ? $Config{uvsize} < 8
+use warnings 'portable';
+my $a = "\x{8000_0000}";
+my $b = "\o{20_000_000_000}";
+EXPECT
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable at - line 2.
+Code point 0x80000000 is not Unicode, requires a Perl extension, and so is not portable at - line 3.
diff --git a/toke.c b/toke.c
index f192c6c..0638b98 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -3556,8 +3556,6 @@ S_scan_const(pTHX_ char *start)
                                                &uv, &error,
                                                TRUE, /* Output warning */
                                                FALSE, /* Not strict */
                                                &uv, &error,
                                                TRUE, /* Output warning */
                                                FALSE, /* Not strict */
-                                               TRUE, /* Output warnings for
-                                                         non-portables */
                                                UTF);
                    if (! valid) {
                        yyerror(error);
                                                UTF);
                    if (! valid) {
                        yyerror(error);
@@ -3575,8 +3573,6 @@ S_scan_const(pTHX_ char *start)
                                                &uv, &error,
                                                TRUE, /* Output warning */
                                                FALSE, /* Not strict */
                                                &uv, &error,
                                                TRUE, /* Output warning */
                                                FALSE, /* Not strict */
-                                               TRUE,  /* Output warnings for
-                                                         non-portables */
                                                UTF);
                    if (! valid) {
                        yyerror(error);
                                                UTF);
                    if (! valid) {
                        yyerror(error);
@@ -3644,7 +3640,10 @@ S_scan_const(pTHX_ char *start)
                             d = SvCUR(sv) + SvGROW(sv, needed);
                         }
 
                             d = SvCUR(sv) + SvGROW(sv, needed);
                         }
 
-                       d = (char*)uvchr_to_utf8((U8*)d, uv);
+                       d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
+                                                   (ckWARN(WARN_PORTABLE))
+                                                   ? UNICODE_WARN_PERL_EXTENDED
+                                                   : 0);
                    }
                }
 #ifdef EBCDIC
                    }
                }
 #ifdef EBCDIC
@@ -3789,7 +3788,10 @@ S_scan_const(pTHX_ char *start)
                            *d++ = (char) LATIN1_TO_NATIVE(uv);
                        }
                        else {
                            *d++ = (char) LATIN1_TO_NATIVE(uv);
                        }
                        else {
-                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
+                                                   (ckWARN(WARN_PORTABLE))
+                                                   ? UNICODE_WARN_PERL_EXTENDED
+                                                   : 0);
                         }
                    }
                }
                         }
                    }
                }