Don't allow non-graphemes as pattern delimiters
author: Karl Williamson <khw@cpan.org>
Sun, 20 May 2018 18:52:33 +0000 (12:52 -0600)
committer: Karl Williamson <khw@cpan.org>
Mon, 25 Jun 2018 14:18:20 +0000 (08:18 -0600)
Using an unassigned code point or a non-standalone grapheme as a pattern
delimiter has been deprecated, and was scheduled for removal in 5.30.

pod/perldelta.pod
pod/perldeprecation.pod
t/lib/warnings/toke
toke.c

index 5df958e..3f2eea7 100644 (file)
@@ -43,7 +43,11 @@ XXX For a release on a stable branch, this section aspires to be:
     If any exist, they are bugs, and we request that you submit a
     report.  See L</Reporting Bugs> below.
 
-[ List each incompatible change as a =head2 entry ]
+=head2 Pattern delimiters now must be graphemes
+
+This usage has been deprecated and scheduled for removal in 5.30.  See
+L<perldeprecation/Use of unassigned code point or non-standalone
+grapheme for a delimiter.>
 
 =head1 Deprecations
 
index 40ad2ec..5b4f406 100644 (file)
@@ -247,8 +247,8 @@ L<code points that are above the legal Unicode maximum|
 perlunicode/Beyond Unicode code points>, those can be delimiters, and
 their use won't raise this warning.
 
-In Perl 5.30, delimiters which are unassigned code points, or which
-are non-standalone graphemes will be fatal.
+As of Perl 5.30, delimiters which are unassigned code points, or which
+are non-standalone graphemes are fatal.
 
 =head3 In XS code, use of various macros dealing with UTF-8.
 
index ffa6307..c770e9c 100644 (file)
@@ -1676,7 +1676,9 @@ BEGIN{
 use utf8;
 my $a = qr ̂foobar̂;
 EXPECT
-Use of unassigned code point or non-standalone grapheme for a delimiter will be a fatal error starting in Perl 5.30 at - line 8.
+Use of unassigned code point or non-standalone grapheme for a delimiter is not allowed at - line 8, near "= "
+Use of unassigned code point or non-standalone grapheme for a delimiter is not allowed at - line 8, near "= "
+Execution of - aborted due to compilation errors.
 ########
 # NAME  [perl #130567] Assertion failure
 BEGIN {
diff --git a/toke.c b/toke.c
index 66a02e2..3806b55 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -10613,14 +10613,11 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     const char * opening_delims = "([{<";
     const char * closing_delims = ")]}>";
 
+    /* The only non-UTF character that isn't a stand alone grapheme is
+     * white-space, hence can't be a delimiter. */
     const char * non_grapheme_msg = "Use of unassigned code point or"
                                     " non-standalone grapheme for a delimiter"
-                                    " will be a fatal error starting in Perl"
-                                    " 5.30";
-    /* The only non-UTF character that isn't a stand alone grapheme is
-     * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
-    bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+                                    " is not allowed";
     PERL_ARGS_ASSERT_SCAN_STR;
 
     /* skip space before the delimiter */
@@ -10639,18 +10636,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     }
     else {
        termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
-        if (UTF) {
-                 if (UNLIKELY(! _is_grapheme((U8 *) start,
-                                             (U8 *) s,
-                                             (U8 *) PL_bufend,
-                                             termcode)))
-            {
-                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
-                /* Don't have to check the other end, as have already warned at
-                 * this one */
-                check_grapheme = FALSE;
-            }
+        if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+                                           (U8 *) s,
+                                           (U8 *) PL_bufend,
+                                                  termcode)))
+        {
+            yyerror(non_grapheme_msg);
         }
 
        Copy(s, termstr, termlen, U8);
@@ -10716,14 +10707,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                     if (   s + termlen <= PL_bufend
                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
                     {
-                        if (   check_grapheme
+                        if (   UTF
                             && UNLIKELY(! _is_grapheme((U8 *) start,
-                                                              (U8 *) s,
-                                                              (U8 *) PL_bufend,
+                                                       (U8 *) s,
+                                                       (U8 *) PL_bufend,
                                                               termcode)))
                         {
-                            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                        "%s", non_grapheme_msg);
+                            yyerror(non_grapheme_msg);
                         }
                        break;
                     }