Merge branch 'release-5.29.2' into blead

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 0ef3341..24e614f 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -39,6 +39,7 @@ Individual members of C<PL_parser> have their own documentation.
  #define PERL_IN_TOKE_C
  #include "perl.h"
  #include "dquote_inline.h"
+#include "invlist_inline.h"
  
  #define new_constant(a,b,c,d,e,f,g)    \
         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
@@ -2683,14 +2684,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              s += 2;
          }
          else {
-            if (! PL_utf8_charname_begin) {
-                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                PL_utf8_charname_begin = _core_swash_init("utf8",
-                                                        "_Perl_Charname_Begin",
-                                                        &PL_sv_undef,
-                                                        1, 0, NULL, &flags);
-            }
-            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+            if (! _invlist_contains_cp(PL_utf8_charname_begin,
+                                       utf8_to_uvchr_buf((U8 *) s,
+                                                         (U8 *) e,
+                                                         NULL)))
+            {
                  goto bad_charname;
              }
              s += UTF8SKIP(s);
@@ -2714,14 +2712,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s += 2;
              }
              else {
-                if (! PL_utf8_charname_continue) {
-                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                    PL_utf8_charname_continue = _core_swash_init("utf8",
-                                                "_Perl_Charname_Continue",
-                                                &PL_sv_undef,
-                                                1, 0, NULL, &flags);
-                }
-                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                if (! _invlist_contains_cp(PL_utf8_charname_continue,
+                                           utf8_to_uvchr_buf((U8 *) s,
+                                                             (U8 *) e,
+                                                             NULL)))
+                {
                      goto bad_charname;
                  }
                  s += UTF8SKIP(s);
@@ -9057,7 +9052,7 @@ S_pending_ident(pTHX)
                 HEK * const stashname = HvNAME_HEK(stash);
                 SV *  const sym = newSVhek(stashname);
                  sv_catpvs(sym, "::");
-                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
+                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
                  pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
                  pl_yylval.opval->op_private = OPpCONST_ENTERED;
                  if (pit != '&')
@@ -9085,7 +9080,7 @@ S_pending_ident(pTHX)
          && PL_lex_state != LEX_NORMAL
          && !PL_lex_brackets)
      {
-        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                                           ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
                                           SVt_PVAV);
          if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
@@ -9102,11 +9097,11 @@ S_pending_ident(pTHX)
      /* build ops for a bareword */
      pl_yylval.opval = newSVOP(OP_CONST, 0,
                                    newSVpvn_flags(PL_tokenbuf + 1,
-                                                     tokenbuf_len - 1,
+                                                      tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                                                        UTF ? SVf_UTF8 : 0 ));
      pl_yylval.opval->op_private = OPpCONST_ENTERED;
      if (pit != '&')
-       gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+        gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
                       | ( UTF ? SVf_UTF8 : 0 ),
                      ((PL_tokenbuf[0] == '$') ? SVt_PV
@@ -10336,7 +10331,7 @@ S_scan_heredoc(pTHX_ char *s)
         while (ss < se) {
             /* newline only? Copy and move on */
             if (*ss == '\n') {
-               sv_catpv(newstr,"\n");
+               sv_catpvs(newstr,"\n");
                 ss++;
                 linecount++;
  
@@ -10618,14 +10613,11 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      const char * opening_delims = "([{<";
      const char * closing_delims = ")]}>";
  
+    /* The only non-UTF-8 character that isn't a standalone grapheme is
+     * white-space, which can't be a delimiter; so only UTF-8 needs checking */
      const char * non_grapheme_msg = "Use of unassigned code point or"
                                      " non-standalone grapheme for a delimiter"
-                                    " will be a fatal error starting in Perl"
-                                    " 5.30";
-    /* The only non-UTF character that isn't a stand alone grapheme is
-     * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
-    bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+                                    " is not allowed";
      PERL_ARGS_ASSERT_SCAN_STR;
  
      /* skip space before the delimiter */
@@ -10644,26 +10636,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      }
      else {
         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
-        if (check_grapheme) {
-            if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
-                || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
-            {
-                /* These are considered graphemes, and since the ending
-                 * delimiter will be the same, we don't have to check the other
-                 * end */
-                check_grapheme = FALSE;
-            }
-            else if (UNLIKELY(! _is_grapheme((U8 *) start,
-                                             (U8 *) s,
-                                             (U8 *) PL_bufend,
-                                             termcode)))
-            {
-                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
-                /* Don't have to check the other end, as have already warned at
-                 * this one */
-                check_grapheme = FALSE;
-            }
+        if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+                                           (U8 *) s,
+                                           (U8 *) PL_bufend,
+                                                  termcode)))
+        {
+            yyerror(non_grapheme_msg);
          }
  
         Copy(s, termstr, termlen, U8);
@@ -10729,14 +10707,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                      if (   s + termlen <= PL_bufend
                          && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
                      {
-                        if (   check_grapheme
+                        if (   UTF
                              && UNLIKELY(! _is_grapheme((U8 *) start,
-                                                              (U8 *) s,
-                                                              (U8 *) PL_bufend,
+                                                       (U8 *) s,
+                                                       (U8 *) PL_bufend,
                                                                termcode)))
                          {
-                            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                        "%s", non_grapheme_msg);
+                            yyerror(non_grapheme_msg);
                          }
                         break;
                      }