This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c: Convert to use isFOO_utf8_safe() macros
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 524a999..2996177 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -413,12 +413,12 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
        else if (!rv)
            sv_catpvs(report, "EOF");
        else
-           Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+           Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
        switch (type) {
        case TOKENTYPE_NONE:
            break;
        case TOKENTYPE_IVAL:
-           Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
+           Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
            break;
        case TOKENTYPE_OPNUM:
            Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
@@ -527,20 +527,26 @@ S_no_op(pTHX_ const char *const what, char *s)
        if (is_first)
            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                    "\t(Missing semicolon on previous line?)\n");
-       else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
+        else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
+                                                           PL_bufend,
+                                                           UTF))
+        {
            const char *t;
-           for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
-                                                            t += UTF ? UTF8SKIP(t) : 1)
+           for (t = PL_oldoldbufptr;
+                 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+                 t += UTF ? UTF8SKIP(t) : 1)
+            {
                NOOP;
+            }
            if (t < PL_bufptr && isSPACE(*t))
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                       "\t(Do you need to predeclare %"UTF8f"?)\n",
+                       "\t(Do you need to predeclare %" UTF8f "?)\n",
                      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
        }
        else {
            assert(s >= oldbp);
            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                   "\t(Missing operator before %"UTF8f"?)\n",
+                   "\t(Missing operator before %" UTF8f "?)\n",
                     UTF8fARG(UTF, s - oldbp, oldbp));
        }
     }
@@ -590,7 +596,7 @@ S_missingterm(pTHX_ char *s)
     sv = sv_2mortal(newSVpv(s,0));
     if (uni)
        SvUTF8_on(sv);
-    Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
+    Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
                     "%c anywhere before EOF",q,SVfARG(sv),q);
 }
 
@@ -705,8 +711,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     PL_parser = parser;
 
     parser->stack = NULL;
+    parser->stack_max1 = NULL;
     parser->ps = NULL;
-    parser->stack_size = 0;
 
     /* on scope exit, free this parser and restore any outer one */
     SAVEPARSER(parser);
@@ -923,10 +929,18 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     char *buf;
     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
+    bool current;
+
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (len <= SvLEN(linestr))
        return buf;
+
+    /* Is the lex_shared linestr SV the same as the current linestr SV?
+     * Only in this case does re_eval_start need adjusting, since it
+     * points within lex_shared->ls_linestr's buffer */
+    current = (linestr == PL_parser->lex_shared->ls_linestr);
+
     bufend_pos = PL_parser->bufend - buf;
     bufptr_pos = PL_parser->bufptr - buf;
     oldbufptr_pos = PL_parser->oldbufptr - buf;
@@ -934,7 +948,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     linestart_pos = PL_parser->linestart - buf;
     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
-    re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+    re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
                             PL_parser->lex_shared->re_eval_start - buf : 0;
 
     buf = sv_grow(linestr, len);
@@ -948,7 +962,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
        PL_parser->last_uni = buf + last_uni_pos;
     if (PL_parser->last_lop)
        PL_parser->last_lop = buf + last_lop_pos;
-    if (PL_parser->lex_shared->re_eval_start)
+    if (current && PL_parser->lex_shared->re_eval_start)
         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
     return buf;
 }
@@ -1024,13 +1038,11 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                    p++;
                    highhalf++;
-               } else if (! UTF8_IS_INVARIANT(c)) {
-                   /* malformed UTF-8 */
-                   ENTER;
-                   SAVESPTR(PL_warnhook);
-                   PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
-                   LEAVE;
+                } else if (! UTF8_IS_INVARIANT(c)) {
+                    _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
+                                                      0,
+                                                      1 /* 1 means die */ );
+                    NOT_REACHED; /* NOTREACHED */
                }
            }
            if (!highhalf)
@@ -1280,6 +1292,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
     bool got_some_for_debugger = 0;
     bool got_some;
+    const U8* first_bad_char_loc;
+
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
@@ -1344,6 +1358,19 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     new_bufend_pos = SvCUR(linestr);
     PL_parser->bufend = buf + new_bufend_pos;
     PL_parser->bufptr = buf + bufptr_pos;
+
+    if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr,
+                                    PL_parser->bufend - PL_parser->bufptr,
+                                    &first_bad_char_loc))
+    {
+
+        _force_out_malformed_utf8_message(first_bad_char_loc,
+                                          (U8 *) PL_parser->bufend,
+                                          0,
+                                          1 /* 1 means die */ );
+        NOT_REACHED; /* NOTREACHED */
+    }
+
     PL_parser->oldbufptr = buf + oldbufptr_pos;
     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
     PL_parser->linestart = buf + linestart_pos;
@@ -1420,12 +1447,11 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
        }
        unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
        if (retlen == (STRLEN)-1) {
-           /* malformed UTF-8 */
-           ENTER;
-           SAVESPTR(PL_warnhook);
-           PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
-           LEAVE;
+            _force_out_malformed_utf8_message((U8 *) s,
+                                              (U8 *) bufend,
+                                              0,
+                                              1 /* 1 means die */ );
+            NOT_REACHED; /* NOTREACHED */
        }
        return unichar;
     } else {
@@ -1634,19 +1660,19 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
 
        if (proto_after_greedy_proto)
            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                       "Prototype after '%c' for %"SVf" : %s",
+                       "Prototype after '%c' for %" SVf " : %s",
                        greedy_proto, SVfARG(name), p);
        if (in_brackets)
            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                       "Missing ']' in prototype for %"SVf" : %s",
+                       "Missing ']' in prototype for %" SVf " : %s",
                        SVfARG(name), p);
        if (bad_proto)
            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                       "Illegal character in prototype for %"SVf" : %s",
+                       "Illegal character in prototype for %" SVf " : %s",
                        SVfARG(name), p);
        if (bad_proto_after_underscore)
            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                       "Illegal character after '_' in prototype for %"SVf" : %s",
+                       "Illegal character after '_' in prototype for %" SVf " : %s",
                        SVfARG(name), p);
     }
 
@@ -1866,13 +1892,13 @@ S_check_uni(pTHX)
     while (isSPACE(*PL_last_uni))
        PL_last_uni++;
     s = PL_last_uni;
-    while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
+    while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
        s += UTF ? UTF8SKIP(s) : 1;
     if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
 
     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                    "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+                    "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
                     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
 }
 
@@ -2036,7 +2062,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
 
     start = skipspace(start);
     s = start;
-    if (isIDFIRST_lazy_if(s,UTF)
+    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
         || (allow_pack && *s == ':' && s[1] == ':') )
     {
        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
@@ -2290,12 +2316,12 @@ S_tokeq(pTHX_ SV *sv)
  * Pattern matching will set PL_lex_op to the pattern-matching op to
  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
  *
- * OP_CONST and OP_READLINE are easy--just make the new op and return.
+ * OP_CONST is easy--just make the new op and return.
  *
  * Everything else becomes a FUNC.
  *
- * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
- * had an OP_CONST or OP_READLINE).  This just sets us up for a
+ * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
+ * had an OP_CONST.  This just sets us up for a
  * call to S_sublex_push().
  */
 
@@ -2546,15 +2572,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                                      e - backslash_ptr,
                                      &first_bad_char_loc))
     {
-        /* If warnings are on, this will print a more detailed analysis of what
-         * is wrong than the error message below */
-        utf8n_to_uvchr(first_bad_char_loc,
-                       e - ((char *) first_bad_char_loc),
-                       NULL, 0);
-
-        /* We deliberately don't try to print the malformed character, which
-         * might not print very well; it also may be just the first of many
-         * malformations, so don't print what comes after it */
+        _force_out_malformed_utf8_message(first_bad_char_loc,
+                                          (U8 *) PL_parser->bufend,
+                                          0,
+                                          0 /* 0 means don't die */ );
         yyerror_pv(Perl_form(aTHX_
             "Malformed UTF-8 character immediately after '%.*s'",
             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
@@ -2687,15 +2708,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         STRLEN len;
         const char* const str = SvPV_const(res, len);
         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
-            /* If warnings are on, this will print a more detailed analysis of
-             * what is wrong than the error message below */
-            utf8n_to_uvchr(first_bad_char_loc,
-                           (char *) first_bad_char_loc - str,
-                           NULL, 0);
-
-            /* We deliberately don't try to print the malformed character,
-             * which might not print very well; it also may be just the first
-             * of many malformations, so don't print what comes after it */
+            _force_out_malformed_utf8_message(first_bad_char_loc,
+                                              (U8 *) PL_parser->bufend,
+                                              0,
+                                              0 /* 0 means don't die */ );
             yyerror_pv(
               Perl_form(aTHX_
                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
@@ -3026,7 +3042,7 @@ S_scan_const(pTHX_ char *start)
                     else if (convert_unicode) {
                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
                         Perl_croak(aTHX_
-                              "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
+                              "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
                                " in transliteration operator",
                               range_min, range_max);
                     }
@@ -3034,7 +3050,7 @@ S_scan_const(pTHX_ char *start)
                     else {
                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
                         Perl_croak(aTHX_
-                              "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
+                              "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
                                " in transliteration operator",
                               range_min, range_max);
                     }
@@ -3231,8 +3247,12 @@ S_scan_const(pTHX_ char *start)
           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
           */
        else if (*s == '@' && s[1]) {
-           if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
+           if (UTF
+               ? isIDFIRST_utf8_safe(s+1, send)
+               : isWORDCHAR_A(s[1]))
+            {
                break;
+            }
            if (strchr(":'{$", s[1]))
                break;
            if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
@@ -3775,18 +3795,29 @@ S_scan_const(pTHX_ char *start)
        } /* end if (backslash) */
 
     default_action:
-       /* If we started with encoded form, or already know we want it,
-          then encode the next character */
-       if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
-           STRLEN len  = 1;
-
-           /* One might think that it is wasted effort in the case of the
-            * source being utf8 (this_utf8 == TRUE) to take the next character
-            * in the source, convert it to an unsigned value, and then convert
-            * it back again.  But the source has not been validated here.  The
-            * routine that does the conversion checks for errors like
-            * malformed utf8 */
+        /* Just copy the input to the output, though we may have to convert
+         * to/from UTF-8.
+         *
+         * If the input has the same representation in UTF-8 as not, it will be
+         * a single byte, and we don't care about UTF8ness; or if neither
+         * source nor output is UTF-8, just copy the byte */
+        if (NATIVE_BYTE_IS_INVARIANT((U8)(*s)) || (! this_utf8 && ! has_utf8))
+        {
+           *d++ = *s++;
+        }
+        else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
+           const STRLEN len = UTF8SKIP(s);
+
+            /* We expect the source to have already been checked for
+             * malformedness */
+            assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
 
+            Copy(s, d, len, U8);
+            d += len;
+            s += len;
+        }
+        else { /* UTF8ness matters and doesn't match, need to convert */
+           STRLEN len = 1;
            const UV nextuv   = (this_utf8)
                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
                                 : (UV) ((U8) *s);
@@ -3812,17 +3843,14 @@ S_scan_const(pTHX_ char *start)
 
            d = (char*)uvchr_to_utf8((U8*)d, nextuv);
        }
-       else {
-           *d++ = *s++;
-       }
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
     *d = '\0';
     SvCUR_set(sv, d - SvPVX_const(sv));
     if (SvCUR(sv) >= SvLEN(sv))
-       Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
-                  " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
+       Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
+                  " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
 
     SvPOK_on(sv);
     if (has_utf8) {
@@ -3963,7 +3991,7 @@ S_intuit_more(pTHX_ char *s)
            case '&':
            case '$':
                weight -= seen[un_char] * 10;
-               if (isWORDCHAR_lazy_if(s+1,UTF)) {
+               if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
                    int len;
                     char *tmp = PL_bufend;
                     PL_bufend = (char*)send;
@@ -4439,11 +4467,17 @@ S_check_scalar_slice(pTHX_ char *s)
 {
     s++;
     while (*s == ' ' || *s == '\t') s++;
-    if (*s == 'q' && s[1] == 'w'
-     && !isWORDCHAR_lazy_if(s+2,UTF))
+    if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
+                                                             PL_bufend,
+                                                             UTF))
+    {
        return;
-    while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
-       s += UTF ? UTF8SKIP(s) : 1;
+    }
+    while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
+           || (*s && strchr(" \t$#+-'\"", *s)))
+    {
+        s += UTF ? UTF8SKIP(s) : 1;
+    }
     if (*s == '}' || *s == ']')
        pl_yylval.ival = OPpSLICEWARNING;
 }
@@ -4534,7 +4568,7 @@ Perl_yylex(pTHX)
 
     DEBUG_T( {
        SV* tmp = newSVpvs("");
-       PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
+       PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
            (IV)CopLINE(PL_curcop),
            lex_state_names[PL_lex_state],
            exp_name[PL_expect],
@@ -4621,9 +4655,7 @@ Perl_yylex(pTHX)
                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                     tmp = *s, *s = s[2], s[2] = (char)tmp;     /* misordered... */
                if ((*s == 'L' || *s == 'U' || *s == 'F')
-                    && (strchr(PL_lex_casestack, 'L')
-                        || strchr(PL_lex_casestack, 'U')
-                        || strchr(PL_lex_casestack, 'F')))
+                    && (strpbrk(PL_lex_casestack, "LUF")))
                 {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
                    PL_lex_allbrackets--;
@@ -4852,7 +4884,7 @@ Perl_yylex(pTHX)
                 break;
             }
             s = skipspace(s);
-            if (isIDFIRST_lazy_if(s, UTF)) {
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                 char *dest = PL_tokenbuf + 1;
                 /* read var name, including sigil, into PL_tokenbuf */
                 PL_tokenbuf[0] = sigil;
@@ -4891,13 +4923,12 @@ Perl_yylex(pTHX)
     default:
        if (UTF) {
             if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
-                ENTER;
-                SAVESPTR(PL_warnhook);
-                PL_warnhook = PERL_WARNHOOK_FATAL;
-                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
-                LEAVE;
+                _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
+                                                  0,
+                                                  1 /* 1 means die */ );
+                NOT_REACHED; /* NOTREACHED */
             }
-            if (isIDFIRST_utf8((U8*)s)) {
+            if (isIDFIRST_utf8_safe(s, PL_bufend)) {
                 goto keylookup;
             }
         }
@@ -4924,7 +4955,7 @@ Perl_yylex(pTHX)
         } else {
             d = PL_linestart;
         }
-        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
+        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
                           UTF8fARG(UTF, (s - d), d),
                          (int) len + 1);
     }
@@ -5430,7 +5461,7 @@ Perl_yylex(pTHX)
                    PL_expect = XPOSTDEREF;
                    TOKEN(ARROW);
                }
-               if (isIDFIRST_lazy_if(s,UTF)) {
+               if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                    s = force_word(s,METHOD,FALSE,TRUE);
                    TOKEN(ARROW);
                }
@@ -5615,7 +5646,7 @@ Perl_yylex(pTHX)
         grabattrs:
            s = skipspace(s);
            attrs = NULL;
-           while (isIDFIRST_lazy_if(s,UTF)) {
+            while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                I32 tmp;
                SV *sv;
                d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
@@ -5821,7 +5852,7 @@ Perl_yylex(pTHX)
                while (d < PL_bufend && SPACE_OR_TAB(*d))
                    d++;
            }
-           if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
+            if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
                d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
                              FALSE, &len);
                while (d < PL_bufend && SPACE_OR_TAB(*d))
@@ -5941,13 +5972,19 @@ Perl_yylex(pTHX)
                    }
                    else
                        /* skip plain q word */
-                       while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+                       while (   t < PL_bufend
+                               && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+                        {
                            t += UTF ? UTF8SKIP(t) : 1;
+                        }
                }
-               else if (isWORDCHAR_lazy_if(t,UTF)) {
+               else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
                    t += UTF ? UTF8SKIP(t) : 1;
-                   while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+                   while (   t < PL_bufend
+                           && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+                    {
                        t += UTF ? UTF8SKIP(t) : 1;
+                    }
                }
                while (t < PL_bufend && isSPACE(*t))
                    t++;
@@ -6042,8 +6079,9 @@ Perl_yylex(pTHX)
        }
        s--;
        if (PL_expect == XOPERATOR) {
-           if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
-               && isIDFIRST_lazy_if(s,UTF))
+           if (   PL_bufptr == PL_linestart
+                && ckWARN(WARN_SEMICOLON)
+               && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
            {
                CopLINE_dec(PL_curcop);
                Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
@@ -6325,7 +6363,10 @@ Perl_yylex(pTHX)
            POSTDEREF('$');
        }
 
-       if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
+       if (   s[1] == '#'
+            && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
+                || strchr("{$:+-@", s[2])))
+        {
            PL_tokenbuf[0] = '@';
            s = scan_ident(s + 1, PL_tokenbuf + 1,
                           sizeof PL_tokenbuf - 1, FALSE);
@@ -6374,14 +6415,18 @@ Perl_yylex(pTHX)
                    if (ckWARN(WARN_SYNTAX)) {
                        char *t = s+1;
 
-                       while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
+                        while (   isSPACE(*t)
+                               || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
+                               || *t == '$')
+                        {
                            t += UTF ? UTF8SKIP(t) : 1;
+                        }
                        if (*t++ == ',') {
                            PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                            while (t < PL_bufend && *t != ']')
                                t++;
                            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "Multidimensional syntax %"UTF8f" not supported",
+                                       "Multidimensional syntax %" UTF8f " not supported",
                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
                        }
                    }
@@ -6396,7 +6441,7 @@ Perl_yylex(pTHX)
                            do {
                                t++;
                            } while (isSPACE(*t));
-                           if (isIDFIRST_lazy_if(t,UTF)) {
+                           if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
                                STRLEN len;
                                t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
                                              &len);
@@ -6405,7 +6450,7 @@ Perl_yylex(pTHX)
                                if (*t == ';'
                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
                                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "You need to quote \"%"UTF8f"\"",
+                                       "You need to quote \"%" UTF8f "\"",
                                         UTF8fARG(UTF, len, tmpbuf));
                            }
                        }
@@ -6419,9 +6464,12 @@ Perl_yylex(pTHX)
                    PL_expect = XOPERATOR;
                else if (strchr("$@\"'`q", *s))
                    PL_expect = XTERM;          /* e.g. print $fh "foo" */
-               else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
+               else if (   strchr("&*<%", *s)
+                         && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+                {
                    PL_expect = XTERM;          /* e.g. print $fh &sub */
-               else if (isIDFIRST_lazy_if(s,UTF)) {
+                }
+               else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                    char tmpbuf[sizeof PL_tokenbuf];
                    int t2;
                    scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
@@ -6520,10 +6568,10 @@ Perl_yylex(pTHX)
         }
        else {
            /* Disable warning on "study /blah/" */
-           if (PL_oldoldbufptr == PL_last_uni
-            && (*PL_last_uni != 's' || s - PL_last_uni < 5
-                || memNE(PL_last_uni, "study", 5)
-                || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
+           if (    PL_oldoldbufptr == PL_last_uni
+                && (   *PL_last_uni != 's' || s - PL_last_uni < 5
+                    || memNE(PL_last_uni, "study", 5)
+                    || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
             ))
                check_uni();
            s = scan_pat(s,OP_MATCH);
@@ -6951,7 +6999,7 @@ Perl_yylex(pTHX)
                    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                  TRUE, &morelen);
                    if (!morelen)
-                       Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+                       Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
                                UTF8fARG(UTF, len, PL_tokenbuf),
                                *s == '\'' ? "'" : "::");
                    len += morelen;
@@ -6979,8 +7027,9 @@ Perl_yylex(pTHX)
                    if (ckWARN(WARN_BAREWORD)
                        && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                        Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-                         "Bareword \"%"UTF8f"\" refers to nonexistent package",
-                          UTF8fARG(UTF, len, PL_tokenbuf));
+                                    "Bareword \"%" UTF8f
+                                    "\" refers to nonexistent package",
+                                    UTF8fARG(UTF, len, PL_tokenbuf));
                    len -= 2;
                    PL_tokenbuf[len] = '\0';
                    gv = NULL;
@@ -7049,8 +7098,8 @@ Perl_yylex(pTHX)
                    s = skipspace(s);
 
                    /* Two barewords in a row may indicate method call. */
-
-                   if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+                   if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
+                            || *s == '$')
                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
                     {
                        goto method;
@@ -7135,9 +7184,11 @@ Perl_yylex(pTHX)
 
                /* If followed by a bareword, see if it looks like indir obj. */
 
-               if (tmp == 1 && !orig_keyword
-                       && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
-                       && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+               if (   tmp == 1
+                    && !orig_keyword
+                    && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
+                    && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+                {
                  method:
                    if (lex && !off) {
                        assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
@@ -7281,7 +7332,7 @@ Perl_yylex(pTHX)
                if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
                 && saw_infix_sigil) {
                    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                    "Operator or semicolon missing before %c%"UTF8f,
+                                    "Operator or semicolon missing before %c%" UTF8f,
                                     lastchar,
                                     UTF8fARG(UTF, strlen(PL_tokenbuf),
                                              PL_tokenbuf));
@@ -7300,7 +7351,7 @@ Perl_yylex(pTHX)
        case KEY___LINE__:
            FUN0OP(
                 newSVOP(OP_CONST, 0,
-                   Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
+                   Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
            );
 
        case KEY___PACKAGE__:
@@ -7405,7 +7456,7 @@ Perl_yylex(pTHX)
                    goto just_a_word;
                }
                if (!tmp)
-                   Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+                   Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
                                      UTF8fARG(UTF, len, PL_tokenbuf));
                if (tmp < 0)
                    tmp = -tmp;
@@ -7631,7 +7682,9 @@ Perl_yylex(pTHX)
                return REPORT(0);
            pl_yylval.ival = CopLINE(PL_curcop);
            s = skipspace(s);
-           if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
+            if (   PL_expect == XSTATE
+                && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
+            {
                char *p = s;
 
                if ((PL_bufend - p) >= 3
@@ -7644,7 +7697,7 @@ Perl_yylex(pTHX)
                    p += 3;
                p = skipspace(p);
                 /* skip optional package name, as in "for my abc $x (..)" */
-               if (isIDFIRST_lazy_if(p,UTF)) {
+               if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
                    p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                    p = skipspace(p);
                }
@@ -7887,7 +7940,7 @@ Perl_yylex(pTHX)
            }
            PL_in_my = (U16)tmp;
            s = skipspace(s);
-           if (isIDFIRST_lazy_if(s,UTF)) {
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
                s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                if (len == 3 && strEQs(PL_tokenbuf, "sub"))
                    goto really_sub;
@@ -7937,10 +7990,10 @@ Perl_yylex(pTHX)
 
        case KEY_open:
            s = skipspace(s);
-           if (isIDFIRST_lazy_if(s,UTF)) {
-          const char *t;
-          d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
-              &len);
+            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+                const char *t;
+                d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+                              &len);
                for (t=d; isSPACE(*t);)
                    t++;
                if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7950,7 +8003,7 @@ Perl_yylex(pTHX)
                    && !keyword(s, d-s, 0)
                ) {
                    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-                      "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+                      "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
                        UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
                }
            }
@@ -8097,9 +8150,13 @@ Perl_yylex(pTHX)
            {
                *PL_tokenbuf = '\0';
                s = force_word(s,BAREWORD,TRUE,TRUE);
-               if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
+                if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
+                                           PL_tokenbuf + sizeof(PL_tokenbuf),
+                                           UTF))
+                {
                    gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
+                }
                else if (*s == '<')
                    yyerror("<> at require-statement should be quotes");
            }
@@ -8301,7 +8358,7 @@ Perl_yylex(pTHX)
                s = skipspace(s);
                 d = SvPVX(PL_linestr)+off;
 
-               if (isIDFIRST_lazy_if(s,UTF)
+                if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
                     || *s == '\''
                     || (*s == ':' && s[1] == ':'))
                {
@@ -8380,7 +8437,7 @@ Perl_yylex(pTHX)
                    if (!have_name)
                        Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
                    else if (*s != ';' && *s != '}')
-                       Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
+                       Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
                }
 
                if (have_proto) {
@@ -8693,7 +8750,7 @@ S_pending_ident(pTHX)
         {
             /* Downgraded from fatal to warning 20000522 mjd */
             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Possible unintended interpolation of %"UTF8f
+                       "Possible unintended interpolation of %" UTF8f
                        " in string",
                        UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
         }
@@ -8747,10 +8804,10 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
        s++;
     while (s < PL_bufend && isSPACE(*s))
        s++;
-    if (isIDFIRST_lazy_if(s,UTF)) {
+    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
        const char * const w = s;
         s += UTF ? UTF8SKIP(s) : 1;
-       while (isWORDCHAR_lazy_if(s,UTF))
+       while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
            s += UTF ? UTF8SKIP(s) : 1;
        while (s < PL_bufend && isSPACE(*s))
            s++;
@@ -8929,15 +8986,16 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
     for (;;) {
         if (*d >= e)
             Perl_croak(aTHX_ "%s", ident_too_long);
-        if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+        if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
              /* The UTF-8 case must come first, otherwise things
              * like c\N{COMBINING TILDE} would start failing, as the
              * isWORDCHAR_A case below would gobble the 'c' up.
              */
 
             char *t = *s + UTF8SKIP(*s);
-            while (isIDCONT_utf8((U8*)t))
+            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
                 t += UTF8SKIP(t);
+            }
             if (*d + (t - *s) > e)
                 Perl_croak(aTHX_ "%s", ident_too_long);
             Copy(*s, *d, t - *s, char);
@@ -8949,7 +9007,10 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
                 *(*d)++ = *(*s)++;
             } while (isWORDCHAR_A(**s) && *d < e);
         }
-        else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+        else if (   allow_package
+                 && **s == '\''
+                 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
+        {
             *(*d)++ = ':';
             *(*d)++ = ':';
             (*s)++;
@@ -9000,10 +9061,10 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  *      Because all ASCII characters have the same representation whether
  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
  *      '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, is_utf8)                                       \
-    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
-                         ? isIDFIRST_utf8((U8*) (s))                          \
-                         : (isGRAPH_L1(*s)                                    \
+#define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                          \
+                         ? isIDFIRST_utf8_safe(s, e)                        \
+                         : (isGRAPH_L1(*s)                                  \
                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
 
 STATIC char *
@@ -9044,7 +9105,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
     /* Here, it is not a run-of-the-mill identifier name */
 
     if (*s == '$' && s[1]
-        && (isIDFIRST_lazy_if(s+1,is_utf8)
+        && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
             || isDIGIT_A((U8)s[1])
             || s[1] == '$'
             || s[1] == '{'
@@ -9067,7 +9128,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
     if ((s <= PL_bufend - (is_utf8)
                           ? UTF8SKIP(s)
                           : 1)
-        && VALID_LEN_ONE_IDENT(s, is_utf8))
+        && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
     {
         if (is_utf8) {
             const STRLEN skip = UTF8SKIP(s);
@@ -9095,7 +9156,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         bool skip;
         char *s2;
         /* If we were processing {...} notation then...  */
-       if (isIDFIRST_lazy_if(d,is_utf8)) {
+        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
             /* if it starts as a valid identifier, assume that it is one.
                (the later check for } being at the expected point will trap
                cases where this doesn't pan out.)  */
@@ -9174,7 +9235,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                     orig_copline = CopLINE(PL_curcop);
                     CopLINE_set(PL_curcop, tmp_copline);
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
+                       "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
                        funny, SVfARG(tmp), funny, SVfARG(tmp));
                     CopLINE_set(PL_curcop, orig_copline);
                }
@@ -9211,7 +9272,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
 
     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
-        if (isWORDCHAR_lazy_if(*s, UTF)) {
+        if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
                        UTF ? SVf_UTF8 : 0);
             (*s) += charlen;
@@ -9455,7 +9516,8 @@ S_scan_subst(pTHX_ char *start)
          * spreads over */
         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
-        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
+        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
+                                                                    cBOOL(es);
     }
 
     PL_lex_op = (OP*)pm;
@@ -9595,10 +9657,12 @@ S_scan_heredoc(pTHX_ char *s)
            s++, term = '\'';
        else
            term = '"';
-       if (!isWORDCHAR_lazy_if(s,UTF))
+       if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
            deprecate("bare << to mean <<\"\"");
        peek = s;
-       while (isWORDCHAR_lazy_if(peek,UTF)) {
+        while (
+               isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
+        {
            peek += UTF ? UTF8SKIP(peek) : 1;
        }
        len = (peek - s >= e - d) ? (e - d) : (peek - s);
@@ -9896,8 +9960,8 @@ S_scan_heredoc(pTHX_ char *s)
        STRLEN herelen = SvCUR(tmpstr);
        char *ss = SvPVX(tmpstr);
        char *se = ss + herelen;
-       SV *newstr = newSVpvs("");
-       SvGROW(newstr, herelen);
+        SV *newstr = newSV(herelen+1);
+        SvPOK_on(newstr);
 
        /* Trim leading whitespace */
        while (ss < se) {
@@ -9905,6 +9969,7 @@ S_scan_heredoc(pTHX_ char *s)
            if (*ss == '\n') {
                sv_catpv(newstr,"\n");
                ss++;
+               linecount++;
 
            /* Found our indentation? Strip it */
            } else if (se - ss >= indent_len
@@ -9928,12 +9993,9 @@ S_scan_heredoc(pTHX_ char *s)
                    (int)linecount
                );
            }
-
-           linecount++;
        }
-
-       sv_setsv(tmpstr,newstr);
-
+        /* avoid sv_setsv() as we dont wan't to COW here */
+        sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
        Safefree(indent);
        SvREFCNT_dec_NN(newstr);
     }
@@ -10017,8 +10079,9 @@ S_scan_inputsymbol(pTHX_ char *start)
     if (*d == '$' && d[1]) d++;
 
     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
-    while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+    while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
        d += UTF ? UTF8SKIP(d) : 1;
+    }
 
     /* If we've tried to read what we allow filehandles to look like, and
        there's still text left, then it must be a glob() and not a getline.
@@ -10182,6 +10245,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     STRLEN termlen;            /* length of terminating string */
     line_t herelines;
 
+    /* The delimiters that have a mirror-image closing one */
+    const char * opening_delims = "([{<";
+    const char * closing_delims = ")]}>";
+
     PERL_ARGS_ASSERT_SCAN_STR;
 
     /* skip space before the delimiter */
@@ -10194,15 +10261,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
 
     /* after skipping whitespace, the next character is the terminator */
     term = *s;
-    if (!UTF) {
+    if (!UTF || UTF8_IS_INVARIANT(term)) {
        termcode = termstr[0] = term;
        termlen = 1;
     }
     else {
        termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
        Copy(s, termstr, termlen, U8);
-       if (!UTF8_IS_INVARIANT(term))
-           has_utf8 = TRUE;
     }
 
     /* mark where we are */
@@ -10210,9 +10275,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
     PL_multi_open = termcode;
     herelines = PL_parser->herelines;
 
-    /* find corresponding closing delimiter */
-    if (term && (tmps = strchr("([{< )]}> )]}>",term)))
-       termcode = termstr[0] = term = tmps[5];
+    /* If the delimiter has a mirror-image closing one, get it */
+    if (term && (tmps = strchr(opening_delims, term))) {
+        termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
+    }
 
     PL_multi_close = termcode;
 
@@ -11221,32 +11287,32 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
            Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
     }
     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
-    Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
+    Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
         OutCopFILE(PL_curcop),
         (IV)(PL_parser->preambling == NOLINE
                ? CopLINE(PL_curcop)
                : PL_parser->preambling));
     if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+       Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
                             UTF8fARG(UTF, contlen, context));
     else
-       Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
+       Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
         Perl_sv_catpvf(aTHX_ msg,
-        "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
+        "  (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
         PL_multi_end = 0;
     }
     if (PL_in_eval & EVAL_WARNONLY) {
        PL_in_eval &= ~EVAL_WARNONLY;
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
     }
     else
        qerror(msg);
     if (PL_error_count >= 10) {
        SV * errsv;
        if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
-           Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
+           Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
                       SVfARG(errsv), OutCopFILE(PL_curcop));
        else
            Perl_croak(aTHX_ "%s has too many errors.\n",
@@ -11369,10 +11435,10 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
        Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
     }
     if (status < 0) {
-       Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+       Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
     }
     DEBUG_P(PerlIO_printf(Perl_debug_log,
-                         "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+                         "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
                          FPTR2DPTR(void *, S_utf16_textfilter),
                          reverse ? 'l' : 'b', idx, maxlen, status,
                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
@@ -11427,7 +11493,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 
            status = FILTER_READ(idx + 1, utf16_buffer,
                                 160 + (SvCUR(utf16_buffer) & 1));
-           DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+           DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
            DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
            if (status < 0) {
                /* Error */
@@ -11463,7 +11529,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
        }
     }
     DEBUG_P(PerlIO_printf(Perl_debug_log,
-                         "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+                         "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
                          status,
                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
@@ -11887,7 +11953,7 @@ Perl_parse_label(pTHX_ U32 flags)
        STRLEN wlen, bufptr_pos;
        lex_read_space(0);
        t = s = PL_bufptr;
-        if (!isIDFIRST_lazy_if(s, UTF))
+        if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
            goto no_label;
        t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
        if (word_takes_any_delimiter(s, wlen))