Added descriptions to tests in reverse.t

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index c71cdb7..b60d720 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -773,12 +773,6 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
  void
  Perl_parser_free(pTHX_  const yy_parser *parser)
  {
-#ifdef PERL_MAD
-   I32 nexttoke = parser->lasttoke;
-#else
-   I32 nexttoke = parser->nexttoke;
-#endif
-
      PERL_ARGS_ASSERT_PARSER_FREE;
  
      PL_curcop = parser->saved_curcop;
@@ -792,22 +786,43 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
      SvREFCNT_dec(parser->rsfp_filters);
      SvREFCNT_dec(parser->lex_stuff);
      SvREFCNT_dec(parser->sublex_info.repl);
+
+    Safefree(parser->lex_brackstack);
+    Safefree(parser->lex_casestack);
+    Safefree(parser->lex_shared);
+    PL_parser = parser->old_parser;
+    Safefree(parser);
+}
+
+void
+Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
+{
+#ifdef PERL_MAD
+    I32 nexttoke = parser->lasttoke;
+#else
+    I32 nexttoke = parser->nexttoke;
+#endif
+    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
      while (nexttoke--) {
  #ifdef PERL_MAD
         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
-                               & 0xffff))
-           op_free(parser->nexttoke[nexttoke].next_val.opval);
+                               & 0xffff)
+        && parser->nexttoke[nexttoke].next_val.opval
+        && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
+        && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
+               op_free(parser->nexttoke[nexttoke].next_val.opval);
+               parser->nexttoke[nexttoke].next_val.opval = NULL;
+       }
  #else
-       if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff))
+       if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
+        && parser->nextval[nexttoke].opval
+        && parser->nextval[nexttoke].opval->op_slabbed
+        && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
             op_free(parser->nextval[nexttoke].opval);
+           parser->nextval[nexttoke].opval = NULL;
+       }
  #endif
      }
-
-    Safefree(parser->lex_brackstack);
-    Safefree(parser->lex_casestack);
-    Safefree(parser->lex_shared);
-    PL_parser = parser->old_parser;
-    Safefree(parser);
  }
  
  
@@ -994,10 +1009,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
         if (flags & LEX_STUFF_UTF8) {
             goto plain_copy;
         } else {
-           STRLEN highhalf = 0;
+           STRLEN highhalf = 0;    /* Count of variants */
             const char *p, *e = pv+len;
-           for (p = pv; p != e; p++)
-               highhalf += !!(((U8)*p) & 0x80);
+           for (p = pv; p != e; p++) {
+               if (! UTF8_IS_INVARIANT(*p)) {
+                    highhalf++;
+                }
+            }
             if (!highhalf)
                 goto plain_copy;
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
@@ -1008,9 +1026,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             PL_parser->bufend += len+highhalf;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
-               if (c & 0x80) {
-                   *bufptr++ = (char)(0xc0 | (c >> 6));
-                   *bufptr++ = (char)(0x80 | (c & 0x3f));
+               if (! UTF8_IS_INVARIANT(c)) {
+                   *bufptr++ = UTF8_TWO_BYTE_HI(c);
+                   *bufptr++ = UTF8_TWO_BYTE_LO(c);
                 } else {
                     *bufptr++ = (char)c;
                 }
@@ -1022,14 +1040,13 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             const char *p, *e = pv+len;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
-               if (c >= 0xc4) {
+               if (UTF8_IS_ABOVE_LATIN1(c)) {
                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
                                 "non-Latin-1 character into Latin-1 input");
-               } else if (c >= 0xc2 && p+1 != e &&
-                           (((U8)p[1]) & 0xc0) == 0x80) {
+               } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                     p++;
                     highhalf++;
-               } else if (c >= 0x80) {
+               } else if (! UTF8_IS_INVARIANT(c)) {
                     /* malformed UTF-8 */
                     ENTER;
                     SAVESPTR(PL_warnhook);
@@ -1046,17 +1063,20 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
             SvCUR_set(PL_parser->linestr,
                 SvCUR(PL_parser->linestr) + len-highhalf);
             PL_parser->bufend += len-highhalf;
-           for (p = pv; p != e; p++) {
-               U8 c = (U8)*p;
-               if (c & 0x80) {
-                   *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
-                   p++;
-               } else {
-                   *bufptr++ = (char)c;
+           p = pv;
+           while (p < e) {
+               if (UTF8_IS_INVARIANT(*p)) {
+                   *bufptr++ = *p;
+                    p++;
                 }
+               else {
+                    assert(p < e -1 );
+                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   p += 2;
+                }
             }
         } else {
-           plain_copy:
+         plain_copy:
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
             bufptr = PL_parser->bufptr;
             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
@@ -1404,10 +1424,10 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
             bufend = PL_parser->bufend;
         }
         head = (U8)*s;
-       if (!(head & 0x80))
+       if (UTF8_IS_INVARIANT(head))
             return head;
-       if (head & 0x40) {
-           len = PL_utf8skip[head];
+       if (UTF8_IS_START(head)) {
+           len = UTF8SKIP(&head);
             while ((STRLEN)(bufend-s) < len) {
                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
                     break;
@@ -1715,7 +1735,7 @@ S_incline(pTHX_ const char *s)
  /* skip space before PL_thistoken */
  
  STATIC char *
-S_skipspace0(pTHX_ register char *s)
+S_skipspace0(pTHX_ char *s)
  {
      PERL_ARGS_ASSERT_SKIPSPACE0;
  
@@ -1736,7 +1756,7 @@ S_skipspace0(pTHX_ register char *s)
  /* skip space after PL_thistoken */
  
  STATIC char *
-S_skipspace1(pTHX_ register char *s)
+S_skipspace1(pTHX_ char *s)
  {
      const char *start = s;
      I32 startoff = start - SvPVX(PL_linestr);
@@ -1763,7 +1783,7 @@ S_skipspace1(pTHX_ register char *s)
  }
  
  STATIC char *
-S_skipspace2(pTHX_ register char *s, SV **svp)
+S_skipspace2(pTHX_ char *s, SV **svp)
  {
      char *start;
      const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
@@ -1816,7 +1836,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
   */
  
  STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ char *s)
  {
  #ifdef PERL_MAD
      char *start = s;
@@ -2018,11 +2038,6 @@ S_force_next(pTHX_ I32 type)
         tokereport(type, &NEXTVAL_NEXTTOKE);
      }
  #endif
-    /* Don’t let opslab_force_free snatch it */
-    if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
-       assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
-       NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
-    }  
  #ifdef PERL_MAD
      if (PL_curforce < 0)
         start_force(PL_lasttoke);
@@ -2095,7 +2110,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
   */
  
  STATIC char *
-S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
  {
      dVAR;
      char *s;
@@ -2144,14 +2159,14 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
   */
  
  STATIC void
-S_force_ident(pTHX_ register const char *s, int kind)
+S_force_ident(pTHX_ const char *s, int kind)
  {
      dVAR;
  
      PERL_ARGS_ASSERT_FORCE_IDENT;
  
-    if (*s) {
-       const STRLEN len = strlen(s);
+    if (s[0]) {
+       const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
                                                                  UTF ? SVf_UTF8 : 0));
         start_force(PL_curforce);
@@ -2639,9 +2654,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
       * interior, hence to the "}".  Finds what the name resolves to, returning
       * an SV* containing it; NULL if no valid one found */
  
-    STRLEN len;
-    const char *str;
-    const char* i = s;
      SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
  
      HV * table;
@@ -2695,76 +2707,127 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         }
      }
  
-    /* A custom translator can leave res not in UTF-8, so make sure.  XXX This
-     * can be revisited to not use utf8 for characters that don't need it when
-     * regexes don't have to be in utf8 for Unicode semantics.  If doing so,
-     * remember EBCDIC */
-    sv_utf8_upgrade(res);
-
-    /* Don't accept malformed input */
-    str = SvPV_const(res, len);
-    if (! is_utf8_string((U8 *) str, len)) {
-        yyerror("Malformed UTF-8 returned by \\N");
-        return NULL;
-    }
+    /* Here, it isn't Perl's charname handler.  We can't rely on a
+     * user-supplied handler to validate the input name.  For non-UTF-8 input,
+     * look to see that the first character is legal.  Then loop through the
+     * rest checking that each is a continuation */
  
      /* This code needs to be sync'ed with a regex in _charnames.pm which does
       * the same thing */
  
-    /* For non-ut8 input, look to see that the first character is an alpha,
-     * then loop through the rest checking that each is a continuation */
      if (! UTF) {
-        if (! isALPHAU(*i)) {
+        if (! isALPHAU(*s)) {
              goto bad_charname;
          }
-        else for (i = s + 1; i < e; i++) {
-            if (! isCHARNAME_CONT(*i)) {
+        s++;
+        while (s < e) {
+            if (! isCHARNAME_CONT(*s)) {
                  goto bad_charname;
              }
+            s++;
          }
      }
      else {
-        /* Similarly for utf8.  For invariants can check directly.  We accept
-         * anything above the latin1 range because it is immaterial to Perl if
-         * it is correct or not, and is expensive to check.  But it is fairly
-         * easy in the latin1 range to convert the variants into a single
-         * character and check those */
-        if (UTF8_IS_INVARIANT(*i)) {
-            if (! isALPHAU(*i)) {
+        /* Similarly for utf8.  For invariants can check directly; for other
+         * Latin1, can calculate their code point and check; otherwise use a
+         * swash */
+        if (UTF8_IS_INVARIANT(*s)) {
+            if (! isALPHAU(*s)) {
                  goto bad_charname;
              }
-        } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
-                                                        *(i+1)))))
-            {
+            s++;
+        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
                  goto bad_charname;
              }
+            s += 2;
          }
-        for (i = s + UTF8SKIP(s); i < e; i+= UTF8SKIP(i)) {
-            if (UTF8_IS_INVARIANT(*i)) {
-                if (isCHARNAME_CONT(*i)) continue;
-            } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
-                continue;
-            } else if (isCHARNAME_CONT(
-                        UNI_TO_NATIVE(
-                        TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
-            {
-                continue;
+        else {
+            if (! PL_utf8_charname_begin) {
+                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                PL_utf8_charname_begin = _core_swash_init("utf8",
+                                                        "_Perl_Charname_Begin",
+                                                        &PL_sv_undef,
+                                                        1, 0, NULL, &flags);
+            }
+            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+                goto bad_charname;
+            }
+            s += UTF8SKIP(s);
+        }
+
+        while (s < e) {
+            if (UTF8_IS_INVARIANT(*s)) {
+                if (! isCHARNAME_CONT(*s)) {
+                    goto bad_charname;
+                }
+                s++;
+            }
+            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
+                                                                    *(s+1)))))
+                {
+                    goto bad_charname;
+                }
+                s += 2;
+            }
+            else {
+                if (! PL_utf8_charname_continue) {
+                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+                    PL_utf8_charname_continue = _core_swash_init("utf8",
+                                                "_Perl_Charname_Continue",
+                                                &PL_sv_undef,
+                                                1, 0, NULL, &flags);
+                }
+                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                    goto bad_charname;
+                }
+                s += UTF8SKIP(s);
              }
-            goto bad_charname;
          }
      }
  
-    return res;
+    if (SvUTF8(res)) { /* Don't accept malformed input */
+        const U8* first_bad_char_loc;
+        STRLEN len;
+        const char* const str = SvPV_const(res, len);
+        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+            /* If warnings are on, this will print a more detailed analysis of
+             * what is wrong than the error message below */
+            utf8n_to_uvuni(first_bad_char_loc,
+                           (char *) first_bad_char_loc - str,
+                           NULL, 0);
+
+            /* We deliberately don't try to print the malformed character,
+             * which might not print very well; it also may be just the first
+             * of many malformations, so don't print what comes after it */
+            yyerror_pv(
+              Perl_form(aTHX_
+                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
+                 (int) (e - backslash_ptr + 1), backslash_ptr,
+                 (int) ((char *) first_bad_char_loc - str), str
+              ),
+              SVf_UTF8);
+            return NULL;
+        }
+    }
  
-  bad_charname:
+    return res;
  
-    /* The e-i passed to the final %.*s makes sure that should the trailing NUL
-     * be missing that this print won't run off the end of the string */
-    yyerror(Perl_form(aTHX_
-        "Invalid character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
-        (int)(i - s + 1), s, (int)(e - i), i + 1));
-    return NULL;
+  bad_charname: {
+        int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
+
+        /* The final %.*s is meant to ensure that, should the trailing NUL be
+         * missing, this print won't run off the end of the string */
+        yyerror_pv(
+          Perl_form(aTHX_
+            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
+            (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
+            (int)(e - s + bad_char_size), s + bad_char_size
+          ),
+          UTF ? SVf_UTF8 : 0);
+        return NULL;
+    }
  }
  
  /*
@@ -2903,6 +2966,9 @@ S_scan_const(pTHX_ char *start)
         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
      }
  
+    /* Protect sv from errors and fatal warnings. */
+    ENTER_with_name("scan_const");
+    SAVEFREESV(sv);
  
      while (s < send || dorange) {
  
@@ -2922,7 +2988,7 @@ S_scan_const(pTHX_ char *start)
  #ifdef EBCDIC
                     && !native_range
  #endif
-                   ) {
+                ) {
                     char * const c = (char*)utf8_hop((U8*)d, -1);
                     char *e = d++;
                     while (e-- > c)
@@ -2974,7 +3040,6 @@ S_scan_const(pTHX_ char *start)
  #endif
  
                  if (min > max) {
-                   SvREFCNT_dec(sv);
                     Perl_croak(aTHX_
                                "Invalid range \"%c-%c\" in transliteration operator",
                                (char)min, (char)max);
@@ -3033,7 +3098,6 @@ S_scan_const(pTHX_ char *start)
             /* range begins (ignore - as first or last char) */
             else if (*s == '-' && s+1 < send  && s != start) {
                 if (didrange) {
-                   SvREFCNT_dec(sv);
                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
                 }
                 if (has_utf8
@@ -3338,31 +3402,6 @@ S_scan_const(pTHX_ char *start)
  
                 /* Here it looks like a named character */
  
-               if (PL_lex_inpat) {
-
-                   /* XXX This block is temporary code.  \N{} implies that the
-                    * pattern is to have Unicode semantics, and therefore
-                    * currently has to be encoded in utf8.  By putting it in
-                    * utf8 now, we save a whole pass in the regular expression
-                    * compiler.  Once that code is changed so Unicode
-                    * semantics doesn't necessarily have to be in utf8, this
-                    * block should be removed.  However, the code that parses
-                    * the output of this would have to be changed to not
-                    * necessarily expect utf8 */
-                   if (!has_utf8) {
-                       SvCUR_set(sv, d - SvPVX_const(sv));
-                       SvPOK_on(sv);
-                       *d = '\0';
-                       /* See Note on sizing above.  */
-                       sv_utf8_upgrade_flags_grow(sv,
-                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                       /* 5 = '\N{' + cur char + NUL */
-                                       (STRLEN)(send - s) + 5);
-                       d = SvPVX(sv) + SvCUR(sv);
-                       has_utf8 = TRUE;
-                   }
-               }
-
                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
                                 | PERL_SCAN_DISALLOW_PREFIX;
@@ -3443,73 +3482,88 @@ S_scan_const(pTHX_ char *start)
                             * returned by charnames */
  
                             const char *str_end = str + len;
-                           STRLEN char_length;     /* cur char's byte length */
-                           STRLEN output_length;   /* and the number of bytes
-                                                      after this is translated
-                                                      into hex digits */
                             const STRLEN off = d - SvPVX_const(sv);
  
-                           /* 2 hex per byte; 2 chars for '\N'; 2 chars for
-                            * max('U+', '.'); and 1 for NUL */
-                           char hex_string[2 * UTF8_MAXBYTES + 5];
-
-                           /* Get the first character of the result. */
-                           U32 uv = utf8n_to_uvuni((U8 *) str,
-                                                   len,
-                                                   &char_length,
-                                                   UTF8_ALLOW_ANYUV);
-
-                           /* The call to is_utf8_string() above hopefully
-                            * guarantees that there won't be an error.  But
-                            * it's easy here to make sure.  The function just
-                            * above warns and returns 0 if invalid utf8, but
-                            * it can also return 0 if the input is validly a
-                            * NUL. Disambiguate */
-                           if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-                               uv = UNICODE_REPLACEMENT;
-                           }
-
-                           /* Convert first code point to hex, including the
-                            * boiler plate before it.  For all these, we
-                            * convert to native format so that downstream code
-                            * can continue to assume the input is native */
-                           output_length =
-                               my_snprintf(hex_string, sizeof(hex_string),
-                                           "\\N{U+%X",
-                                           (unsigned int) UNI_TO_NATIVE(uv));
-
-                           /* Make sure there is enough space to hold it */
-                           d = off + SvGROW(sv, off
-                                                + output_length
-                                                + (STRLEN)(send - e)
-                                                + 2);  /* '}' + NUL */
-                           /* And output it */
-                           Copy(hex_string, d, output_length, char);
-                           d += output_length;
-
-                           /* For each subsequent character, append dot and
-                            * its ordinal in hex */
-                           while ((str += char_length) < str_end) {
-                               const STRLEN off = d - SvPVX_const(sv);
-                               U32 uv = utf8n_to_uvuni((U8 *) str,
-                                                       str_end - str,
-                                                       &char_length,
-                                                       UTF8_ALLOW_ANYUV);
-                               if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
-                                   uv = UNICODE_REPLACEMENT;
-                               }
-
-                               output_length =
-                                   my_snprintf(hex_string, sizeof(hex_string),
-                                           ".%X",
-                                           (unsigned int) UNI_TO_NATIVE(uv));
-
-                               d = off + SvGROW(sv, off
-                                                    + output_length
-                                                    + (STRLEN)(send - e)
-                                                    + 2);      /* '}' +  NUL */
-                               Copy(hex_string, d, output_length, char);
-                               d += output_length;
+                            if (! SvUTF8(res)) {
+                                /* For the non-UTF-8 case, we can determine the
+                                 * exact length needed without having to parse
+                                 * through the string.  Each character takes up
+                                 * 2 hex digits plus either a trailing dot or
+                                 * the "}" */
+                                d = off + SvGROW(sv, off
+                                                    + 3 * len
+                                                    + 6 /* For the "\N{U+", and
+                                                           trailing NUL */
+                                                    + (STRLEN)(send - e));
+                                Copy("\\N{U+", d, 5, char);
+                                d += 5;
+                                while (str < str_end) {
+                                    char hex_string[4];
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                                "%02X.", (U8) *str);
+                                    Copy(hex_string, d, 3, char);
+                                    d += 3;
+                                    str++;
+                                }
+                                d--;    /* We will overwrite below the final
+                                           dot with a right brace */
+                            }
+                            else {
+                                STRLEN char_length; /* cur char's byte length */
+
+                                /* and the number of bytes after this is
+                                 * translated into hex digits */
+                                STRLEN output_length;
+
+                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
+                                 * for max('U+', '.'); and 1 for NUL */
+                                char hex_string[2 * UTF8_MAXBYTES + 5];
+
+                                /* Get the first character of the result. */
+                                U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                        len,
+                                                        &char_length,
+                                                        UTF8_ALLOW_ANYUV);
+                                /* Convert first code point to hex, including
+                                 * the boiler plate before it.  For all these,
+                                 * we convert to native format so that
+                                 * downstream code can continue to assume the
+                                 * input is native */
+                                output_length =
+                                    my_snprintf(hex_string, sizeof(hex_string),
+                                            "\\N{U+%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                /* Make sure there is enough space to hold it */
+                                d = off + SvGROW(sv, off
+                                                    + output_length
+                                                    + (STRLEN)(send - e)
+                                                    + 2);      /* '}' + NUL */
+                                /* And output it */
+                                Copy(hex_string, d, output_length, char);
+                                d += output_length;
+
+                                /* For each subsequent character, append dot and
+                                * its ordinal in hex */
+                                while ((str += char_length) < str_end) {
+                                    const STRLEN off = d - SvPVX_const(sv);
+                                    U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                            str_end - str,
+                                                            &char_length,
+                                                            UTF8_ALLOW_ANYUV);
+                                    output_length =
+                                        my_snprintf(hex_string,
+                                            sizeof(hex_string),
+                                            ".%X",
+                                            (unsigned int) UNI_TO_NATIVE(uv));
+
+                                    d = off + SvGROW(sv, off
+                                                        + output_length
+                                                        + (STRLEN)(send - e)
+                                                        + 2);  /* '}' +  NUL */
+                                    Copy(hex_string, d, output_length, char);
+                                    d += output_length;
+                                }
                             }
  
                             *d++ = '}'; /* Done.  Add the trailing brace */
@@ -3670,6 +3724,7 @@ S_scan_const(pTHX_ char *start)
  
      /* return the substring (via pl_yylval) only if we parsed anything */
      if (s > PL_bufptr) {
+       SvREFCNT_inc_simple_void_NN(sv);
         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
             const char *const key = PL_lex_inpat ? "qr" : "q";
             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
@@ -3694,8 +3749,8 @@ S_scan_const(pTHX_ char *start)
                                 type, typelen);
         }
         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
-    } else
-       SvREFCNT_dec(sv);
+    }
+    LEAVE_with_name("scan_const");
      return s;
  }
  
@@ -3721,7 +3776,7 @@ S_scan_const(pTHX_ char *start)
  /* This is the one truly awful dwimmer necessary to conflate C and sed. */
  
  STATIC int
-S_intuit_more(pTHX_ register char *s)
+S_intuit_more(pTHX_ char *s)
  {
      dVAR;
  
@@ -4170,7 +4225,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
  }
  
  STATIC char *
-S_filter_gets(pTHX_ register SV *sv, STRLEN append)
+S_filter_gets(pTHX_ SV *sv, STRLEN append)
  {
      dVAR;
  
@@ -4610,8 +4665,6 @@ Perl_yylex(pTHX)
                     PL_lex_allbrackets--;
                 next_type &= 0xffff;
             }
-           if (S_is_opval_token(next_type) && pl_yylval.opval)
-               pl_yylval.opval->op_savefree = 0; /* release */
             return REPORT(next_type == 'p' ? pending_ident() : next_type);
         }
  
@@ -4653,9 +4706,11 @@ Perl_yylex(pTHX)
  #ifdef PERL_MAD
             while (PL_bufptr != PL_bufend &&
               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
-               if (!PL_thiswhite)
+               if (PL_madskills) {
+                 if (!PL_thiswhite)
                     PL_thiswhite = newSVpvs("");
-               sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+               }
                 PL_bufptr += 2;
             }
  #else
@@ -4671,9 +4726,11 @@ Perl_yylex(pTHX)
             s = PL_bufptr + 1;
             if (s[1] == '\\' && s[2] == 'E') {
  #ifdef PERL_MAD
-               if (!PL_thiswhite)
+               if (PL_madskills) {
+                 if (!PL_thiswhite)
                     PL_thiswhite = newSVpvs("");
-               sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+               }
  #endif
                 PL_bufptr = s + 3;
                 PL_lex_state = LEX_INTERPCONCAT;
@@ -5331,9 +5388,11 @@ Perl_yylex(pTHX)
      case ' ': case '\t': case '\f': case 013:
  #ifdef PERL_MAD
         PL_realtokenstart = -1;
-       if (!PL_thiswhite)
+       if (PL_madskills) {
+         if (!PL_thiswhite)
             PL_thiswhite = newSVpvs("");
-       sv_catpvn(PL_thiswhite, s, 1);
+         sv_catpvn(PL_thiswhite, s, 1);
+       }
  #endif
         s++;
         goto retry;
@@ -6047,7 +6106,7 @@ Perl_yylex(pTHX)
         force_next(formbrack ? '.' : '}');
         if (formbrack) LEAVE;
  #ifdef PERL_MAD
-       if (!PL_thistoken)
+       if (PL_madskills && !PL_thistoken)
             PL_thistoken = newSVpvs("");
  #endif
         if (formbrack == 2) { /* means . where arguments were expected */
@@ -7963,6 +8022,9 @@ Perl_yylex(pTHX)
                                   "Experimental \"%s\" subs not enabled",
                                    tmp == KEY_my    ? "my"    :
                                    tmp == KEY_state ? "state" : "our");
+                   Perl_ck_warner_d(aTHX_
+                       packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
+                       "The lexical_subs feature is experimental");
                     goto really_sub;
                 }
                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
@@ -8385,7 +8447,9 @@ Perl_yylex(pTHX)
                 SV *tmpwhite = 0;
  
                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
-               SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
+               SV *subtoken = PL_madskills
+                  ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
+                  : NULL;
                 PL_thistoken = 0;
  
                 d = s;
@@ -8960,6 +9024,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
      dVAR; dSP;
      HV * table = GvHV(PL_hintgv);               /* ^H */
      SV *res;
+    SV *errsv = NULL;
      SV **cvp;
      SV *cv, *typesv;
      const char *why1 = "", *why2 = "", *why3 = "";
@@ -9010,9 +9075,12 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
              why3 = "} is not defined";
          report:
              if (strEQ(key,"charnames")) {
-                msg = Perl_newSVpvf(aTHX_
-                        /* The +3 is for '\N{'; -4 for that, plus '}' */
-                        "Unknown charname '%.*s'", (int)typelen - 4, type + 3);
+                yyerror_pv(Perl_form(aTHX_
+                            /* The +3 is for '\N{'; -4 for that, plus '}' */
+                            "Unknown charname '%.*s'", (int)typelen - 4, type + 3
+                           ),
+                           UTF ? SVf_UTF8 : 0);
+                return sv;
              }
              else {
                  msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
@@ -9050,11 +9118,11 @@ now_ok:
      SPAGAIN ;
  
      /* Check the eval first */
-    if (!PL_in_eval && SvTRUE(ERRSV)) {
+    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
         STRLEN errlen;
         const char * errstr;
-       sv_catpvs(ERRSV, "Propagated");
-       errstr = SvPV_const(ERRSV, errlen);
+       sv_catpvs(errsv, "Propagated");
+       errstr = SvPV_const(errsv, errlen);
         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
         (void)POPs;
         res = SvREFCNT_inc_simple(sv);
@@ -9084,7 +9152,7 @@ now_ok:
     *slp
     */
  STATIC char *
-S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
  {
      dVAR;
      char *d = dest;
@@ -9127,7 +9195,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
  }
  
  STATIC char *
-S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
  {
      dVAR;
      char *bracket = NULL;
@@ -9711,7 +9779,7 @@ S_scan_trans(pTHX_ char *start)
  */
  
  STATIC char *
-S_scan_heredoc(pTHX_ register char *s)
+S_scan_heredoc(pTHX_ char *s)
  {
      dVAR;
      I32 op_type = OP_SCALAR;
@@ -10290,7 +10358,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
      s += termlen;
  #ifdef PERL_MAD
      tstart = SvPVX(PL_linestr) + stuffstart;
-    if (!PL_thisopen && !keep_delims) {
+    if (PL_madskills && !PL_thisopen && !keep_delims) {
         PL_thisopen = newSVpvn(tstart, s - tstart);
         stuffstart = s - SvPVX(PL_linestr);
      }
@@ -10934,7 +11002,7 @@ vstring:
  }
  
  STATIC char *
-S_scan_formline(pTHX_ register char *s)
+S_scan_formline(pTHX_ char *s)
  {
      dVAR;
      char *eol;
@@ -11202,9 +11270,10 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
      else
         qerror(msg);
      if (PL_error_count >= 10) {
-       if (PL_in_eval && SvCUR(ERRSV))
+       SV * errsv;
+       if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
-                      SVfARG(ERRSV), OutCopFILE(PL_curcop));
+                      SVfARG(errsv), OutCopFILE(PL_curcop));
         else
             Perl_croak(aTHX_ "%s has too many errors.\n",
              OutCopFILE(PL_curcop));