This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Bump the version of Module-CoreList in Maintainers.pl
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 72fc10b..544cd1a 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -583,7 +583,7 @@ S_missingterm(pTHX_ char *s)
        ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
            && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 /* The longest string we pass in.  */
-#define MAX_FEATURE_LEN (sizeof("switch")-1)
+#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 
 /*
  * S_feature_is_enabled
@@ -714,8 +714,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 
     if (!len) {
        parser->linestr = newSVpvs("\n;");
-    } else if (SvREADONLY(line) || s[len-1] != ';') {
-       parser->linestr = newSVsv(line);
+    } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
+       /* avoid tie/overload weirdness */
+       parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
        if (s[len-1] != ';')
            sv_catpvs(parser->linestr, "\n;");
     } else {
@@ -914,7 +915,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
 }
 
 /*
-=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
+=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 
 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -936,8 +937,9 @@ function is more convenient.
 */
 
 void
-Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
+Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 {
+    dVAR;
     char *bufptr;
     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
     if (flags & ~(LEX_STUFF_UTF8))
@@ -947,7 +949,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            goto plain_copy;
        } else {
            STRLEN highhalf = 0;
-           char *p, *e = pv+len;
+           const char *p, *e = pv+len;
            for (p = pv; p != e; p++)
                highhalf += !!(((U8)*p) & 0x80);
            if (!highhalf)
@@ -955,6 +957,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len+highhalf);
            PL_parser->bufend += len+highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
@@ -969,7 +973,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
     } else {
        if (flags & LEX_STUFF_UTF8) {
            STRLEN highhalf = 0;
-           char *p, *e = pv+len;
+           const char *p, *e = pv+len;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
                if (c >= 0xc4) {
@@ -993,6 +997,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len-highhalf);
            PL_parser->bufend += len-highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
@@ -1008,6 +1014,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
            PL_parser->bufend += len;
            Copy(pv, bufptr, len, char);
        }
@@ -1123,7 +1130,7 @@ it is not permitted to discard text that has yet to be lexed.
 Normally it is not necessarily to do this directly, because it suffices to
 use the implicit discarding behaviour of L</lex_next_chunk> and things
 based on it.  However, if a token stretches across multiple lines,
-and the lexing code has kept multiple lines of text in the buffer fof
+and the lexing code has kept multiple lines of text in the buffer for
 that purpose, then after completion of the token it would be wise to
 explicitly discard the now-unneeded earlier lines, to avoid future
 multi-line tokens growing the buffer without bound.
@@ -1201,9 +1208,6 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
     bool got_some;
     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
-#ifdef PERL_MAD
-    flags |= LEX_KEEP_PREVIOUS;
-#endif /* PERL_MAD */
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (!(flags & LEX_KEEP_PREVIOUS) &&
@@ -1305,6 +1309,7 @@ is encountered, an exception is generated.
 I32
 Perl_lex_peek_unichar(pTHX_ U32 flags)
 {
+    dVAR;
     char *s, *bufend;
     if (flags & ~(LEX_KEEP_PREVIOUS))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
@@ -1404,12 +1409,14 @@ chunk will not be discarded.
 =cut
 */
 
+#define LEX_NO_NEXT_CHUNK 0x80000000
+
 void
 Perl_lex_read_space(pTHX_ U32 flags)
 {
     char *s, *bufend;
     bool need_incline = 0;
-    if (flags & ~(LEX_KEEP_PREVIOUS))
+    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
 #ifdef PERL_MAD
     if (PL_skipwhite) {
@@ -1442,6 +1449,8 @@ Perl_lex_read_space(pTHX_ U32 flags)
            if (PL_madskills)
                sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
 #endif /* PERL_MAD */
+           if (flags & LEX_NO_NEXT_CHUNK)
+               break;
            PL_parser->bufptr = s;
            CopLINE_inc(PL_curcop);
            got_more = lex_next_chunk(flags);
@@ -1717,20 +1726,12 @@ S_skipspace(pTHX_ register char *s)
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
        while (s < PL_bufend && SPACE_OR_TAB(*s))
            s++;
-    } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
-       while (isSPACE(*s) && *s != '\n')
-           s++;
-       if (*s == '#') {
-           do {
-               s++;
-           } while (s != PL_bufend && *s != '\n');
-       }
-       if (*s == '\n')
-           s++;
     } else {
        STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
        PL_bufptr = s;
-       lex_read_space(LEX_KEEP_PREVIOUS);
+       lex_read_space(LEX_KEEP_PREVIOUS |
+               (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+                   LEX_NO_NEXT_CHUNK : 0));
        s = PL_bufptr;
        PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
        if (PL_linestart > PL_bufptr)
@@ -2098,9 +2099,15 @@ S_force_version(pTHX_ char *s, int guessing)
            curmad('X', newSVpvn(s,d-s));
        }
 #endif
-        if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
+        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
            SV *ver;
+#ifdef USE_LOCALE_NUMERIC
+           char *loc = setlocale(LC_NUMERIC, "C");
+#endif
             s = scan_num(s, &pl_yylval);
+#ifdef USE_LOCALE_NUMERIC
+           setlocale(LC_NUMERIC, loc);
+#endif
             version = pl_yylval.opval;
            ver = cSVOPx(version)->op_sv;
            if (SvPOK(ver) && !SvNIOK(ver)) {
@@ -2137,6 +2144,55 @@ S_force_version(pTHX_ char *s, int guessing)
 }
 
 /*
+ * S_force_strict_version
+ * Forces the next token to be a version number using strict syntax rules.
+ */
+
+STATIC char *
+S_force_strict_version(pTHX_ char *s)
+{
+    dVAR;
+    OP *version = NULL;
+#ifdef PERL_MAD
+    I32 startoff = s - SvPVX(PL_linestr);
+#endif
+    const char *errstr = NULL;
+
+    PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
+
+    while (isSPACE(*s)) /* leading whitespace */
+       s++;
+
+    if (is_STRICT_VERSION(s,&errstr)) {
+       SV *ver = newSV(0);
+       s = (char *)scan_version(s, ver, 0);
+       version = newSVOP(OP_CONST, 0, ver);
+    }
+    else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
+           (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
+    {
+       PL_bufptr = s;
+       if (errstr)
+           yyerror(errstr); /* version required */
+       return s;
+    }
+
+#ifdef PERL_MAD
+    if (PL_madskills && !version) {
+       sv_free(PL_nextwhite);  /* let next token collect whitespace */
+       PL_nextwhite = 0;
+       s = SvPVX(PL_linestr) + startoff;
+    }
+#endif
+    /* NOTE: The parser sees the package name and the VERSION swapped */
+    start_force(PL_curforce);
+    NEXTVAL_NEXTTOKE.opval = version;
+    force_next(WORD);
+
+    return s;
+}
+
+/*
  * S_tokeq
  * Tokenize a quoted string passed in as an SV.  It finds the next
  * chunk, up to end of string or a backslash.  It may make a new
@@ -2425,10 +2481,7 @@ S_sublex_done(pTHX)
 
   In patterns:
     backslashes:
-      double-quoted style: \r and \n
-      regexp special ones: \D \s
-      constants: \x31
-      backrefs: \1
+      constants: \N{NAME} only
       case and quoting: \U \Q \E
     stops on @ and $, but not for $ as tail anchor
 
@@ -2442,7 +2495,7 @@ S_sublex_done(pTHX)
   In double-quoted strings:
     backslashes:
       double-quoted style: \r and \n
-      constants: \x31
+      constants: \x31, etc.
       deprecated backrefs: \1 (in substitution replacements)
       case and quoting: \U \Q \E
     stops on @ and $
@@ -2470,14 +2523,14 @@ S_sublex_done(pTHX)
          check for embedded arrays
          check for embedded scalars
          if (backslash) {
-             leave intact backslashes from leaveit (below)
              deprecate \1 in substitution replacements
              handle string-changing backslashes \l \U \Q \E, etc.
              switch (what was escaped) {
                  handle \- in a transliteration (becomes a literal -)
+                 if a pattern and not \N{, go treat as regular character
                  handle \132 (octal characters)
                  handle \x15 and \x{1234} (hex characters)
-                 handle \N{name} (named characters)
+                 handle \N{name} (named characters, also \N{3,5} in a pattern)
                  handle \cV (control characters)
                  handle printf-style backslashes (\f, \r, \n, etc)
              } (end switch)
@@ -2535,6 +2588,7 @@ S_scan_const(pTHX_ char *start)
 
 
     while (s < send || dorange) {
+
         /* get transliterations out of the way (they're most literal) */
        if (PL_lex_inwhat == OP_TRANS) {
            /* expand a range A-Z to the full set of characters.  AIE! */
@@ -2754,9 +2808,12 @@ S_scan_const(pTHX_ char *start)
 
        /* backslashes */
        if (*s == '\\' && s+1 < send) {
+           char* e;    /* Can be used for ending '}', etc. */
+
            s++;
 
-           /* deprecate \1 in strings and substitution replacements */
+           /* warn on \1 - \9 in substitution replacements, but note that \11
+            * is an octal; and \19 is \1 followed by '9' */
            if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
                isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
            {
@@ -2770,13 +2827,28 @@ S_scan_const(pTHX_ char *start)
                --s;
                break;
            }
-           /* skip any other backslash escapes in a pattern */
-           else if (PL_lex_inpat) {
+           /* In a pattern, process \N, but skip any other backslash escapes.
+            * This is because we don't want to translate an escape sequence
+            * into a meta symbol and have the regex compiler use the meta
+            * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
+            * in spite of this, we do have to process \N here while the proper
+            * charnames handler is in scope.  See bugs #56444 and #62056.
+            * There is a complication because \N in a pattern may also stand
+            * for 'match a non-nl', and not mean a charname, in which case its
+            * processing should be deferred to the regex compiler.  To be a
+            * charname it must be followed immediately by a '{', and not look
+            * like \N followed by a curly quantifier, i.e., not something like
+            * \N{3,}.  regcurly returns a boolean indicating if it is a legal
+            * quantifier */
+           else if (PL_lex_inpat
+                   && (*s != 'N'
+                       || s[1] != '{'
+                       || regcurly(s + 1)))
+           {
                *d++ = NATIVE_TO_NEED(has_utf8,'\\');
                goto default_action;
            }
 
-           /* if we get here, it's either a quoted -, or a digit */
            switch (*s) {
 
            /* quoted - in transliterations */
@@ -2796,7 +2868,7 @@ S_scan_const(pTHX_ char *start)
                    goto default_action;
                }
 
-           /* eg. \132 indicates the octal constant 0x132 */
+           /* eg. \132 indicates the octal constant 0132 */
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                {
@@ -2807,6 +2879,21 @@ S_scan_const(pTHX_ char *start)
                }
                goto NUM_ESCAPE_INSERT;
 
+           /* eg. \o{24} indicates the octal constant \024 */
+           case 'o':
+               {
+                   STRLEN len;
+                   const char* error;
+
+                   bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
+                   s += len;
+                   if (! valid) {
+                       yyerror(error);
+                       continue;
+                   }
+                   goto NUM_ESCAPE_INSERT;
+               }
+
            /* eg. \x24 indicates the hex constant 0x24 */
            case 'x':
                ++s;
@@ -2835,15 +2922,13 @@ S_scan_const(pTHX_ char *start)
                }
 
              NUM_ESCAPE_INSERT:
-               /* Insert oct, hex, or \N{U+...} escaped character.  There will
-                * always be enough room in sv since such escapes will be
-                * longer than any UTF-8 sequence they can end up as, except if
-                * they force us to recode the rest of the string into utf8 */
+               /* Insert oct or hex escaped character.  There will always be
+                * enough room in sv since such escapes will be longer than any
+                * UTF-8 sequence they can end up as, except if they force us
+                * to recode the rest of the string into utf8 */
                
                /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native).  (It has to be done before
-                * here because \N is interpreted as unicode, and oct and hex
-                * as native.) */
+                * unicode (converted from native). */
                if (!UNI_IS_INVARIANT(uv)) {
                    if (!has_utf8 && uv > 255) {
                        /* Might need to recode whatever we have accumulated so
@@ -2883,104 +2968,342 @@ S_scan_const(pTHX_ char *start)
                }
                continue;
 
-           /* \N{LATIN SMALL LETTER A} is a named character, and so is
-            * \N{U+0041} */
            case 'N':
-               ++s;
-               if (*s == '{') {
-                   char* e = strchr(s, '}');
-                   SV *res;
-                   STRLEN len;
-                   const char *str;
-
-                   if (!e) {
+               /* In a non-pattern \N must be a named character, like \N{LATIN
+                * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
+                * mean to match a non-newline.  For non-patterns, named
+                * characters are converted to their string equivalents. In
+                * patterns, named characters are not converted to their
+                * ultimate forms for the same reasons that other escapes
+                * aren't.  Instead, they are converted to the \N{U+...} form
+                * to get the value from the charnames that is in effect right
+                * now, while preserving the fact that it was a named character
+                * so that the regex compiler knows this */
+
+               /* This section of code doesn't generally use the
+                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
+                * a close examination of this macro and determined it is a
+                * no-op except on utfebcdic variant characters.  Every
+                * character generated by this that would normally need to be
+                * enclosed by this macro is invariant, so the macro is not
+                * needed, and would complicate use of copy(). There are other
+                * parts of this file where the macro is used inconsistently,
+                * but are saved by it being a no-op */
+
+               /* The structure of this section of code (besides checking for
+                * errors and upgrading to utf8) is:
+                *  Further disambiguate between the two meanings of \N, and if
+                *      not a charname, go process it elsewhere
+                *  If of form \N{U+...}, pass it through if a pattern;
+                *      otherwise convert to utf8
+                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+                *  pattern; otherwise convert to utf8 */
+
+               /* Here, s points to the 'N'; the test below is guaranteed to
+                * succeed if we are being called on a pattern as we already
+                * know from a test above that the next character is a '{'.
+                * On a non-pattern \N must mean 'named sequence, which
+                * requires braces */
+               s++;
+               if (*s != '{') {
+                   yyerror("Missing braces on \\N{}"); 
+                   continue;
+               }
+               s++;
+
+               /* If there is no matching '}', it is an error. */
+               if (! (e = strchr(s, '}'))) {
+                   if (! PL_lex_inpat) {
                        yyerror("Missing right brace on \\N{}");
-                       e = s - 1;
-                       goto cont_scan;
-                   }
-                   if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
-                       /* \N{U+...} The ... is a unicode value even on EBCDIC
-                        * machines */
-                       I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
-                         PERL_SCAN_DISALLOW_PREFIX;
-                       s += 3;
-                       len = e - s;
-                       uv = grok_hex(s, &len, &flags, NULL);
-                       if ( e > s && len != (STRLEN)(e - s) ) {
-                           uv = 0xFFFD;
-                       }
-                       s = e + 1;
-                       goto NUM_ESCAPE_INSERT;
+                   } else {
+                       yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
                    }
-                   res = newSVpvn(s + 1, e - s - 1);
-                   res = new_constant( NULL, 0, "charnames",
-                                       res, NULL, s - 2, e - s + 3 );
-                   if (has_utf8)
-                       sv_utf8_upgrade(res);
-                   str = SvPV_const(res,len);
-#ifdef EBCDIC_NEVER_MIND
-                   /* charnames uses pack U and that has been
-                    * recently changed to do the below uni->native
-                    * mapping, so this would be redundant (and wrong,
-                    * the code point would be doubly converted).
-                    * But leave this in just in case the pack U change
-                    * gets revoked, but the semantics is still
-                    * desireable for charnames. --jhi */
-                   {
-                        UV uv = utf8_to_uvchr((const U8*)str, 0);
+                   continue;
+               }
 
-                        if (uv < 0x100) {
-                             U8 tmpbuf[UTF8_MAXBYTES+1], *d;
+               /* Here it looks like a named character */
 
-                             d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
-                             sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
-                             str = SvPV_const(res, len);
-                        }
-                   }
-#endif
-                   /* If destination is not in utf8 but this new character is,
-                    * recode the dest to utf8 */
-                   if (!has_utf8 && SvUTF8(res)) {
+               if (PL_lex_inpat) {
+
+                   /* XXX This block is temporary code.  \N{} implies that the
+                    * pattern is to have Unicode semantics, and therefore
+                    * currently has to be encoded in utf8.  By putting it in
+                    * utf8 now, we save a whole pass in the regular expression
+                    * compiler.  Once that code is changed so Unicode
+                    * semantics doesn't necessarily have to be in utf8, this
+                    * block should be removed */
+                   if (!has_utf8) {
                        SvCUR_set(sv, d - SvPVX_const(sv));
                        SvPOK_on(sv);
                        *d = '\0';
                        /* See Note on sizing above.  */
                        sv_utf8_upgrade_flags_grow(sv,
-                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                           len + (STRLEN)(send - s) + 1);
+                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                       /* 5 = '\N{' + cur char + NUL */
+                                       (STRLEN)(send - s) + 5);
                        d = SvPVX(sv) + SvCUR(sv);
                        has_utf8 = TRUE;
-                   } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+                   }
+               }
+
+               if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
+                   I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                               | PERL_SCAN_DISALLOW_PREFIX;
+                   STRLEN len;
 
-                       /* See Note on sizing above.  (NOTE: SvCUR() is not set
-                        * correctly here). */
-                       const STRLEN off = d - SvPVX_const(sv);
-                       d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
+                   /* For \N{U+...}, the '...' is a unicode value even on
+                    * EBCDIC machines */
+                   s += 2;         /* Skip to next char after the 'U+' */
+                   len = e - s;
+                   uv = grok_hex(s, &len, &flags, NULL);
+                   if (len == 0 || len != (STRLEN)(e - s)) {
+                       yyerror("Invalid hexadecimal number in \\N{U+...}");
+                       s = e + 1;
+                       continue;
                    }
+
+                   if (PL_lex_inpat) {
+
+                       /* Pass through to the regex compiler unchanged.  The
+                        * reason we evaluated the number above is to make sure
+                        * there wasn't a syntax error. */
+                       s -= 5;     /* Include the '\N{U+' */
+                       Copy(s, d, e - s + 1, char);    /* 1 = include the } */
+                       d += e - s + 1;
+                   }
+                   else {  /* Not a pattern: convert the hex to string */
+
+                        /* If destination is not in utf8, unconditionally
+                         * recode it to be so.  This is because \N{} implies
+                         * Unicode semantics, and scalars have to be in utf8
+                         * to guarantee those semantics */
+                       if (! has_utf8) {
+                           SvCUR_set(sv, d - SvPVX_const(sv));
+                           SvPOK_on(sv);
+                           *d = '\0';
+                           /* See Note on sizing above.  */
+                           sv_utf8_upgrade_flags_grow(
+                                       sv,
+                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                       UNISKIP(uv) + (STRLEN)(send - e) + 1);
+                           d = SvPVX(sv) + SvCUR(sv);
+                           has_utf8 = TRUE;
+                       }
+
+                       /* Add the string to the output */
+                       if (UNI_IS_INVARIANT(uv)) {
+                           *d++ = (char) uv;
+                       }
+                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                   }
+               }
+               else { /* Here is \N{NAME} but not \N{U+...}. */
+
+                   SV *res;            /* result from charnames */
+                   const char *str;    /* the string in 'res' */
+                   STRLEN len;         /* its length */
+
+                   /* Get the value for NAME */
+                   res = newSVpvn(s, e - s);
+                   res = new_constant( NULL, 0, "charnames",
+                                       /* includes all of: \N{...} */
+                                       res, NULL, s - 3, e - s + 4 );
+
+                   /* Most likely res will be in utf8 already since the
+                    * standard charnames uses pack U, but a custom translator
+                    * can leave it otherwise, so make sure.  XXX This can be
+                    * revisited to not have charnames use utf8 for characters
+                    * that don't need it when regexes don't have to be in utf8
+                    * for Unicode semantics.  If doing so, remember EBCDIC */
+                   sv_utf8_upgrade(res);
+                   str = SvPV_const(res, len);
+
+                   /* Don't accept malformed input */
+                   if (! is_utf8_string((U8 *) str, len)) {
+                       yyerror("Malformed UTF-8 returned by \\N");
+                   }
+                   else if (PL_lex_inpat) {
+
+                       if (! len) { /* The name resolved to an empty string */
+                           Copy("\\N{}", d, 4, char);
+                           d += 4;
+                       }
+                       else {
+                           /* In order to not lose information for the regex
+                           * compiler, pass the result in the specially made
+                           * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
+                           * the code points in hex of each character
+                           * returned by charnames */
+
+                           const char *str_end = str + len;
+                           STRLEN char_length;     /* cur char's byte length */
+                           STRLEN output_length;   /* and the number of bytes
+                                                      after this is translated
+                                                      into hex digits */
+                           const STRLEN off = d - SvPVX_const(sv);
+
+                           /* 2 hex per byte; 2 chars for '\N'; 2 chars for
+                            * max('U+', '.'); and 1 for NUL */
+                           char hex_string[2 * UTF8_MAXBYTES + 5];
+
+                           /* Get the first character of the result. */
+                           U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                   len,
+                                                   &char_length,
+                                                   UTF8_ALLOW_ANYUV);
+
+                           /* The call to is_utf8_string() above hopefully
+                            * guarantees that there won't be an error.  But
+                            * it's easy here to make sure.  The function just
+                            * above warns and returns 0 if invalid utf8, but
+                            * it can also return 0 if the input is validly a
+                            * NUL. Disambiguate */
+                           if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+                               uv = UNICODE_REPLACEMENT;
+                           }
+
+                           /* Convert first code point to hex, including the
+                            * boiler plate before it */
+                           sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
+                           output_length = strlen(hex_string);
+
+                           /* Make sure there is enough space to hold it */
+                           d = off + SvGROW(sv, off
+                                                + output_length
+                                                + (STRLEN)(send - e)
+                                                + 2);  /* '}' + NUL */
+                           /* And output it */
+                           Copy(hex_string, d, output_length, char);
+                           d += output_length;
+
+                           /* For each subsequent character, append dot and
+                            * its ordinal in hex */
+                           while ((str += char_length) < str_end) {
+                               const STRLEN off = d - SvPVX_const(sv);
+                               U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                       str_end - str,
+                                                       &char_length,
+                                                       UTF8_ALLOW_ANYUV);
+                               if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+                                   uv = UNICODE_REPLACEMENT;
+                               }
+
+                               sprintf(hex_string, ".%X", (unsigned int) uv);
+                               output_length = strlen(hex_string);
+
+                               d = off + SvGROW(sv, off
+                                                    + output_length
+                                                    + (STRLEN)(send - e)
+                                                    + 2);      /* '}' +  NUL */
+                               Copy(hex_string, d, output_length, char);
+                               d += output_length;
+                           }
+
+                           *d++ = '}'; /* Done.  Add the trailing brace */
+                       }
+                   }
+                   else { /* Here, not in a pattern.  Convert the name to a
+                           * string. */
+
+                        /* If destination is not in utf8, unconditionally
+                         * recode it to be so.  This is because \N{} implies
+                         * Unicode semantics, and scalars have to be in utf8
+                         * to guarantee those semantics */
+                       if (! has_utf8) {
+                           SvCUR_set(sv, d - SvPVX_const(sv));
+                           SvPOK_on(sv);
+                           *d = '\0';
+                           /* See Note on sizing above.  */
+                           sv_utf8_upgrade_flags_grow(sv,
+                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                               len + (STRLEN)(send - s) + 1);
+                           d = SvPVX(sv) + SvCUR(sv);
+                           has_utf8 = TRUE;
+                       } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+
+                           /* See Note on sizing above.  (NOTE: SvCUR() is not
+                            * set correctly here). */
+                           const STRLEN off = d - SvPVX_const(sv);
+                           d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
+                       }
+                       Copy(str, d, len, char);
+                       d += len;
+                   }
+                   SvREFCNT_dec(res);
+
+                   /* Deprecate non-approved name syntax */
+                   if (ckWARN_d(WARN_DEPRECATED)) {
+                       bool problematic = FALSE;
+                       char* i = s;
+
+                       /* For non-ut8 input, look to see that the first
+                        * character is an alpha, then loop through the rest
+                        * checking that each is a continuation */
+                       if (! this_utf8) {
+                           if (! isALPHAU(*i)) problematic = TRUE;
+                           else for (i = s + 1; i < e; i++) {
+                               if (isCHARNAME_CONT(*i)) continue;
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       else {
+                           /* Similarly for utf8.  For invariants can check
+                            * directly.  We accept anything above the latin1
+                            * range because it is immaterial to Perl if it is
+                            * correct or not, and is expensive to check.  But
+                            * it is fairly easy in the latin1 range to convert
+                            * the variants into a single character and check
+                            * those */
+                           if (UTF8_IS_INVARIANT(*i)) {
+                               if (! isALPHAU(*i)) problematic = TRUE;
+                           } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                               if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+                                                                           *(i+1)))))
+                               {
+                                   problematic = TRUE;
+                               }
+                           }
+                           if (! problematic) for (i = s + UTF8SKIP(s);
+                                                   i < e;
+                                                   i+= UTF8SKIP(i))
+                           {
+                               if (UTF8_IS_INVARIANT(*i)) {
+                                   if (isCHARNAME_CONT(*i)) continue;
+                               } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                                   continue;
+                               } else if (isCHARNAME_CONT(
+                                           UNI_TO_NATIVE(
+                                           UTF8_ACCUMULATE(*i, *(i+1)))))
+                               {
+                                   continue;
+                               }
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       if (problematic) {
+                           /* The e-i passed to the final %.*s makes sure that
+                            * should the trailing NUL be missing that this
+                            * print won't run off the end of the string */
+                           Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                                       "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
+                                       (int)(i - s + 1), s, (int)(e - i), i + 1);
+                       }
+                   }
+               } /* End \N{NAME} */
 #ifdef EBCDIC
-                   if (!dorange)
-                       native_range = FALSE; /* \N{} is guessed to be Unicode */
+               if (!dorange) 
+                   native_range = FALSE; /* \N{} is defined to be Unicode */
 #endif
-                   Copy(str, d, len, char);
-                   d += len;
-                   SvREFCNT_dec(res);
-                 cont_scan:
-                   s = e + 1;
-               }
-               else
-                   yyerror("Missing braces on \\N{}");
+               s = e + 1;  /* Point to just after the '}' */
                continue;
 
            /* \c is a control character */
            case 'c':
                s++;
                if (s < send) {
-                   U8 c = *s++;
-#ifdef EBCDIC
-                   if (isLOWER(c))
-                       c = toUPPER(c);
-#endif
-                   *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
+                   *d++ = grok_bslash_c(*s++, 1);
                }
                else {
                    yyerror("Missing control char name in \\c");
@@ -3450,8 +3773,6 @@ Perl_filter_del(pTHX_ filter_t funcp)
     /* if filter is on top of stack (usual case) just pop it off */
     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
-       IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
-       IoANY(datasv) = (void *)NULL;
        sv_free(av_pop(PL_rsfp_filters));
 
         return;
@@ -3810,7 +4131,8 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
     s = SKIPSPACE1(s);
     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
        s = force_version(s, TRUE);
-       if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
+       if (*s == ';' || *s == '}'
+               || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
            start_force(PL_curforce);
            NEXTVAL_NEXTTOKE.opval = NULL;
            force_next(WORD);
@@ -4365,7 +4687,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
            }
-           incline(s);
+           if (PL_rsfp)
+               incline(s);
        } while (PL_doextract);
        PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
@@ -5409,7 +5732,7 @@ Perl_yylex(pTHX)
            }
        }
 
-       if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
+       if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
            PL_tokenbuf[0] = '@';
            s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
                           sizeof PL_tokenbuf - 1, FALSE);
@@ -5643,8 +5966,6 @@ Perl_yylex(pTHX)
                    pl_yylval.ival = 0;
                OPERATOR(DOTDOT);
            }
-           if (PL_expect != XOPERATOR)
-               check_uni();
            Aop(OP_CONCAT);
        }
        /* FALL THROUGH */
@@ -5837,8 +6158,6 @@ Perl_yylex(pTHX)
        /* Is this a label? */
        if (!anydelim && PL_expect == XSTATE
              && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
-           if (tmp)
-               Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
            s = d + 1;
            pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
            CLINE;
@@ -5886,8 +6205,9 @@ Perl_yylex(pTHX)
                gvp = 0;
                if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
                    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                  "Ambiguous call resolved as CORE::%s(), %s",
-                                  GvENAME(hgv), "qualify as such or use &");
+                                  "Ambiguous call resolved as CORE::%s(), "
+                                  "qualify as such or use &",
+                                  GvENAME(hgv));
            }
        }
 
@@ -6175,10 +6495,27 @@ Perl_yylex(pTHX)
                        const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
                        if (!protolen)
                            TERM(FUNC0SUB);
-                       if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
-                           OPERATOR(UNIOPSUB);
                        while (*proto == ';')
                            proto++;
+                       if (
+                           (
+                               (
+                                   *proto == '$' || *proto == '_'
+                                || *proto == '*'
+                               )
+                            && proto[1] == '\0'
+                           )
+                        || (
+                            *proto == '\\' && proto[1] && proto[2] == '\0'
+                           )
+                       )
+                           OPERATOR(UNIOPSUB);
+                       if (*proto == '\\' && proto[1] == '[') {
+                           const char *p = proto + 2;
+                           while(*p && *p != ']')
+                               ++p;
+                           if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
+                       }
                        if (*proto == '&' && *s == '{') {
                            if (PL_curstash)
                                sv_setpvs(PL_subname, "__ANON__");
@@ -6590,8 +6927,14 @@ Perl_yylex(pTHX)
 
        case KEY_eval:
            s = SKIPSPACE1(s);
-           PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
-           UNIBRACK(OP_ENTEREVAL);
+           if (*s == '{') { /* block eval */
+               PL_expect = XTERMBLOCK;
+               UNIBRACK(OP_ENTERTRY);
+           }
+           else { /* string eval */
+               PL_expect = XTERM;
+               UNIBRACK(OP_ENTEREVAL);
+           }
 
        case KEY_eof:
            UNI(OP_EOF);
@@ -6962,7 +7305,9 @@ Perl_yylex(pTHX)
 
        case KEY_package:
            s = force_word(s,WORD,FALSE,TRUE,FALSE);
-           s = force_version(s, FALSE);
+           s = SKIPSPACE1(s);
+           s = force_strict_version(s);
+           PL_lex_expect = XBLOCK;
            OPERATOR(PACKAGE);
 
        case KEY_pipe:
@@ -7349,7 +7694,7 @@ Perl_yylex(pTHX)
                    bool must_be_last = FALSE;
                    bool underscore = FALSE;
                    bool seen_underscore = FALSE;
-                   const bool warnsyntax = ckWARN(WARN_SYNTAX);
+                   const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
 
                    s = scan_str(s,!!PL_madskills,FALSE);
                    if (!s)
@@ -7361,7 +7706,7 @@ Perl_yylex(pTHX)
                        if (!isSPACE(*p)) {
                            d[tmp++] = *p;
 
-                           if (warnsyntax) {
+                           if (warnillegalproto) {
                                if (must_be_last)
                                    proto_after_greedy_proto = TRUE;
                                if (!strchr("$@%*;[]&\\_", *p)) {
@@ -7394,11 +7739,11 @@ Perl_yylex(pTHX)
                    }
                    d[tmp] = '\0';
                    if (proto_after_greedy_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                    "Prototype after '%c' for %"SVf" : %s",
                                    greedy_proto, SVfARG(PL_subname), d);
                    if (bad_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                    "Illegal character %sin prototype for %"SVf" : %s",
                                    seen_underscore ? "after '_' " : "",
                                    SVfARG(PL_subname), d);
@@ -7429,7 +7774,7 @@ Perl_yylex(pTHX)
                else if (*s != '{' && key == KEY_sub) {
                    if (!have_name)
                        Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
-                   else if (*s != ';')
+                   else if (*s != ';' && *s != '}')
                        Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
                }
 
@@ -7741,28 +8086,11 @@ S_pending_ident(pTHX)
     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
                                                      tokenbuf_len - 1));
     pl_yylval.opval->op_private = OPpCONST_ENTERED;
-    gv_fetchpvn_flags(
-           PL_tokenbuf + 1, tokenbuf_len - 1,
-           /* If the identifier refers to a stash, don't autovivify it.
-            * Change 24660 had the side effect of causing symbol table
-            * hashes to always be defined, even if they were freshly
-            * created and the only reference in the entire program was
-            * the single statement with the defined %foo::bar:: test.
-            * It appears that all code in the wild doing this actually
-            * wants to know whether sub-packages have been loaded, so
-            * by avoiding auto-vivifying symbol tables, we ensure that
-            * defined %foo::bar:: continues to be false, and the existing
-            * tests still give the expected answers, even though what
-            * they're actually testing has now changed subtly.
-            */
-           (*PL_tokenbuf == '%'
-            && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
-            && d[-1] == ':'
-            ? 0
-            : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
-           ((PL_tokenbuf[0] == '$') ? SVt_PV
-            : (PL_tokenbuf[0] == '@') ? SVt_PVAV
-            : SVt_PVHV));
+    gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+                    PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
+                    ((PL_tokenbuf[0] == '$') ? SVt_PV
+                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
+                     : SVt_PVHV));
     return WORD;
 }
 
@@ -8206,7 +8534,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
           if (name[1] == 'i' &&
               name[2] == 'e')
           {                                       /* tie        */
-            return KEY_tie;
+            return -KEY_tie;
           }
 
           goto unknown;
@@ -8650,7 +8978,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
                 case 'e':
                   if (name[3] == 'd')
                   {                               /* tied       */
-                    return KEY_tied;
+                    return -KEY_tied;
                   }
 
                   goto unknown;
@@ -9145,7 +9473,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
                     {
                       case 'e':
                         {                         /* untie      */
-                          return KEY_untie;
+                          return -KEY_untie;
                         }
 
                       case 'l':
@@ -11255,6 +11583,11 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
        SvREFCNT_dec(msg);
        return sv;
     }
+
+    /* charnames doesn't work well if there have been errors found */
+    if (PL_error_count > 0 && strEQ(key,"charnames"))
+       return &PL_sv_undef;
+
     cvp = hv_fetch(table, key, keylen, FALSE);
     if (!cvp || !SvOK(*cvp)) {
        why1 = "$^H{";
@@ -11368,7 +11701,7 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
     char *bracket = NULL;
     char funny = *s++;
     register char *d = dest;
-    register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
+    register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
 
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
@@ -11527,27 +11860,15 @@ static U32
 S_pmflag(U32 pmfl, const char ch) {
     switch (ch) {
        CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
-    case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
-    case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
-    case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
-    case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
+    case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
+    case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
+    case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
+    case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
+    case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
     }
     return pmfl;
 }
 
-void
-Perl_pmflag(pTHX_ U32* pmfl, int ch)
-{
-    PERL_ARGS_ASSERT_PMFLAG;
-
-    Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
-                    "Perl_pmflag() is deprecated, and will be removed from the XS API");
-
-    if (ch<256) {
-       *pmfl = S_pmflag(*pmfl, (char)ch);
-    }
-}
-
 STATIC char *
 S_scan_pat(pTHX_ char *start, I32 type)
 {
@@ -11599,6 +11920,12 @@ S_scan_pat(pTHX_ char *start, I32 type)
 #endif
     while (*s && strchr(valid_flags, *s))
        pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
+
+    if (isALNUM(*s)) {
+       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+           "Having no space between pattern and following word is deprecated");
+
+    }
 #ifdef PERL_MAD
     if (PL_madskills && modstart != s) {
        SV* tmptoken = newSVpvn(modstart, s - modstart);
@@ -11679,8 +12006,14 @@ S_scan_subst(pTHX_ char *start)
        }
        else if (strchr(S_PAT_MODS, *s))
            pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
-       else
+       else {
+           if (isALNUM(*s)) {
+               Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+                   "Having no space between pattern and following word is deprecated");
+
+           }
            break;
+       }
     }
 
 #ifdef PERL_MAD
@@ -12678,11 +13011,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
            const char *base, *Base, *max;
 
            /* check for hex */
-           if (s[1] == 'x') {
+           if (s[1] == 'x' || s[1] == 'X') {
                shift = 4;
                s += 2;
                just_zero = FALSE;
-           } else if (s[1] == 'b') {
+           } else if (s[1] == 'b' || s[1] == 'B') {
                shift = 1;
                s += 2;
                just_zero = FALSE;
@@ -12796,13 +13129,12 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
            }
 
-           sv = newSV(0);
            if (overflowed) {
                if (n > 4294967295.0)
                    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
                                   "%s number > %s non-portable",
                                   Base, max);
-               sv_setnv(sv, n);
+               sv = newSVnv(n);
            }
            else {
 #if UVSIZE > 4
@@ -12811,7 +13143,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                   "%s number > %s non-portable",
                                   Base, max);
 #endif
-               sv_setuv(sv, u);
+               sv = newSVuv(u);
            }
            if (just_zero && (PL_hints & HINT_NEW_INTEGER))
                sv = new_constant(start, s - start, "integer",
@@ -12942,9 +13274,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
        }
 
 
-       /* make an sv from the string */
-       sv = newSV(0);
-
        /*
            We try to do an integer conversion first if no characters
            indicating "float" have been found.
@@ -12956,12 +13285,12 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
             if (flags == IS_NUMBER_IN_UV) {
               if (uv <= IV_MAX)
-               sv_setiv(sv, uv); /* Prefer IVs over UVs. */
+               sv = newSViv(uv); /* Prefer IVs over UVs. */
               else
-               sv_setuv(sv, uv);
+               sv = newSVuv(uv);
             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
               if (uv <= (UV) IV_MIN)
-                sv_setiv(sv, -(IV)uv);
+                sv = newSViv(-(IV)uv);
               else
                floatit = TRUE;
             } else
@@ -12971,7 +13300,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
            /* terminate the string */
            *d = '\0';
            nv = Atof(PL_tokenbuf);
-           sv_setnv(sv, nv);
+           sv = newSVnv(nv);
        }
 
        if ( floatit
@@ -13289,17 +13618,17 @@ S_swallow_bom(pTHX_ U8 *s)
     switch (s[0]) {
     case 0xFF:
        if (s[1] == 0xFE) {
-           /* UTF-16 little-endian? (or UTF32-LE?) */
+           /* UTF-16 little-endian? (or UTF-32LE?) */
            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
-               Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
+               Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
 #ifndef PERL_NO_UTF16_FILTER
-           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
            s += 2;
            if (PL_bufend > (char*)s) {
                s = add_utf16_textfilter(s, TRUE);
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
 #endif
        }
        break;
@@ -13312,7 +13641,7 @@ S_swallow_bom(pTHX_ U8 *s)
                s = add_utf16_textfilter(s, FALSE);
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
 #endif
        }
        break;
@@ -13327,15 +13656,19 @@ S_swallow_bom(pTHX_ U8 *s)
             if (s[1] == 0) {
                  if (s[2] == 0xFE && s[3] == 0xFF) {
                       /* UTF-32 big-endian */
-                      Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+                      Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
                  }
             }
             else if (s[2] == 0 && s[3] != 0) {
                  /* Leading bytes
                   * 00 xx 00 xx
                   * are a good indicator of UTF-16BE. */
+#ifndef PERL_NO_UTF16_FILTER
                  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
-               s = add_utf16_textfilter(s, FALSE);
+                 s = add_utf16_textfilter(s, FALSE);
+#else
+                 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
+#endif
             }
        }
 #ifdef EBCDIC
@@ -13352,8 +13685,12 @@ S_swallow_bom(pTHX_ U8 *s)
                  /* Leading bytes
                   * xx 00 xx 00
                   * are a good indicator of UTF-16LE. */
+#ifndef PERL_NO_UTF16_FILTER
              if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
              s = add_utf16_textfilter(s, TRUE);
+#else
+             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
+#endif
         }
     }
     return (char*)s;
@@ -13371,9 +13708,11 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
     SV *const utf8_buffer = filter;
     IV status = IoPAGE(filter);
-    const bool reverse = (bool) IoLINES(filter);
+    const bool reverse = cBOOL(IoLINES(filter));
     I32 retval;
 
+    PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
+
     /* As we're automatically added, at the lowest level, and hence only called
        from this file, we can be sure that we're not called in block mode. Hence
        don't bother writing code to deal with block mode.  */
@@ -13487,6 +13826,8 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
 {
     SV *filter = filter_add(S_utf16_textfilter, NULL);
 
+    PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
+
     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
     sv_setpvs(filter, "");
     IoLINES(filter) = reversed;