handy.h: Don't use isascii() as not in all libc's

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index cec8ac2..961866b 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -39,15 +39,13 @@ Individual members of C<PL_parser> have their own documentation.
  #include "EXTERN.h"
  #define PERL_IN_TOKE_C
  #include "perl.h"
+#include "dquote_static.c"
  
  #define new_constant(a,b,c,d,e,f,g)    \
         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  
  #define pl_yylval      (PL_parser->yylval)
  
-/* YYINITDEPTH -- initial size of the parser's stacks.  */
-#define YYINITDEPTH 200
-
  /* XXX temporary backwards compatibility */
  #define PL_lex_brackets                (PL_parser->lex_brackets)
  #define PL_lex_brackstack      (PL_parser->lex_brackstack)
@@ -675,13 +673,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
      parser->old_parser = oparser = PL_parser;
      PL_parser = parser;
  
-    Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
-    parser->ps = parser->stack;
-    parser->stack_size = YYINITDEPTH;
-
-    parser->stack->state = 0;
-    parser->yyerrstatus = 0;
-    parser->yychar = YYEMPTY;          /* Cause a token to be read.  */
+    parser->stack = NULL;
+    parser->ps = NULL;
+    parser->stack_size = 0;
  
      /* on scope exit, free this parser and restore any outer one */
      SAVEPARSER(parser);
@@ -714,8 +708,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
  
      if (!len) {
         parser->linestr = newSVpvs("\n;");
-    } else if (SvREADONLY(line) || s[len-1] != ';') {
-       parser->linestr = newSVsv(line);
+    } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
+       /* avoid tie/overload weirdness */
+       parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
         if (s[len-1] != ';')
             sv_catpvs(parser->linestr, "\n;");
      } else {
@@ -749,7 +744,6 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
         PerlIO_close(parser->rsfp);
      SvREFCNT_dec(parser->rsfp_filters);
  
-    Safefree(parser->stack);
      Safefree(parser->lex_brackstack);
      Safefree(parser->lex_casestack);
      PL_parser = parser->old_parser;
@@ -914,7 +908,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
+=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -936,8 +930,9 @@ function is more convenient.
  */
  
  void
-Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
+Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
  {
+    dVAR;
      char *bufptr;
      PERL_ARGS_ASSERT_LEX_STUFF_PVN;
      if (flags & ~(LEX_STUFF_UTF8))
@@ -947,7 +942,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
             goto plain_copy;
         } else {
             STRLEN highhalf = 0;
-           char *p, *e = pv+len;
+           const char *p, *e = pv+len;
             for (p = pv; p != e; p++)
                 highhalf += !!(((U8)*p) & 0x80);
             if (!highhalf)
@@ -955,6 +950,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
             bufptr = PL_parser->bufptr;
             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len+highhalf);
             PL_parser->bufend += len+highhalf;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
@@ -969,7 +966,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
      } else {
         if (flags & LEX_STUFF_UTF8) {
             STRLEN highhalf = 0;
-           char *p, *e = pv+len;
+           const char *p, *e = pv+len;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
                 if (c >= 0xc4) {
@@ -993,6 +990,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
             bufptr = PL_parser->bufptr;
             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len-highhalf);
             PL_parser->bufend += len-highhalf;
             for (p = pv; p != e; p++) {
                 U8 c = (U8)*p;
@@ -1008,6 +1007,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
             bufptr = PL_parser->bufptr;
             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
             PL_parser->bufend += len;
             Copy(pv, bufptr, len, char);
         }
@@ -1123,7 +1123,7 @@ it is not permitted to discard text that has yet to be lexed.
  Normally it is not necessarily to do this directly, because it suffices to
  use the implicit discarding behaviour of L</lex_next_chunk> and things
  based on it.  However, if a token stretches across multiple lines,
-and the lexing code has kept multiple lines of text in the buffer fof
+and the lexing code has kept multiple lines of text in the buffer for
  that purpose, then after completion of the token it would be wise to
  explicitly discard the now-unneeded earlier lines, to avoid future
  multi-line tokens growing the buffer without bound.
@@ -1302,6 +1302,7 @@ is encountered, an exception is generated.
  I32
  Perl_lex_peek_unichar(pTHX_ U32 flags)
  {
+    dVAR;
      char *s, *bufend;
      if (flags & ~(LEX_KEEP_PREVIOUS))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
@@ -1401,12 +1402,14 @@ chunk will not be discarded.
  =cut
  */
  
+#define LEX_NO_NEXT_CHUNK 0x80000000
+
  void
  Perl_lex_read_space(pTHX_ U32 flags)
  {
      char *s, *bufend;
      bool need_incline = 0;
-    if (flags & ~(LEX_KEEP_PREVIOUS))
+    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
  #ifdef PERL_MAD
      if (PL_skipwhite) {
@@ -1439,6 +1442,8 @@ Perl_lex_read_space(pTHX_ U32 flags)
             if (PL_madskills)
                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
  #endif /* PERL_MAD */
+           if (flags & LEX_NO_NEXT_CHUNK)
+               break;
             PL_parser->bufptr = s;
             CopLINE_inc(PL_curcop);
             got_more = lex_next_chunk(flags);
@@ -1714,20 +1719,12 @@ S_skipspace(pTHX_ register char *s)
      if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
         while (s < PL_bufend && SPACE_OR_TAB(*s))
             s++;
-    } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
-       while (isSPACE(*s) && *s != '\n')
-           s++;
-       if (*s == '#') {
-           do {
-               s++;
-           } while (s != PL_bufend && *s != '\n');
-       }
-       if (*s == '\n')
-           s++;
      } else {
         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
         PL_bufptr = s;
-       lex_read_space(LEX_KEEP_PREVIOUS);
+       lex_read_space(LEX_KEEP_PREVIOUS |
+               (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+                   LEX_NO_NEXT_CHUNK : 0));
         s = PL_bufptr;
         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
         if (PL_linestart > PL_bufptr)
@@ -1925,6 +1922,17 @@ S_force_next(pTHX_ I32 type)
  #endif
  }
  
+void
+Perl_yyunlex(pTHX)
+{   /* Hand any token held in PL_parser->yychar to force_next(), then mark yychar empty. */
+    if (PL_parser->yychar != YYEMPTY) {   /* YYEMPTY: no token is held, so nothing to do */
+       start_force(-1);
+       NEXTVAL_NEXTTOKE = PL_parser->yylval;   /* copy the parser's yylval alongside the token */
+       force_next(PL_parser->yychar);   /* yychar is passed as the token type */
+       PL_parser->yychar = YYEMPTY;   /* NOTE(review): presumably makes the parser fetch a token again -- confirm */
+    }
+}
+
  STATIC SV *
  S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
  {
@@ -2095,9 +2103,15 @@ S_force_version(pTHX_ char *s, int guessing)
             curmad('X', newSVpvn(s,d-s));
         }
  #endif
-        if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
+        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
             SV *ver;
+#ifdef USE_LOCALE_NUMERIC
+           char *loc = setlocale(LC_NUMERIC, "C");
+#endif
              s = scan_num(s, &pl_yylval);
+#ifdef USE_LOCALE_NUMERIC
+           setlocale(LC_NUMERIC, loc);
+#endif
              version = pl_yylval.opval;
             ver = cSVOPx(version)->op_sv;
             if (SvPOK(ver) && !SvNIOK(ver)) {
@@ -2134,6 +2148,55 @@ S_force_version(pTHX_ char *s, int guessing)
  }
  
  /*
+ * S_force_strict_version
+ * Forces the next token to be a version number using strict syntax rules.
+ */
+
+STATIC char *
+S_force_strict_version(pTHX_ char *s)
+{
+    dVAR;
+    OP *version = NULL;   /* stays NULL when no version is parsed */
+#ifdef PERL_MAD
+    I32 startoff = s - SvPVX(PL_linestr);   /* offset of s in PL_linestr, used to rewind below */
+#endif
+    const char *errstr = NULL;   /* handed to is_STRICT_VERSION(); reported via yyerror() if set */
+
+    PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
+
+    while (isSPACE(*s)) /* leading whitespace */
+       s++;
+
+    if (is_STRICT_VERSION(s,&errstr)) {   /* strict version found: scan it and wrap it in an OP_CONST */
+       SV *ver = newSV(0);
+       s = (char *)scan_version(s, ver, 0);
+       version = newSVOP(OP_CONST, 0, ver);
+    }
+    else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
+           (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
+    {   /* no version, and the next char is not ';', '{' or '}': bail out without forcing a token */
+       PL_bufptr = s;
+       if (errstr)
+           yyerror(errstr); /* version required */
+       return s;
+    }
+
+#ifdef PERL_MAD
+    if (PL_madskills && !version) {   /* no version was parsed: drop PL_nextwhite and rewind s */
+       sv_free(PL_nextwhite);  /* let next token collect whitespace */
+       PL_nextwhite = 0;
+       s = SvPVX(PL_linestr) + startoff;
+    }
+#endif
+    /* NOTE: The parser sees the package name and the VERSION swapped */
+    start_force(PL_curforce);
+    NEXTVAL_NEXTTOKE.opval = version;   /* may be NULL: this point is also reached with no version */
+    force_next(WORD);
+
+    return s;   /* updated scan position */
+}
+
+/*
   * S_tokeq
   * Tokenize a quoted string passed in as an SV.  It finds the next
   * chunk, up to end of string or a backslash.  It may make a new
@@ -2422,10 +2485,7 @@ S_sublex_done(pTHX)
  
    In patterns:
      backslashes:
-      double-quoted style: \r and \n
-      regexp special ones: \D \s
-      constants: \x31
-      backrefs: \1
+      constants: \N{NAME} only
        case and quoting: \U \Q \E
      stops on @ and $, but not for $ as tail anchor
  
@@ -2439,7 +2499,7 @@ S_sublex_done(pTHX)
    In double-quoted strings:
      backslashes:
        double-quoted style: \r and \n
-      constants: \x31
+      constants: \x31, etc.
        deprecated backrefs: \1 (in substitution replacements)
        case and quoting: \U \Q \E
      stops on @ and $
@@ -2467,14 +2527,14 @@ S_sublex_done(pTHX)
           check for embedded arrays
           check for embedded scalars
           if (backslash) {
-             leave intact backslashes from leaveit (below)
               deprecate \1 in substitution replacements
               handle string-changing backslashes \l \U \Q \E, etc.
               switch (what was escaped) {
                   handle \- in a transliteration (becomes a literal -)
+                 if a pattern and not \N{, go treat as regular character
                   handle \132 (octal characters)
                   handle \x15 and \x{1234} (hex characters)
-                 handle \N{name} (named characters)
+                 handle \N{name} (named characters, also \N{3,5} in a pattern)
                   handle \cV (control characters)
                   handle printf-style backslashes (\f, \r, \n, etc)
               } (end switch)
@@ -2532,6 +2592,7 @@ S_scan_const(pTHX_ char *start)
  
  
      while (s < send || dorange) {
+
          /* get transliterations out of the way (they're most literal) */
         if (PL_lex_inwhat == OP_TRANS) {
             /* expand a range A-Z to the full set of characters.  AIE! */
@@ -2751,9 +2812,12 @@ S_scan_const(pTHX_ char *start)
  
         /* backslashes */
         if (*s == '\\' && s+1 < send) {
+           char* e;    /* Can be used for ending '}', etc. */
+
             s++;
  
-           /* deprecate \1 in strings and substitution replacements */
+           /* warn on \1 - \9 in substitution replacements, but note that \11
+            * is an octal; and \19 is \1 followed by '9' */
             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
             {
@@ -2767,13 +2831,28 @@ S_scan_const(pTHX_ char *start)
                 --s;
                 break;
             }
-           /* skip any other backslash escapes in a pattern */
-           else if (PL_lex_inpat) {
+           /* In a pattern, process \N, but skip any other backslash escapes.
+            * This is because we don't want to translate an escape sequence
+            * into a meta symbol and have the regex compiler use the meta
+            * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
+            * in spite of this, we do have to process \N here while the proper
+            * charnames handler is in scope.  See bugs #56444 and #62056.
+            * There is a complication because \N in a pattern may also stand
+            * for 'match a non-nl', and not mean a charname, in which case its
+            * processing should be deferred to the regex compiler.  To be a
+            * charname it must be followed immediately by a '{', and not look
+            * like \N followed by a curly quantifier, i.e., not something like
+            * \N{3,}.  regcurly returns a boolean indicating if it is a legal
+            * quantifier */
+           else if (PL_lex_inpat
+                   && (*s != 'N'
+                       || s[1] != '{'
+                       || regcurly(s + 1)))
+           {
                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
                 goto default_action;
             }
  
-           /* if we get here, it's either a quoted -, or a digit */
             switch (*s) {
  
             /* quoted - in transliterations */
@@ -2793,7 +2872,7 @@ S_scan_const(pTHX_ char *start)
                     goto default_action;
                 }
  
-           /* eg. \132 indicates the octal constant 0x132 */
+           /* eg. \132 indicates the octal constant 0132 */
             case '0': case '1': case '2': case '3':
             case '4': case '5': case '6': case '7':
                 {
@@ -2804,6 +2883,21 @@ S_scan_const(pTHX_ char *start)
                 }
                 goto NUM_ESCAPE_INSERT;
  
+           /* eg. \o{24} indicates the octal constant \024 */
+           case 'o':
+               {
+                   STRLEN len;
+                   const char* error;
+
+                   bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
+                   s += len;
+                   if (! valid) {
+                       yyerror(error);
+                       continue;
+                   }
+                   goto NUM_ESCAPE_INSERT;
+               }
+
             /* eg. \x24 indicates the hex constant 0x24 */
             case 'x':
                 ++s;
@@ -2832,15 +2926,13 @@ S_scan_const(pTHX_ char *start)
                 }
  
               NUM_ESCAPE_INSERT:
-               /* Insert oct, hex, or \N{U+...} escaped character.  There will
-                * always be enough room in sv since such escapes will be
-                * longer than any UTF-8 sequence they can end up as, except if
-                * they force us to recode the rest of the string into utf8 */
+               /* Insert oct or hex escaped character.  There will always be
+                * enough room in sv since such escapes will be longer than any
+                * UTF-8 sequence they can end up as, except if they force us
+                * to recode the rest of the string into utf8 */
                 
                 /* Here uv is the ordinal of the next character being added in
-                * unicode (converted from native).  (It has to be done before
-                * here because \N is interpreted as unicode, and oct and hex
-                * as native.) */
+                * unicode (converted from native). */
                 if (!UNI_IS_INVARIANT(uv)) {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
@@ -2880,104 +2972,342 @@ S_scan_const(pTHX_ char *start)
                 }
                 continue;
  
-           /* \N{LATIN SMALL LETTER A} is a named character, and so is
-            * \N{U+0041} */
             case 'N':
-               ++s;
-               if (*s == '{') {
-                   char* e = strchr(s, '}');
-                   SV *res;
-                   STRLEN len;
-                   const char *str;
-
-                   if (!e) {
+               /* In a non-pattern \N must be a named character, like \N{LATIN
+                * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
+                * mean to match a non-newline.  For non-patterns, named
+                * characters are converted to their string equivalents. In
+                * patterns, named characters are not converted to their
+                * ultimate forms for the same reasons that other escapes
+                * aren't.  Instead, they are converted to the \N{U+...} form
+                * to get the value from the charnames that is in effect right
+                * now, while preserving the fact that it was a named character
+                * so that the regex compiler knows this */
+
+               /* This section of code doesn't generally use the
+                * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
+                * a close examination of this macro and determined it is a
+                * no-op except on utfebcdic variant characters.  Every
+                * character generated by this that would normally need to be
+                * enclosed by this macro is invariant, so the macro is not
+                * needed, and would complicate use of Copy(). There are other
+                * parts of this file where the macro is used inconsistently,
+                * but are saved by it being a no-op */
+
+               /* The structure of this section of code (besides checking for
+                * errors and upgrading to utf8) is:
+                *  Further disambiguate between the two meanings of \N, and if
+                *      not a charname, go process it elsewhere
+                *  If of form \N{U+...}, pass it through if a pattern;
+                *      otherwise convert to utf8
+                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+                *  pattern; otherwise convert to utf8 */
+
+               /* Here, s points to the 'N'; the test below is guaranteed to
+                * succeed if we are being called on a pattern as we already
+                * know from a test above that the next character is a '{'.
+                * On a non-pattern \N must mean 'named sequence', which
+                * requires braces */
+               s++;
+               if (*s != '{') {
+                   yyerror("Missing braces on \\N{}"); 
+                   continue;
+               }
+               s++;
+
+               /* If there is no matching '}', it is an error. */
+               if (! (e = strchr(s, '}'))) {
+                   if (! PL_lex_inpat) {
                         yyerror("Missing right brace on \\N{}");
-                       e = s - 1;
-                       goto cont_scan;
-                   }
-                   if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
-                       /* \N{U+...} The ... is a unicode value even on EBCDIC
-                        * machines */
-                       I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
-                         PERL_SCAN_DISALLOW_PREFIX;
-                       s += 3;
-                       len = e - s;
-                       uv = grok_hex(s, &len, &flags, NULL);
-                       if ( e > s && len != (STRLEN)(e - s) ) {
-                           uv = 0xFFFD;
-                       }
-                       s = e + 1;
-                       goto NUM_ESCAPE_INSERT;
+                   } else {
+                       yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
                     }
-                   res = newSVpvn(s + 1, e - s - 1);
-                   res = new_constant( NULL, 0, "charnames",
-                                       res, NULL, s - 2, e - s + 3 );
-                   if (has_utf8)
-                       sv_utf8_upgrade(res);
-                   str = SvPV_const(res,len);
-#ifdef EBCDIC_NEVER_MIND
-                   /* charnames uses pack U and that has been
-                    * recently changed to do the below uni->native
-                    * mapping, so this would be redundant (and wrong,
-                    * the code point would be doubly converted).
-                    * But leave this in just in case the pack U change
-                    * gets revoked, but the semantics is still
-                    * desireable for charnames. --jhi */
-                   {
-                        UV uv = utf8_to_uvchr((const U8*)str, 0);
+                   continue;
+               }
  
-                        if (uv < 0x100) {
-                             U8 tmpbuf[UTF8_MAXBYTES+1], *d;
+               /* Here it looks like a named character */
  
-                             d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
-                             sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
-                             str = SvPV_const(res, len);
-                        }
-                   }
-#endif
-                   /* If destination is not in utf8 but this new character is,
-                    * recode the dest to utf8 */
-                   if (!has_utf8 && SvUTF8(res)) {
+               if (PL_lex_inpat) {
+
+                   /* XXX This block is temporary code.  \N{} implies that the
+                    * pattern is to have Unicode semantics, and therefore
+                    * currently has to be encoded in utf8.  By putting it in
+                    * utf8 now, we save a whole pass in the regular expression
+                    * compiler.  Once that code is changed so Unicode
+                    * semantics doesn't necessarily have to be in utf8, this
+                    * block should be removed */
+                   if (!has_utf8) {
                         SvCUR_set(sv, d - SvPVX_const(sv));
                         SvPOK_on(sv);
                         *d = '\0';
                         /* See Note on sizing above.  */
                         sv_utf8_upgrade_flags_grow(sv,
-                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                           len + (STRLEN)(send - s) + 1);
+                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                       /* 5 = '\N{' + cur char + NUL */
+                                       (STRLEN)(send - s) + 5);
                         d = SvPVX(sv) + SvCUR(sv);
                         has_utf8 = TRUE;
-                   } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+                   }
+               }
+
+               if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
+                   I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                               | PERL_SCAN_DISALLOW_PREFIX;
+                   STRLEN len;
+
+                   /* For \N{U+...}, the '...' is a unicode value even on
+                    * EBCDIC machines */
+                   s += 2;         /* Skip to next char after the 'U+' */
+                   len = e - s;
+                   uv = grok_hex(s, &len, &flags, NULL);
+                   if (len == 0 || len != (STRLEN)(e - s)) {
+                       yyerror("Invalid hexadecimal number in \\N{U+...}");
+                       s = e + 1;
+                       continue;
+                   }
+
+                   if (PL_lex_inpat) {
+
+                       /* Pass through to the regex compiler unchanged.  The
+                        * reason we evaluated the number above is to make sure
+                        * there wasn't a syntax error. */
+                       s -= 5;     /* Include the '\N{U+' */
+                       Copy(s, d, e - s + 1, char);    /* 1 = include the } */
+                       d += e - s + 1;
+                   }
+                   else {  /* Not a pattern: convert the hex to string */
+
+                        /* If destination is not in utf8, unconditionally
+                         * recode it to be so.  This is because \N{} implies
+                         * Unicode semantics, and scalars have to be in utf8
+                         * to guarantee those semantics */
+                       if (! has_utf8) {
+                           SvCUR_set(sv, d - SvPVX_const(sv));
+                           SvPOK_on(sv);
+                           *d = '\0';
+                           /* See Note on sizing above.  */
+                           sv_utf8_upgrade_flags_grow(
+                                       sv,
+                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                       UNISKIP(uv) + (STRLEN)(send - e) + 1);
+                           d = SvPVX(sv) + SvCUR(sv);
+                           has_utf8 = TRUE;
+                       }
+
+                       /* Add the string to the output */
+                       if (UNI_IS_INVARIANT(uv)) {
+                           *d++ = (char) uv;
+                       }
+                       else d = (char*)uvuni_to_utf8((U8*)d, uv);
+                   }
+               }
+               else { /* Here is \N{NAME} but not \N{U+...}. */
+
+                   SV *res;            /* result from charnames */
+                   const char *str;    /* the string in 'res' */
+                   STRLEN len;         /* its length */
  
-                       /* See Note on sizing above.  (NOTE: SvCUR() is not set
-                        * correctly here). */
-                       const STRLEN off = d - SvPVX_const(sv);
-                       d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
+                   /* Get the value for NAME */
+                   res = newSVpvn(s, e - s);
+                   res = new_constant( NULL, 0, "charnames",
+                                       /* includes all of: \N{...} */
+                                       res, NULL, s - 3, e - s + 4 );
+
+                   /* Most likely res will be in utf8 already since the
+                    * standard charnames uses pack U, but a custom translator
+                    * can leave it otherwise, so make sure.  XXX This can be
+                    * revisited to not have charnames use utf8 for characters
+                    * that don't need it when regexes don't have to be in utf8
+                    * for Unicode semantics.  If doing so, remember EBCDIC */
+                   sv_utf8_upgrade(res);
+                   str = SvPV_const(res, len);
+
+                   /* Don't accept malformed input */
+                   if (! is_utf8_string((U8 *) str, len)) {
+                       yyerror("Malformed UTF-8 returned by \\N");
+                   }
+                   else if (PL_lex_inpat) {
+
+                       if (! len) { /* The name resolved to an empty string */
+                           Copy("\\N{}", d, 4, char);
+                           d += 4;
+                       }
+                       else {
+                           /* In order to not lose information for the regex
+                           * compiler, pass the result in the specially made
+                           * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
+                           * the code points in hex of each character
+                           * returned by charnames */
+
+                           const char *str_end = str + len;
+                           STRLEN char_length;     /* cur char's byte length */
+                           STRLEN output_length;   /* and the number of bytes
+                                                      after this is translated
+                                                      into hex digits */
+                           const STRLEN off = d - SvPVX_const(sv);
+
+                           /* 2 hex per byte; 2 chars for '\N'; 2 chars for
+                            * max('U+', '.'); and 1 for NUL */
+                           char hex_string[2 * UTF8_MAXBYTES + 5];
+
+                           /* Get the first character of the result. */
+                           U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                   len,
+                                                   &char_length,
+                                                   UTF8_ALLOW_ANYUV);
+
+                           /* The call to is_utf8_string() above hopefully
+                            * guarantees that there won't be an error.  But
+                            * it's easy here to make sure.  The function just
+                            * above warns and returns 0 if invalid utf8, but
+                            * it can also return 0 if the input is validly a
+                            * NUL. Disambiguate */
+                           if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+                               uv = UNICODE_REPLACEMENT;
+                           }
+
+                           /* Convert first code point to hex, including the
+                            * boiler plate before it */
+                           sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
+                           output_length = strlen(hex_string);
+
+                           /* Make sure there is enough space to hold it */
+                           d = off + SvGROW(sv, off
+                                                + output_length
+                                                + (STRLEN)(send - e)
+                                                + 2);  /* '}' + NUL */
+                           /* And output it */
+                           Copy(hex_string, d, output_length, char);
+                           d += output_length;
+
+                           /* For each subsequent character, append dot and
+                            * its ordinal in hex */
+                           while ((str += char_length) < str_end) {
+                               const STRLEN off = d - SvPVX_const(sv);
+                               U32 uv = utf8n_to_uvuni((U8 *) str,
+                                                       str_end - str,
+                                                       &char_length,
+                                                       UTF8_ALLOW_ANYUV);
+                               if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
+                                   uv = UNICODE_REPLACEMENT;
+                               }
+
+                               sprintf(hex_string, ".%X", (unsigned int) uv);
+                               output_length = strlen(hex_string);
+
+                               d = off + SvGROW(sv, off
+                                                    + output_length
+                                                    + (STRLEN)(send - e)
+                                                    + 2);      /* '}' +  NUL */
+                               Copy(hex_string, d, output_length, char);
+                               d += output_length;
+                           }
+
+                           *d++ = '}'; /* Done.  Add the trailing brace */
+                       }
+                   }
+                   else { /* Here, not in a pattern.  Convert the name to a
+                           * string. */
+
+                        /* If destination is not in utf8, unconditionally
+                         * recode it to be so.  This is because \N{} implies
+                         * Unicode semantics, and scalars have to be in utf8
+                         * to guarantee those semantics */
+                       if (! has_utf8) {
+                           SvCUR_set(sv, d - SvPVX_const(sv));
+                           SvPOK_on(sv);
+                           *d = '\0';
+                           /* See Note on sizing above.  */
+                           sv_utf8_upgrade_flags_grow(sv,
+                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                               len + (STRLEN)(send - s) + 1);
+                           d = SvPVX(sv) + SvCUR(sv);
+                           has_utf8 = TRUE;
+                       } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
+
+                           /* See Note on sizing above.  (NOTE: SvCUR() is not
+                            * set correctly here). */
+                           const STRLEN off = d - SvPVX_const(sv);
+                           d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
+                       }
+                       Copy(str, d, len, char);
+                       d += len;
+                   }
+                   SvREFCNT_dec(res);
+
+                   /* Deprecate non-approved name syntax */
+                   if (ckWARN_d(WARN_DEPRECATED)) {
+                       bool problematic = FALSE;
+                       char* i = s;
+
+                       /* For non-utf8 input, look to see that the first
+                        * character is an alpha, then loop through the rest
+                        * checking that each is a continuation */
+                       if (! this_utf8) {
+                           if (! isALPHAU(*i)) problematic = TRUE;
+                           else for (i = s + 1; i < e; i++) {
+                               if (isCHARNAME_CONT(*i)) continue;
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       else {
+                           /* Similarly for utf8.  For invariants can check
+                            * directly.  We accept anything above the latin1
+                            * range because it is immaterial to Perl if it is
+                            * correct or not, and is expensive to check.  But
+                            * it is fairly easy in the latin1 range to convert
+                            * the variants into a single character and check
+                            * those */
+                           if (UTF8_IS_INVARIANT(*i)) {
+                               if (! isALPHAU(*i)) problematic = TRUE;
+                           } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                               if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+                                                                           *(i+1)))))
+                               {
+                                   problematic = TRUE;
+                               }
+                           }
+                           if (! problematic) for (i = s + UTF8SKIP(s);
+                                                   i < e;
+                                                   i+= UTF8SKIP(i))
+                           {
+                               if (UTF8_IS_INVARIANT(*i)) {
+                                   if (isCHARNAME_CONT(*i)) continue;
+                               } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                                   continue;
+                               } else if (isCHARNAME_CONT(
+                                           UNI_TO_NATIVE(
+                                           UTF8_ACCUMULATE(*i, *(i+1)))))
+                               {
+                                   continue;
+                               }
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       if (problematic) {
+                           /* The e-i passed to the final %.*s makes sure that,
+                            * should the trailing NUL be missing, this print
+                            * won't run off the end of the string */
+                           Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                                       "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
+                                       (int)(i - s + 1), s, (int)(e - i), i + 1);
+                       }
                     }
+               } /* End \N{NAME} */
  #ifdef EBCDIC
-                   if (!dorange)
-                       native_range = FALSE; /* \N{} is guessed to be Unicode */
+               if (!dorange) 
+                   native_range = FALSE; /* \N{} is defined to be Unicode */
  #endif
-                   Copy(str, d, len, char);
-                   d += len;
-                   SvREFCNT_dec(res);
-                 cont_scan:
-                   s = e + 1;
-               }
-               else
-                   yyerror("Missing braces on \\N{}");
+               s = e + 1;  /* Point to just after the '}' */
                 continue;
  
             /* \c is a control character */
             case 'c':
                 s++;
                 if (s < send) {
-                   U8 c = *s++;
-#ifdef EBCDIC
-                   if (isLOWER(c))
-                       c = toUPPER(c);
-#endif
-                   *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
+                   *d++ = grok_bslash_c(*s++, 1);
                 }
                 else {
                     yyerror("Missing control char name in \\c");
@@ -3447,8 +3777,6 @@ Perl_filter_del(pTHX_ filter_t funcp)
      /* if filter is on top of stack (usual case) just pop it off */
      datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
      if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
-       IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
-       IoANY(datasv) = (void *)NULL;
         sv_free(av_pop(PL_rsfp_filters));
  
          return;
@@ -3629,7 +3957,7 @@ Perl_madlex(pTHX)
      PL_thismad = 0;
  
      /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
-    if (PL_pending_ident)
+    if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
          return S_pending_ident(aTHX);
  
      /* previous token ate up our whitespace? */
@@ -3888,7 +4216,7 @@ Perl_yylex(pTHX)
         SvREFCNT_dec(tmp);
      } );
      /* check if there's an identifier for us to look at */
-    if (PL_pending_ident)
+    if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
          return REPORT(S_pending_ident(aTHX));
  
      /* no identifier pending identification */
@@ -5408,7 +5736,7 @@ Perl_yylex(pTHX)
             }
         }
  
-       if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
+       if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
             PL_tokenbuf[0] = '@';
             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
                            sizeof PL_tokenbuf - 1, FALSE);
@@ -5807,7 +6135,7 @@ Perl_yylex(pTHX)
             int result;
             char *saved_bufptr = PL_bufptr;
             PL_bufptr = s;
-           result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
+           result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
             s = PL_bufptr;
             if (result == KEYWORD_PLUGIN_DECLINE) {
                 /* not a plugged-in keyword */
@@ -5834,8 +6162,6 @@ Perl_yylex(pTHX)
         /* Is this a label? */
         if (!anydelim && PL_expect == XSTATE
               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
-           if (tmp)
-               Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
             s = d + 1;
             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
             CLINE;
@@ -5883,8 +6209,9 @@ Perl_yylex(pTHX)
                 gvp = 0;
                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                  "Ambiguous call resolved as CORE::%s(), %s",
-                                  GvENAME(hgv), "qualify as such or use &");
+                                  "Ambiguous call resolved as CORE::%s(), "
+                                  "qualify as such or use &",
+                                  GvENAME(hgv));
             }
         }
  
@@ -5967,16 +6294,15 @@ Perl_yylex(pTHX)
  
                 /* if we saw a global override before, get the right name */
  
+               sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
+                   len ? len : strlen(PL_tokenbuf));
                 if (gvp) {
+                   SV * const tmp_sv = sv;
                     sv = newSVpvs("CORE::GLOBAL::");
-                   sv_catpv(sv,PL_tokenbuf);
-               }
-               else {
-                   /* If len is 0, newSVpv does strlen(), which is correct.
-                      If len is non-zero, then it will be the true length,
-                      and so the scalar will be created correctly.  */
-                   sv = newSVpv(PL_tokenbuf,len);
+                   sv_catsv(sv, tmp_sv);
+                   SvREFCNT_dec(tmp_sv);
                 }
+
  #ifdef PERL_MAD
                 if (PL_madskills && !PL_thistoken) {
                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
@@ -5986,17 +6312,11 @@ Perl_yylex(pTHX)
  #endif
  
                 /* Presume this is going to be a bareword of some sort. */
-
                 CLINE;
                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
                 pl_yylval.opval->op_private = OPpCONST_BARE;
-               /* UTF-8 package name? */
-               if (UTF && !IN_BYTES &&
-                   is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
-                   SvUTF8_on(sv);
  
                 /* And if "Foo::", then that's what it certainly is. */
-
                 if (len)
                     goto safe_bareword;
  
@@ -6172,10 +6492,27 @@ Perl_yylex(pTHX)
                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
                         if (!protolen)
                             TERM(FUNC0SUB);
-                       if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
-                           OPERATOR(UNIOPSUB);
                         while (*proto == ';')
                             proto++;
+                       if (
+                           (
+                               (
+                                   *proto == '$' || *proto == '_'
+                                || *proto == '*'
+                               )
+                            && proto[1] == '\0'
+                           )
+                        || (
+                            *proto == '\\' && proto[1] && proto[2] == '\0'
+                           )
+                       )
+                           OPERATOR(UNIOPSUB);
+                       if (*proto == '\\' && proto[1] == '[') {
+                           const char *p = proto + 2;
+                           while(*p && *p != ']')
+                               ++p;
+                           if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
+                       }
                         if (*proto == '&' && *s == '{') {
                             if (PL_curstash)
                                 sv_setpvs(PL_subname, "__ANON__");
@@ -6965,7 +7302,9 @@ Perl_yylex(pTHX)
  
         case KEY_package:
             s = force_word(s,WORD,FALSE,TRUE,FALSE);
-           s = force_version(s, FALSE);
+           s = SKIPSPACE1(s);
+           s = force_strict_version(s);
+           PL_lex_expect = XBLOCK;
             OPERATOR(PACKAGE);
  
         case KEY_pipe:
@@ -6981,14 +7320,13 @@ Perl_yylex(pTHX)
         case KEY_quotemeta:
             UNI(OP_QUOTEMETA);
  
-       case KEY_qw:
+       case KEY_qw: {
+           OP *words = NULL;
             s = scan_str(s,!!PL_madskills,FALSE);
             if (!s)
                 missingterm(NULL);
             PL_expect = XOPERATOR;
-           force_next(')');
             if (SvCUR(PL_lex_stuff)) {
-               OP *words = NULL;
                 int warned = 0;
                 d = SvPV_force(PL_lex_stuff, len);
                 while (len) {
@@ -7020,18 +7358,17 @@ Perl_yylex(pTHX)
                                             newSVOP(OP_CONST, 0, tokeq(sv)));
                     }
                 }
-               if (words) {
-                   start_force(PL_curforce);
-                   NEXTVAL_NEXTTOKE.opval = words;
-                   force_next(THING);
-               }
             }
+           if (!words)
+               words = newNULLLIST();
             if (PL_lex_stuff) {
                 SvREFCNT_dec(PL_lex_stuff);
                 PL_lex_stuff = NULL;
             }
-           PL_expect = XTERM;
-           TOKEN('(');
+           PL_expect = XOPERATOR;
+           pl_yylval.opval = sawparens(words);
+           TOKEN(QWLIST);
+       }
  
         case KEY_qq:
             s = scan_str(s,!!PL_madskills,FALSE);
@@ -7352,7 +7689,7 @@ Perl_yylex(pTHX)
                     bool must_be_last = FALSE;
                     bool underscore = FALSE;
                     bool seen_underscore = FALSE;
-                   const bool warnsyntax = ckWARN(WARN_SYNTAX);
+                   const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
  
                     s = scan_str(s,!!PL_madskills,FALSE);
                     if (!s)
@@ -7364,7 +7701,7 @@ Perl_yylex(pTHX)
                         if (!isSPACE(*p)) {
                             d[tmp++] = *p;
  
-                           if (warnsyntax) {
+                           if (warnillegalproto) {
                                 if (must_be_last)
                                     proto_after_greedy_proto = TRUE;
                                 if (!strchr("$@%*;[]&\\_", *p)) {
@@ -7397,11 +7734,11 @@ Perl_yylex(pTHX)
                     }
                     d[tmp] = '\0';
                     if (proto_after_greedy_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                     "Prototype after '%c' for %"SVf" : %s",
                                     greedy_proto, SVfARG(PL_subname), d);
                     if (bad_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
                                     "Illegal character %sin prototype for %"SVf" : %s",
                                     seen_underscore ? "after '_' " : "",
                                     SVfARG(PL_subname), d);
@@ -7432,7 +7769,7 @@ Perl_yylex(pTHX)
                 else if (*s != '{' && key == KEY_sub) {
                     if (!have_name)
                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
-                   else if (*s != ';')
+                   else if (*s != ';' && *s != '}')
                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
                 }
  
@@ -7744,28 +8081,11 @@ S_pending_ident(pTHX)
      pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
                                                       tokenbuf_len - 1));
      pl_yylval.opval->op_private = OPpCONST_ENTERED;
-    gv_fetchpvn_flags(
-           PL_tokenbuf + 1, tokenbuf_len - 1,
-           /* If the identifier refers to a stash, don't autovivify it.
-            * Change 24660 had the side effect of causing symbol table
-            * hashes to always be defined, even if they were freshly
-            * created and the only reference in the entire program was
-            * the single statement with the defined %foo::bar:: test.
-            * It appears that all code in the wild doing this actually
-            * wants to know whether sub-packages have been loaded, so
-            * by avoiding auto-vivifying symbol tables, we ensure that
-            * defined %foo::bar:: continues to be false, and the existing
-            * tests still give the expected answers, even though what
-            * they're actually testing has now changed subtly.
-            */
-           (*PL_tokenbuf == '%'
-            && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
-            && d[-1] == ':'
-            ? 0
-            : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
-           ((PL_tokenbuf[0] == '$') ? SVt_PV
-            : (PL_tokenbuf[0] == '@') ? SVt_PVAV
-            : SVt_PVHV));
+    gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+                    PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
+                    ((PL_tokenbuf[0] == '$') ? SVt_PV
+                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
+                     : SVt_PVHV));
      return WORD;
  }
  
@@ -8209,7 +8529,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
            if (name[1] == 'i' &&
                name[2] == 'e')
            {                                       /* tie        */
-            return KEY_tie;
+            return -KEY_tie;
            }
  
            goto unknown;
@@ -8653,7 +8973,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
                  case 'e':
                    if (name[3] == 'd')
                    {                               /* tied       */
-                    return KEY_tied;
+                    return -KEY_tied;
                    }
  
                    goto unknown;
@@ -9148,7 +9468,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
                      {
                        case 'e':
                          {                         /* untie      */
-                          return KEY_untie;
+                          return -KEY_untie;
                          }
  
                        case 'l':
@@ -11258,6 +11578,11 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
         SvREFCNT_dec(msg);
         return sv;
      }
+
+    /* charnames doesn't work well if there have been errors found */
+    if (PL_error_count > 0 && strEQ(key,"charnames"))
+       return &PL_sv_undef;
+
      cvp = hv_fetch(table, key, keylen, FALSE);
      if (!cvp || !SvOK(*cvp)) {
         why1 = "$^H{";
@@ -11530,27 +11855,15 @@ static U32
  S_pmflag(U32 pmfl, const char ch) {
      switch (ch) {
         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
-    case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
-    case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
-    case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
-    case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
+    case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
+    case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
+    case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
+    case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
+    case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
      }
      return pmfl;
  }
  
-void
-Perl_pmflag(pTHX_ U32* pmfl, int ch)
-{
-    PERL_ARGS_ASSERT_PMFLAG;
-
-    Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
-                    "Perl_pmflag() is deprecated, and will be removed from the XS API");
-
-    if (ch<256) {
-       *pmfl = S_pmflag(*pmfl, (char)ch);
-    }
-}
-
  STATIC char *
  S_scan_pat(pTHX_ char *start, I32 type)
  {
@@ -11602,6 +11915,12 @@ S_scan_pat(pTHX_ char *start, I32 type)
  #endif
      while (*s && strchr(valid_flags, *s))
         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
+
+    if (isALNUM(*s)) {
+       Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+           "Having no space between pattern and following word is deprecated");
+
+    }
  #ifdef PERL_MAD
      if (PL_madskills && modstart != s) {
         SV* tmptoken = newSVpvn(modstart, s - modstart);
@@ -11682,8 +12001,14 @@ S_scan_subst(pTHX_ char *start)
         }
         else if (strchr(S_PAT_MODS, *s))
             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
-       else
+       else {
+           if (isALNUM(*s)) {
+               Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+                   "Having no space between pattern and following word is deprecated");
+
+           }
             break;
+       }
      }
  
  #ifdef PERL_MAD
@@ -12681,11 +13006,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             const char *base, *Base, *max;
  
             /* check for hex */
-           if (s[1] == 'x') {
+           if (s[1] == 'x' || s[1] == 'X') {
                 shift = 4;
                 s += 2;
                 just_zero = FALSE;
-           } else if (s[1] == 'b') {
+           } else if (s[1] == 'b' || s[1] == 'B') {
                 shift = 1;
                 s += 2;
                 just_zero = FALSE;
@@ -12799,13 +13124,12 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
             }
  
-           sv = newSV(0);
             if (overflowed) {
                 if (n > 4294967295.0)
                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
                                    "%s number > %s non-portable",
                                    Base, max);
-               sv_setnv(sv, n);
+               sv = newSVnv(n);
             }
             else {
  #if UVSIZE > 4
@@ -12814,7 +13138,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                                    "%s number > %s non-portable",
                                    Base, max);
  #endif
-               sv_setuv(sv, u);
+               sv = newSVuv(u);
             }
             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
                 sv = new_constant(start, s - start, "integer",
@@ -12945,9 +13269,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
         }
  
  
-       /* make an sv from the string */
-       sv = newSV(0);
-
         /*
             We try to do an integer conversion first if no characters
             indicating "float" have been found.
@@ -12959,12 +13280,12 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
              if (flags == IS_NUMBER_IN_UV) {
                if (uv <= IV_MAX)
-               sv_setiv(sv, uv); /* Prefer IVs over UVs. */
+               sv = newSViv(uv); /* Prefer IVs over UVs. */
                else
-               sv_setuv(sv, uv);
+               sv = newSVuv(uv);
              } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
                if (uv <= (UV) IV_MIN)
-                sv_setiv(sv, -(IV)uv);
+                sv = newSViv(-(IV)uv);
                else
                 floatit = TRUE;
              } else
@@ -12974,7 +13295,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             /* terminate the string */
             *d = '\0';
             nv = Atof(PL_tokenbuf);
-           sv_setnv(sv, nv);
+           sv = newSVnv(nv);
         }
  
         if ( floatit
@@ -13382,9 +13703,11 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
      SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
      SV *const utf8_buffer = filter;
      IV status = IoPAGE(filter);
-    const bool reverse = (bool) IoLINES(filter);
+    const bool reverse = cBOOL(IoLINES(filter));
      I32 retval;
  
+    PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
+
      /* As we're automatically added, at the lowest level, and hence only called
         from this file, we can be sure that we're not called in block mode. Hence
         don't bother writing code to deal with block mode.  */
@@ -13498,6 +13821,8 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
  {
      SV *filter = filter_add(S_utf16_textfilter, NULL);
  
+    PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
+
      IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
      sv_setpvs(filter, "");
      IoLINES(filter) = reversed;
@@ -13617,6 +13942,66 @@ Perl_keyword_plugin_standard(pTHX_
  }
  
  /*
+=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+
+Parse a single complete Perl statement.  This may be a normal imperative
+statement, including optional label, or a declaration that has
+compile-time effect.  It is up to the caller to ensure that the dynamic
+parser state (L</PL_parser> et al) is correctly set to reflect the source
+of the code to be parsed and the lexical context for the statement.
+
+The op tree representing the statement is returned.  This may be a
+null pointer if the statement is null, for example if it was actually
+a subroutine definition (which has compile-time side effects).  If not
+null, it will be the result of a L</newSTATEOP> call, normally including
+a C<nextstate> or equivalent op.
+
+If an error occurs in parsing or compilation, in most cases a valid op
+tree (most likely null) is returned anyway.  The error is reflected in
+the parser state, normally resulting in a single exception at the top
+level of parsing which covers all the compilation errors that occurred.
+Some compilation errors, however, will throw an exception immediately.
+
+The I<flags> parameter is reserved for future use, and must always
+be zero.
+
+=cut
+*/
+
+OP *
+Perl_parse_fullstmt(pTHX_ U32 flags)
+{
+    OP *fullstmtop;
+    if (flags) /* flags is reserved; any non-zero value is treated as an internal error */
+       Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
+    ENTER;
+    SAVEVPTR(PL_eval_root); /* NOTE(review): presumably restored by the LEAVE below -- confirm */
+    PL_eval_root = NULL; /* start from NULL; the result is read back from here after the parse */
+    if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
+       qerror(Perl_mess(aTHX_ "Parse error")); /* non-zero yyparse() result with no error counted: report a generic one */
+    fullstmtop = PL_eval_root; /* copy the result out before LEAVE runs */
+    LEAVE;
+    return fullstmtop;
+}
+
+void
+Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
+{ /* Warn that qw(...) used as parentheses is deprecated, then force the tokens ')' [THING] '(' */
+    PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
+    deprecate("qw(...) as parentheses");
+    force_next(')'); /* NOTE(review): calls run in reverse of reading order; presumably force_next is last-in-first-out -- confirm */
+    if (qwlist->op_type == OP_STUB) {
+       op_free(qwlist); /* stub op: freed here, and no THING token is forced for it */
+    }
+    else {
+       start_force(PL_curforce);
+       NEXTVAL_NEXTTOKE.opval = qwlist; /* the op is attached as the value of the THING token forced next */
+       force_next(THING);
+    }
+    force_next('('); /* forced last */
+}
+
+/*
   * Local variables:
   * c-indentation-style: bsd
   * c-basic-offset: 4