This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c: Split code to load _charnames.pm into own fnc
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 24df24d..2c3bbb3 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -38,7 +38,6 @@ Individual members of C<PL_parser> have their own documentation.
 #include "EXTERN.h"
 #define PERL_IN_TOKE_C
 #include "perl.h"
-#include "dquote_inline.h"
 #include "invlist_inline.h"
 
 #define new_constant(a,b,c,d,e,f,g, h) \
@@ -95,6 +94,7 @@ Individual members of C<PL_parser> have their own documentation.
     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
 
 static const char* const ident_too_long = "Identifier too long";
+static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
 
 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 
@@ -113,7 +113,7 @@ static const char* const ident_too_long = "Identifier too long";
 
 /* In variables named $^X, these are the legal values for X.
  * 1999-02-27 mjd-perl-patch@plover.com */
-#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
+#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 
 #define SPACE_OR_TAB(c) isBLANK_A(c)
 
@@ -1003,6 +1003,8 @@ buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 function is more convenient.
 
+=for apidoc Amnh||LEX_STUFF_UTF8
+
 =cut
 */
 
@@ -1296,6 +1298,8 @@ consumed, then it will not be discarded regardless of the flag.
 Returns true if some new text was added to the buffer, or false if the
 buffer has reached the end of the input text.
 
+=for apidoc Amnh||LEX_KEEP_PREVIOUS
+
 =cut
 */
 
@@ -1643,11 +1647,11 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
            if (must_be_last)
                proto_after_greedy_proto = TRUE;
            if (underscore) {
-               if (!strchr(";@%", *p))
+               if (!memCHRs(";@%", *p))
                    bad_proto_after_underscore = TRUE;
                underscore = FALSE;
            }
-           if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
+           if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
                bad_proto = TRUE;
            }
            else {
@@ -2011,7 +2015,7 @@ S_force_next(pTHX_ I32 type)
 static int
 S_postderef(pTHX_ int const funny, char const next)
 {
-    assert(funny == DOLSHARP || strchr("$@%&*", funny));
+    assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
     if (next == '*') {
        PL_expect = XOPERATOR;
        if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
@@ -2582,6 +2586,64 @@ S_sublex_done(pTHX)
     }
 }
 
+HV *
+Perl_load_charnames(pTHX_ SV * char_name, const char * context,
+                          const STRLEN context_len, const char ** error_msg)
+{
+    /* Load the official _charnames module if not already there.  The
+     * parameters are just to give info for any error messages generated:
+     *  char_name   a name to look up which is the reason for loading this
+     *  context     'char_name' in the context in the input in which it appears
+     *  context_len how many bytes 'context' occupies
+     *  error_msg   *error_msg will be set to any error
+     *
+     *  Returns the ^H table if success; otherwise NULL */
+
+    unsigned int i;
+    HV * table;
+    SV **cvp;
+    SV * res;
+
+    PERL_ARGS_ASSERT_LOAD_CHARNAMES;
+
+    /* This loop is executed 1 1/2 times.  On the first time through, if it
+     * isn't already loaded, try loading it, and iterate just once to see if it
+     * worked.  */
+    for (i = 0; i < 2; i++) {
+        table = GvHV(PL_hintgv);                /* ^H */
+
+        if (    table
+            && (PL_hints & HINT_LOCALIZE_HH)
+            && (cvp = hv_fetchs(table, "charnames", FALSE))
+            &&  SvOK(*cvp))
+        {
+            return table;   /* Quit if already loaded */
+        }
+
+        if (i == 0) {
+            Perl_load_module(aTHX_
+                0,
+                newSVpvs("_charnames"),
+
+                /* version parameter; no need to specify it, as if we get too early
+                * a version, will fail anyway, not being able to find 'charnames'
+                * */
+                NULL,
+                newSVpvs(":full"),
+                newSVpvs(":short"),
+                NULL);
+        }
+    }
+
+    /* Here, it failed; new_constant will give appropriate error messages */
+    *error_msg = NULL;
+    res = new_constant( NULL, 0, "charnames", char_name, NULL,
+                        context, context_len, error_msg);
+    SvREFCNT_dec(res);
+
+    return NULL;
+}
+
 STATIC SV*
 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
 {
@@ -2620,13 +2682,19 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
      * doesn't have to be. */
 
+    SV* char_name;
     SV* res;
     HV * table;
     SV **cvp;
     SV *cv;
     SV *rv;
     HV *stash;
-    const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+
+    /* Points to the beginning of the \N{... so that any messages include the
+     * context of what's failing*/
+    const char* context = s - 3;
+    STRLEN context_len = e - context + 1; /* include all of \N{...} */
+
     dVAR;
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
@@ -2634,27 +2702,35 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
     assert(e >= s);
     assert(s > (char *) 3);
 
-    res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+    char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
 
-    if (!SvCUR(res)) {
-        SvREFCNT_dec_NN(res);
+    if (!SvCUR(char_name)) {
+        SvREFCNT_dec_NN(char_name);
         /* diag_listed_as: Unknown charname '%s' */
         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
         return NULL;
     }
 
-    res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
-                        /* include the <}> */
-                        e - backslash_ptr + 1, error_msg);
-    if (! SvPOK(res)) {
-        SvREFCNT_dec_NN(res);
+    /* Autoload the charnames module */
+
+    table = load_charnames(char_name, context, context_len, error_msg);
+    if (table == NULL) {
+        return NULL;
+    }
+
+    *error_msg = NULL;
+    res = new_constant( NULL, 0, "charnames", char_name, NULL,
+                        context, context_len, error_msg);
+    if (*error_msg) {
+        *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
+
+        SvREFCNT_dec(res);
         return NULL;
     }
 
     /* See if the charnames handler is the Perl core's, and if so, we can skip
      * the validation needed for a user-supplied one, as Perl's does its own
      * validation. */
-    table = GvHV(PL_hintgv);            /* ^H */
     cvp = hv_fetchs(table, "charnames", FALSE);
     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
@@ -2751,7 +2827,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
         *error_msg = Perl_form(aTHX_
             "charnames alias definitions may not contain trailing "
             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
-            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(s - context + 1), context,
             (int)(e - s + 1), s + 1);
         return NULL;
     }
@@ -2771,7 +2847,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
                                immediately after '%s' */
             *error_msg = Perl_form(aTHX_
                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
-                 (int) (e - backslash_ptr + 1), backslash_ptr,
+                 (int) context_len, context,
                  (int) ((char *) first_bad_char_loc - str), str);
             return NULL;
         }
@@ -2787,7 +2863,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
                            in \N{%s} */
         *error_msg = Perl_form(aTHX_
             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
-            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(s - context + 1), context,
             (int)(e - s + 1), s + 1);
         return NULL;
     }
@@ -2799,7 +2875,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
         *error_msg = Perl_form(aTHX_
             "charnames alias definitions may not contain a sequence of "
             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
-            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(s - context + 1), context,
             (int)(e - s + 1), s + 1);
         return NULL;
 }
@@ -3441,7 +3517,7 @@ S_scan_const(pTHX_ char *start)
             {
                break;
             }
-           if (strchr(":'{$", s[1]))
+           if (memCHRs(":'{$", s[1]))
                break;
            if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
                break; /* in regexp, neither @+ nor @- are interpolated */
@@ -3451,7 +3527,7 @@ S_scan_const(pTHX_ char *start)
        else if (*s == '$') {
            if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
                break;
-           if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
+           if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
                if (s[1] == '\\') {
                    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
                                   "Possible unintended interpolation of $\\ in regex");
@@ -3488,7 +3564,7 @@ S_scan_const(pTHX_ char *start)
            }
 
            /* string-change backslash escapes */
-           if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
+           if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
                --s;
                break;
            }
@@ -3530,15 +3606,18 @@ S_scan_const(pTHX_ char *start)
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                {
-                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                              | PERL_SCAN_NOTIFY_ILLDIGIT;
                     STRLEN len = 3;
-                   uv = grok_oct(s, &len, &flags, NULL);
-                   s += len;
-                    if (len < 3 && s < send && isDIGIT(*s)
+                    uv = grok_oct(s, &len, &flags, NULL);
+                    s += len;
+                    if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
+                        && s < send
+                        && isDIGIT(*s)  /* like \08, \178 */
                         && ckWARN(WARN_MISC))
                     {
-                        Perl_warner(aTHX_ packWARN(WARN_MISC),
-                                    "%s", form_short_octal_warning(s, len));
+                        Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
+                            form_alien_digit_msg(8, len, s, send, UTF, FALSE));
                     }
                }
                goto NUM_ESCAPE_INSERT;
@@ -3548,14 +3627,13 @@ S_scan_const(pTHX_ char *start)
                {
                    const char* error;
 
-                   bool valid = grok_bslash_o(&s, send,
+                   if (! grok_bslash_o(&s, send,
                                                &uv, &error,
-                                               TRUE, /* Output warning */
+                                               NULL,
                                                FALSE, /* Not strict */
-                                               TRUE, /* Output warnings for
-                                                         non-portables */
-                                               UTF);
-                   if (! valid) {
+                                               FALSE, /* No illegal cp's */
+                                               UTF))
+                    {
                        yyerror(error);
                        uv = 0; /* drop through to ensure range ends are set */
                    }
@@ -3567,14 +3645,13 @@ S_scan_const(pTHX_ char *start)
                {
                    const char* error;
 
-                   bool valid = grok_bslash_x(&s, send,
+                   if (! grok_bslash_x(&s, send,
                                                &uv, &error,
-                                               TRUE, /* Output warning */
+                                               NULL,
                                                FALSE, /* Not strict */
-                                               TRUE,  /* Output warnings for
-                                                         non-portables */
-                                               UTF);
-                   if (! valid) {
+                                               FALSE, /* No illegal cp's */
+                                               UTF))
+                    {
                        yyerror(error);
                        uv = 0; /* drop through to ensure range ends are set */
                    }
@@ -3640,7 +3717,10 @@ S_scan_const(pTHX_ char *start)
                             d = SvCUR(sv) + SvGROW(sv, needed);
                         }
 
-                       d = (char*)uvchr_to_utf8((U8*)d, uv);
+                       d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
+                                                   (ckWARN(WARN_PORTABLE))
+                                                   ? UNICODE_WARN_PERL_EXTENDED
+                                                   : 0);
                    }
                }
 #ifdef EBCDIC
@@ -3740,13 +3820,23 @@ S_scan_const(pTHX_ char *start)
                    }
                    else {  /* Not a pattern: convert the hex to string */
                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                               | PERL_SCAN_SILENT_ILLDIGIT
-                               | PERL_SCAN_DISALLOW_PREFIX;
+                                 | PERL_SCAN_SILENT_ILLDIGIT
+                                 | PERL_SCAN_SILENT_OVERFLOW
+                                 | PERL_SCAN_DISALLOW_PREFIX;
                         STRLEN len = e - s;
+
                         uv = grok_hex(s, &len, &flags, NULL);
                         if (len == 0 || (len != (STRLEN)(e - s)))
                             goto bad_NU;
 
+                        if (    uv > MAX_LEGAL_CP
+                            || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
+                        {
+                            yyerror(form_cp_too_large_msg(16, s, len, 0));
+                            uv = 0; /* drop through to ensure range ends are
+                                       set */
+                        }
+
                          /* For non-tr///, if the destination is not in utf8,
                           * unconditionally recode it to be so.  This is
                           * because \N{} implies Unicode semantics, and scalars
@@ -3785,7 +3875,10 @@ S_scan_const(pTHX_ char *start)
                            *d++ = (char) LATIN1_TO_NATIVE(uv);
                        }
                        else {
-                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
+                                                   (ckWARN(WARN_PORTABLE))
+                                                   ? UNICODE_WARN_PERL_EXTENDED
+                                                   : 0);
                         }
                    }
                }
@@ -3991,7 +4084,14 @@ S_scan_const(pTHX_ char *start)
            case 'c':
                s++;
                if (s < send) {
-                   *d++ = grok_bslash_c(*s, 1);
+                    const char * message;
+
+                   if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
+                        yyerror(message);
+                        yyquit();   /* Have always immediately croaked on
+                                       errors in this */
+                    }
+                   d++;
                }
                else {
                    yyerror("Missing control char name in \\c");
@@ -4201,7 +4301,7 @@ S_intuit_more(pTHX_ char *s, char *e)
     if (*s == '-' && s[1] == '>'
      && FEATURE_POSTDEREF_QQ_IS_ENABLED
      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
-       ||(s[2] == '@' && strchr("*[{",s[3])) ))
+       ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
        return TRUE;
     if (*s != '{' && *s != '[')
        return FALSE;
@@ -4266,9 +4366,9 @@ S_intuit_more(pTHX_ char *s, char *e)
                }
                else if (*s == '$'
                          && s[1]
-                         && strchr("[#!%*<>()-=",s[1]))
+                         && memCHRs("[#!%*<>()-=",s[1]))
                 {
-                   if (/*{*/ strchr("])} =",s[2]))
+                   if (/*{*/ memCHRs("])} =",s[2]))
                        weight -= 10;
                    else
                        weight -= 1;
@@ -4277,11 +4377,11 @@ S_intuit_more(pTHX_ char *s, char *e)
            case '\\':
                un_char = 254;
                if (s[1]) {
-                   if (strchr("wds]",s[1]))
+                   if (memCHRs("wds]",s[1]))
                        weight += 100;
                    else if (seen[(U8)'\''] || seen[(U8)'"'])
                        weight += 1;
-                   else if (strchr("rnftbxcav",s[1]))
+                   else if (memCHRs("rnftbxcav",s[1]))
                        weight += 40;
                    else if (isDIGIT(s[1])) {
                        weight += 40;
@@ -4295,9 +4395,9 @@ S_intuit_more(pTHX_ char *s, char *e)
            case '-':
                if (s[1] == '\\')
                    weight += 50;
-               if (strchr("aA01! ",last_un_char))
+               if (memCHRs("aA01! ",last_un_char))
                    weight += 30;
-               if (strchr("zZ79~",s[1]))
+               if (memCHRs("zZ79~",s[1]))
                    weight += 30;
                if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
                    weight -= 5;        /* cope with negative subscript */
@@ -4725,10 +4825,10 @@ S_tokenize_use(pTHX_ int is_use, char *s) {
 STATIC bool
 S_word_takes_any_delimiter(char *p, STRLEN len)
 {
-    return (len == 1 && strchr("msyq", p[0]))
+    return (len == 1 && memCHRs("msyq", p[0]))
             || (len == 2
                 && ((p[0] == 't' && p[1] == 'r')
-                    || (p[0] == 'q' && strchr("qwxr", p[1]))));
+                    || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
 }
 
 static void
@@ -4743,7 +4843,7 @@ S_check_scalar_slice(pTHX_ char *s)
        return;
     }
     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
-           || (*s && strchr(" \t$#+-'\"", *s)))
+           || (*s && memCHRs(" \t$#+-'\"", *s)))
     {
         s += UTF ? UTF8SKIP(s) : 1;
     }
@@ -4791,7 +4891,7 @@ yyl_sigvar(pTHX_ char *s)
     case '@':
     case '%':
         /* spot stuff that looks like an prototype */
-        if (strchr("$:@%&*;\\[]", *s)) {
+        if (memCHRs("$:@%&*;\\[]", *s)) {
             yyerror("Illegal character following sigil in a subroutine signature");
             break;
         }
@@ -4819,7 +4919,7 @@ yyl_sigvar(pTHX_ char *s)
         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
          * as the ASSIGNOP, and exclude other tokens that start with =
          */
-        if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
+        if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
             /* save now to report with the same context as we did when
              * all ASSIGNOPS were accepted */
             PL_oldbufptr = s;
@@ -4882,7 +4982,7 @@ yyl_dollar(pTHX_ char *s)
 
     if (   s[1] == '#'
         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
-            || strchr("{$:+-@", s[2])))
+            || memCHRs("{$:+-@", s[2])))
     {
         PL_tokenbuf[0] = '@';
         s = scan_ident(s + 1, PL_tokenbuf + 1,
@@ -4930,13 +5030,40 @@ yyl_dollar(pTHX_ char *s)
                 if (ckWARN(WARN_SYNTAX)) {
                     char *t = s+1;
 
-                    while (   isSPACE(*t)
-                           || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
-                           || *t == '$')
-                    {
-                        t += UTF ? UTF8SKIP(t) : 1;
+                    while ( t < PL_bufend ) {
+                        if (isSPACE(*t)) {
+                            do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
+                            /* consumed one or more space chars */
+                        } else if (*t == '$' || *t == '@') {
+                            /* could be more than one '$' like $$ref or @$ref */
+                            do { t++; } while (t < PL_bufend && *t == '$');
+
+                            /* could be an abigail style identifier like $ foo */
+                            while (t < PL_bufend && *t == ' ') t++;
+
+                            /* strip off the name of the var */
+                            while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+                                t += UTF ? UTF8SKIP(t) : 1;
+                            /* consumed a varname */
+                        } else if (isDIGIT(*t)) {
+                            /* deal with hex constants like 0x11 */
+                            if (t[0] == '0' && t[1] == 'x') {
+                                t += 2;
+                                while (t < PL_bufend && isXDIGIT(*t)) t++;
+                            } else {
+                                /* deal with decimal/octal constants like 1 and 0123 */
+                                do { t++; } while (isDIGIT(*t));
+                                if (t<PL_bufend && *t == '.') {
+                                    do { t++; } while (isDIGIT(*t));
+                                }
+                            }
+                            /* consumed a number */
+                        } else {
+                            /* not a var nor a space nor a number */
+                            break;
+                        }
                     }
-                    if (*t++ == ',') {
+                    if (t < PL_bufend && *t++ == ',') {
                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                         while (t < PL_bufend && *t != ']')
                             t++;
@@ -4983,9 +5110,9 @@ yyl_dollar(pTHX_ char *s)
             const bool islop = (PL_last_lop == PL_oldoldbufptr);
             if (!islop || PL_last_lop_op == OP_GREPSTART)
                 PL_expect = XOPERATOR;
-            else if (strchr("$@\"'`q", *s))
+            else if (memCHRs("$@\"'`q", *s))
                 PL_expect = XTERM;             /* e.g. print $fh "foo" */
-            else if (   strchr("&*<%", *s)
+            else if (   memCHRs("&*<%", *s)
                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
             {
                 PL_expect = XTERM;             /* e.g. print $fh &sub */
@@ -5459,7 +5586,7 @@ yyl_hyphen(pTHX_ char *s)
             s = skipspace(s);
             if (((*s == '$' || *s == '&') && s[1] == '*')
               ||(*s == '$' && s[1] == '#' && s[2] == '*')
-              ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
+              ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
              )
             {
@@ -5955,7 +6082,7 @@ yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
                     }
                     term = *t;
                     open = term;
-                    if (term && (tmps = strchr("([{< )]}> )]}>",term)))
+                    if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
                         term = tmps[5];
                     close = term;
                     if (open == close)
@@ -6964,7 +7091,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len)
              */
             if (d && *s != '#') {
                 const char *c = ipath;
-                while (*c && !strchr("; \t\r\n\f\v#", *c))
+                while (*c && !memCHRs("; \t\r\n\f\v#", *c))
                     c++;
                 if (c < d)
                     d = NULL;  /* "perl" not in first word; ignore */
@@ -7800,6 +7927,11 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct
     case KEY_ioctl:
         LOP(OP_IOCTL,XTERM);
 
+    case KEY_isa:
+        Perl_ck_warner_d(aTHX_
+            packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
+        Rop(OP_ISA);
+
     case KEY_join:
         LOP(OP_JOIN,XTERM);
 
@@ -7907,7 +8039,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct
             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
             for (t=d; isSPACE(*t);)
                 t++;
-            if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+            if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
                 /* [perl #16184] */
                 && !(t[0] == '=' && t[1] == '>')
                 && !(t[0] == ':' && t[1] == ':')
@@ -8724,7 +8856,7 @@ yyl_try(pTHX_ char *s, STRLEN len)
            if (tmp == '~')
                PMop(OP_MATCH);
            if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
-               && strchr("+-*/%.^&|<",tmp))
+               && memCHRs("+-*/%.^&|<",tmp))
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                            "Reversed %c= operator",(int)tmp);
            s--;
@@ -9470,7 +9602,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
             * block / parens, boolean operators (&&, ||, //) and branch
             * constructs (or, and, if, until, unless, while, err, for).
             * Not a very solid hack... */
-           if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
+           if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                            "%s (...) interpreted as function",name);
        }
@@ -9532,75 +9664,31 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
     SV **cvp;
     SV *cv, *typesv;
     const char *why1 = "", *why2 = "", *why3 = "";
+    const char * optional_colon = ":";  /* Only some messages have a colon */
+    char *msg;
 
     PERL_ARGS_ASSERT_NEW_CONSTANT;
     /* We assume that this is true: */
-    if (*key == 'c') { assert (strEQ(key, "charnames")); }
     assert(type || s);
 
     sv_2mortal(sv);                    /* Parent created it permanently */
-    if (!table
-       || ! (PL_hints & HINT_LOCALIZE_HH)
-       || ! (cvp = hv_fetch(table, key, keylen, FALSE))
-       || ! SvOK(*cvp))
+
+    if (   ! table
+       || ! (PL_hints & HINT_LOCALIZE_HH))
     {
-       char *msg;
-
-       /* Here haven't found what we're looking for.  If it is charnames,
-        * perhaps it needs to be loaded.  Try doing that before giving up */
-       if (*key == 'c') {
-           Perl_load_module(aTHX_
-                           0,
-                           newSVpvs("_charnames"),
-                            /* version parameter; no need to specify it, as if
-                             * we get too early a version, will fail anyway,
-                             * not being able to find '_charnames' */
-                           NULL,
-                           newSVpvs(":full"),
-                           newSVpvs(":short"),
-                           NULL);
-            assert(sp == PL_stack_sp);
-           table = GvHV(PL_hintgv);
-           if (table
-               && (PL_hints & HINT_LOCALIZE_HH)
-               && (cvp = hv_fetch(table, key, keylen, FALSE))
-               && SvOK(*cvp))
-           {
-               goto now_ok;
-           }
-       }
-       if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
-           msg = Perl_form(aTHX_
-                              "Constant(%.*s) unknown",
-                               (int)(type ? typelen : len),
-                               (type ? type: s));
-       }
-       else {
-            why1 = "$^H{";
-            why2 = key;
-            why3 = "} is not defined";
-        report:
-            if (*key == 'c') {
-                msg = Perl_form(aTHX_
-                            /* The +3 is for '\N{'; -4 for that, plus '}' */
-                            "Unknown charname '%.*s'", (int)typelen - 4, type + 3
-                      );
-            }
-            else {
-                msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
-                                    (int)(type ? typelen : len),
-                                    (type ? type: s), why1, why2, why3);
-            }
-        }
-        if (error_msg) {
-            *error_msg = msg;
-        }
-        else {
-            yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
-        }
-       return SvREFCNT_inc_simple_NN(sv);
+        why1 = "unknown";
+        optional_colon = "";
+        goto report;
+    }
+
+    cvp = hv_fetch(table, key, keylen, FALSE);
+    if (!cvp || !SvOK(*cvp)) {
+        why1 = "$^H{";
+        why2 = key;
+        why3 = "} is not defined";
+        goto report;
     }
-  now_ok:
+
     cv = *cvp;
     if (!pv && s)
        pv = newSVpvn_flags(s, len, SVs_TEMP);
@@ -9645,16 +9733,31 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
     LEAVE ;
     POPSTACK;
 
-    if (!SvOK(res)) {
-       why1 = "Call to &{$^H{";
-       why2 = key;
-       why3 = "}} did not return a defined value";
-       sv = res;
-       (void)sv_2mortal(sv);
-       goto report;
+    if (SvOK(res)) {
+        return res;
     }
 
-    return res;
+    sv = res;
+    (void)sv_2mortal(sv);
+
+    why1 = "Call to &{$^H{";
+    why2 = key;
+    why3 = "}} did not return a defined value";
+
+  report:
+
+    msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
+                        (int)(type ? typelen : len),
+                        (type ? type: s),
+                        optional_colon,
+                        why1, why2, why3);
+    if (error_msg) {
+        *error_msg = msg;
+    }
+    else {
+        yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+    }
+    return SvREFCNT_inc_simple_NN(sv);
 }
 
 PERL_STATIC_INLINE void
@@ -9712,11 +9815,11 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
     }
     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
-        char *d;
+        char *this_d;
        char *d2;
-        Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
-        d2 = d;
-        SAVEFREEPV(d);
+        Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+        d2 = this_d;
+        SAVEFREEPV(this_d);
         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                          "Old package separator used in string");
         if (olds[-1] == '#')
@@ -9732,7 +9835,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
         }
         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
-                          UTF8fARG(is_utf8, d2-d, d));
+                          UTF8fARG(is_utf8, d2-this_d, this_d));
     }
     return;
 }
@@ -9789,12 +9892,17 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 
     if (isSPACE(*s) || !*s)
        s = skipspace(s);
-    if (isDIGIT(*s)) {
-       while (isDIGIT(*s)) {
-           if (d >= e)
-               Perl_croak(aTHX_ "%s", ident_too_long);
-           *d++ = *s++;
-       }
+    if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
+        bool is_zero= *s == '0' ? TRUE : FALSE;
+        char *digit_start= d;
+        *d++ = *s++;
+        while (s < PL_bufend && isDIGIT(*s)) {
+            if (d >= e)
+                Perl_croak(aTHX_ "%s", ident_too_long);
+            *d++ = *s++;
+        } 
+        if (is_zero && d - digit_start > 1)
+            Perl_croak(aTHX_ ident_var_zero_multi_digit);
     }
     else {  /* See if it is a "normal" identifier */
         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
@@ -9846,6 +9954,19 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         }
         else {
             *d = *s++;
+            /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
+            if (isDIGIT(*d)) {
+                bool is_zero= *d == '0' ? TRUE : FALSE;
+                char *digit_start= d;
+                while (s < PL_bufend && isDIGIT(*s)) {
+                    d++;
+                    if (d >= e)
+                        Perl_croak(aTHX_ "%s", ident_too_long);
+                    *d= *s++;
+                }
+                if (is_zero && d - digit_start > 1)
+                    Perl_croak(aTHX_ ident_var_zero_multi_digit);
+            }
             d[1] = '\0';
         }
     }
@@ -11012,7 +11133,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
     }
     else {
        termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
-        if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+        if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
                                            (U8 *) s,
                                            (U8 *) PL_bufend,
                                                   termcode)))
@@ -11084,7 +11205,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
                     {
                         if (   UTF
-                            && UNLIKELY(! _is_grapheme((U8 *) start,
+                            && UNLIKELY(! is_grapheme((U8 *) start,
                                                        (U8 *) s,
                                                        (U8 *) PL_bufend,
                                                               termcode)))
@@ -11744,7 +11865,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
        /* read exponent part, if present */
        if ((isALPHA_FOLD_EQ(*s, 'e')
               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
-            && strchr("+-0123456789_", s[1]))
+            && memCHRs("+-0123456789_", s[1]))
         {
             int exp_digits = 0;
             const char *save_s = s;
@@ -12346,7 +12467,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     while (1) {
        STRLEN chars;
        STRLEN have;
-       I32 newlen;
+       Size_t newlen;
        U8 *end;
        /* First, look in our buffer of existing UTF-8 data:  */
        char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
@@ -12710,7 +12831,10 @@ normally resulting in a single exception at the top level of parsing
 which covers all the compilation errors that occurred.  Some compilation
 errors, however, will throw an exception immediately.
 
+=for apidoc Amnh||PARSE_OPTIONAL
+
 =cut
+
 */
 
 OP *