This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[PATCH] update perlfaq to version 5.021009
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 9a01103..30a9061 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -399,7 +399,7 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
        }
        if (name)
            Perl_sv_catpv(aTHX_ report, name);
-       else if ((char)rv > ' ' && (char)rv <= '~')
+       else if (isGRAPH(rv))
        {
            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
            if ((char)rv == 'p')
@@ -730,7 +730,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
     parser->last_lop = parser->last_uni = NULL;
 
-    assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
                                                         |LEX_DONT_CLOSE_RSFP));
@@ -1968,7 +1968,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
     SV * const sv = newSVpvn_utf8(start, len,
                                  !IN_BYTES
                                  && UTF
-                                 && !is_ascii_string((const U8*)start, len)
+                                 && !is_invariant_string((const U8*)start, len)
                                  && is_utf8_string((const U8*)start, len));
     return sv;
 }
@@ -2005,9 +2005,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
        if (check_keyword) {
          char *s2 = PL_tokenbuf;
+         STRLEN len2 = len;
          if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
-           s2 += 6, len -= 6;
-         if (keyword(s2, len, 0))
+           s2 += 6, len2 -= 6;
+         if (keyword(s2, len2, 0))
            return start;
        }
        if (token == METHOD) {
@@ -2457,7 +2458,7 @@ S_sublex_done(pTHX)
                 + PL_parser->herelines;
            PL_parser->herelines = 0;
        }
-       return ',';
+       return '/';
     }
     else {
        const line_t l = CopLINE(PL_curcop);
@@ -2491,6 +2492,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
+    if (!SvCUR(res))
+        return res;
+
     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
                                      e - backslash_ptr,
                                      &first_bad_char_loc))
@@ -3282,34 +3286,68 @@ S_scan_const(pTHX_ char *start)
 
                if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                               | PERL_SCAN_SILENT_ILLDIGIT
                                | PERL_SCAN_DISALLOW_PREFIX;
                    STRLEN len;
 
                    s += 2;         /* Skip to next char after the 'U+' */
                    len = e - s;
                    uv = grok_hex(s, &len, &flags, NULL);
-                   if (len == 0 || len != (STRLEN)(e - s)) {
+                   if (len == 0
+                    || (  len != (STRLEN)(e - s) && s[len] != '.'
+                       && PL_lex_inpat))
+                   {
+                     bad_NU:
                        yyerror("Invalid hexadecimal number in \\N{U+...}");
                        s = e + 1;
                        continue;
                    }
 
                    if (PL_lex_inpat) {
-                       s -= 5;     /* Include the '\N{U+' */
 #ifdef EBCDIC
+                       s -= 5;     /* Include the '\N{U+' */
                         /* On EBCDIC platforms, in \N{U+...}, the '...' is a
                          * Unicode value, so convert to native so downstream
                          * code can continue to assume it's native */
+                        /* XXX This should be in the regexp parser,
+                               because doing it here makes /\N{U+41}/ and
+                               =~ '\N{U+41}' do different things.  */
                        d += my_snprintf(d, e - s + 1 + 1,  /* includes the '}'
                                                               and the \0 */
-                                         "\\N{U+%X}",
+                                         "\\N{U+%X",
                                          (unsigned int) UNI_TO_NATIVE(uv));
+                        s += 5 + len;
+                        while (*s == '.') {
+                            s++;
+                            len = e - s;
+                            uv = grok_hex(s, &len, &flags, NULL);
+                            if (!len
+                             || (len != (STRLEN)(e - s) && s[len] != '.'))
+                                goto bad_NU;
+                            s--;
+                            d += my_snprintf(
+                                     d, e - s + 1 + 1, ".%X",
+                                     (unsigned int)UNI_TO_NATIVE(uv)
+                                 );
+                            s += len + 1;
+                        }
+                        *(d++) = '}';
 #else
                         /* On non-EBCDIC platforms, pass it through unchanged.
-                         * The reason we evaluated the number above is to make
+                         * The reason we evaluate the numbers is to make
                          * sure there wasn't a syntax error. */
-                       Copy(s, d, e - s + 1, char);    /* +1 is for the '}' */
-                       d += e - s + 1;
+                        const char * const orig_s = s - 5;
+                        while (*s == '.') {
+                            s++;
+                            len = e - s;
+                            uv = grok_hex(s, &len, &flags, NULL);
+                            if (!len
+                             || (len != (STRLEN)(e - s) && s[len] != '.'))
+                                goto bad_NU;
+                        }
+                        /* +1 is for the '}' */
+                        Copy(orig_s, d, e - orig_s + 1, char);
+                        d += e - orig_s + 1;
 #endif
                    }
                    else {  /* Not a pattern: convert the hex to string */
@@ -3789,11 +3827,10 @@ S_intuit_more(pTHX_ char *s)
                    && !(last_un_char == '$' || last_un_char == '@'
                         || last_un_char == '&')
                    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
-                   char *d = tmpbuf;
+                   char *d = s;
                    while (isALPHA(*s))
-                       *d++ = *s++;
-                   *d = '\0';
-                   if (keyword(tmpbuf, d - tmpbuf, 0))
+                       s++;
+                   if (keyword(d, s - d, 0))
                        weight -= 150;
                }
                if (un_char == last_un_char + 1)
@@ -4948,7 +4985,7 @@ Perl_yylex(pTHX)
        Perl_croak(aTHX_
       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
 #endif
-    case ' ': case '\t': case '\f': case 013:
+    case ' ': case '\t': case '\f': case '\v':
        s++;
        goto retry;
     case '#':
@@ -5328,6 +5365,19 @@ Perl_yylex(pTHX)
                        sv_free(sv);
                        CvMETHOD_on(PL_compcv);
                    }
+                   else if (!PL_in_my && len == 5
+                         && strnEQ(SvPVX(sv), "const", len))
+                   {
+                       sv_free(sv);
+                       Perl_ck_warner_d(aTHX_
+                           packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+                          ":const is experimental"
+                       );
+                       CvANONCONST_on(PL_compcv);
+                       if (!CvANON(PL_compcv))
+                           yyerror(":const is not permitted on named "
+                                   "subroutines");
+                   }
                    /* After we've set the flags, it could be argued that
                       we don't need to do the attributes.pm-based setting
                       process, and shouldn't bother appending recognized
@@ -5751,30 +5801,30 @@ Perl_yylex(pTHX)
            s--;
            if (PL_expect == XSTATE && isALPHA(tmp) &&
                (s == PL_linestart+1 || s[-2] == '\n') )
-               {
-                   if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
-                       || PL_lex_state != LEX_NORMAL) {
-                       d = PL_bufend;
-                       while (s < d) {
-                           if (*s++ == '\n') {
-                               incline(s);
-                               if (strnEQ(s,"=cut",4)) {
-                                   s = strchr(s,'\n');
-                                   if (s)
-                                       s++;
-                                   else
-                                       s = d;
-                                   incline(s);
-                                   goto retry;
-                               }
-                           }
-                       }
-                       goto retry;
-                   }
-                   s = PL_bufend;
-                   PL_parser->in_pod = 1;
-                   goto retry;
-               }
+            {
+                if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+                    || PL_lex_state != LEX_NORMAL) {
+                    d = PL_bufend;
+                    while (s < d) {
+                        if (*s++ == '\n') {
+                            incline(s);
+                            if (strnEQ(s,"=cut",4)) {
+                                s = strchr(s,'\n');
+                                if (s)
+                                    s++;
+                                else
+                                    s = d;
+                                incline(s);
+                                goto retry;
+                            }
+                        }
+                    }
+                    goto retry;
+                }
+                s = PL_bufend;
+                PL_parser->in_pod = 1;
+                goto retry;
+            }
        }
        if (PL_expect == XBLOCK) {
            const char *t = s;
@@ -6255,7 +6305,7 @@ Perl_yylex(pTHX)
            }
            /* avoid v123abc() or $h{v1}, allow C<print v10;> */
            if (!isALPHA(*start) && (PL_expect == XTERM
-                       || PL_expect == XSTATE
+                       || PL_expect == XREF || PL_expect == XSTATE
                        || PL_expect == XTERMORDORDOR)) {
                GV *const gv = gv_fetchpvn_flags(s, start - s,
                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
@@ -6392,7 +6442,7 @@ Perl_yylex(pTHX)
            char tmpbuf[sizeof PL_tokenbuf + 1];
            *tmpbuf = '&';
            Copy(PL_tokenbuf, tmpbuf+1, len, char);
-           off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+           off = pad_findmy_pvn(tmpbuf, len+1, 0);
            if (off != NOT_IN_PAD) {
                assert(off); /* we assume this is boolean-true below */
                if (PAD_COMPNAME_FLAGS_isOUR(off)) {
@@ -7881,7 +7931,7 @@ Perl_yylex(pTHX)
                    *PL_tokenbuf = '&';
                    if (memchr(tmpbuf, ':', len) || key != KEY_sub
                     || pad_findmy_pvn(
-                           PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+                           PL_tokenbuf, len + 1, 0
                        ) != NOT_IN_PAD)
                        sv_setpvn(PL_subname, tmpbuf, len);
                    else {
@@ -8182,7 +8232,7 @@ S_pending_ident(pTHX)
     if (!has_colon) {
        if (!PL_in_my)
            tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
-                                    UTF ? SVf_UTF8 : 0);
+                                 0);
         if (tmp != NOT_IN_PAD) {
             /* might be an "our" variable" */
             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
@@ -8300,7 +8350,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
                char tmpbuf[256];
                Copy(w, tmpbuf+1, s - w, char);
                *tmpbuf = '&';
-               off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+               off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
                if (off != NOT_IN_PAD) return;
            }
            Perl_croak(aTHX_ "No comma allowed after %s", what);
@@ -8397,7 +8447,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
        yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
        return SvREFCNT_inc_simple_NN(sv);
     }
-now_ok:
+  now_ok:
     cv = *cvp;
     if (!pv && s)
        pv = newSVpvn_flags(s, len, SVs_TEMP);
@@ -9452,7 +9502,7 @@ S_scan_inputsymbol(pTHX_ char *start)
            /* try to find it in the pad for this block, otherwise find
               add symbol table ops
            */
-           const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+           const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
            if (tmp != NOT_IN_PAD) {
                if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
                    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
@@ -9476,7 +9526,7 @@ S_scan_inputsymbol(pTHX_ char *start)
            else {
                GV *gv;
                ++d;
-intro_sym:
+              intro_sym:
                gv = gv_fetchpv(d,
                                GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
                                SVt_PV);
@@ -10418,7 +10468,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
     /* if it starts with a v, it could be a v-string */
     case 'v':
-vstring:
+    vstring:
                sv = newSV(5); /* preallocate storage space */
                ENTER_with_name("scan_vstring");
                SAVEFREESV(sv);
@@ -10512,7 +10562,7 @@ S_scan_formline(pTHX_ char *s)
        if (needargs) {
            const char *s2 = s;
            while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
-               || *s2 == 013)
+               || *s2 == '\v')
                s2++;
            if (*s2 == '{') {
                PL_expect = XTERMBLOCK;
@@ -10553,12 +10603,11 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     CvFLAGS(PL_compcv) |= flags;
 
     PL_subline = CopLINE(PL_curcop);
-    CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
+    CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
     if (outsidecv && CvPADLIST(outsidecv))
-       CvPADLIST(PL_compcv)->xpadl_outid =
-           PadlistNAMES(CvPADLIST(outsidecv));
+       CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
 
     return oldsavestack_ix;
 }
@@ -10635,7 +10684,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     }
     else if (yychar > 255)
        sv_catpvs(where_sv, "next token ???");
-    else if (yychar == -2) { /* YYEMPTY */
+    else if (yychar == YYEMPTY) {
        if (PL_lex_state == LEX_NORMAL ||
           (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
            sv_catpvs(where_sv, "at end of line");