This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] valgrind and /#/x
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 4e5a977..aef667e 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,6 +1,7 @@
 /*    toke.c
  *
- *    Copyright (c) 1991-2003, Larry Wall
+ *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ *    2000, 2001, 2002, 2003, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -681,7 +682,7 @@ S_check_uni(pTHX)
         char ch = *s;
         *s = '\0';
         Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                  "Warning: Use of \"%s\" without parens is ambiguous",
+                  "Warning: Use of \"%s\" without parentheses is ambiguous",
                   PL_last_uni);
         *s = ch;
     }
@@ -785,6 +786,8 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
        }
        PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
        PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
+       if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
+           SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
        force_next(token);
     }
     return s;
@@ -1321,7 +1324,7 @@ S_scan_const(pTHX_ char *start)
           except for the last char, which will be done separately. */
        else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
            if (s[2] == '#') {
-               while (s < send && *s != ')')
+               while (s+1 < send && *s != ')')
                    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
            }
            else if (s[2] == '{' /* This should match regcomp.c */
@@ -1340,10 +1343,8 @@ S_scan_const(pTHX_ char *start)
                        count--;
                    regparse++;
                }
-               if (*regparse != ')') {
+               if (*regparse != ')')
                    regparse--;         /* Leave one char for continuation. */
-                   yyerror("Sequence (?{...}) not terminated or not {}-balanced");
-               }
                while (s < regparse)
                    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
            }
@@ -2141,7 +2142,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
 #ifdef DEBUGGING
     static char* exp_name[] =
        { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
-         "ATTRTERM", "TERMBLOCK"
+         "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
        };
 #endif
 
@@ -2703,7 +2704,9 @@ Perl_yylex(pTHX)
                    else
                        newargv = PL_origargv;
                    newargv[0] = ipath;
+                   PERL_FPU_PRE_EXEC
                    PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
+                   PERL_FPU_POST_EXEC
                    Perl_croak(aTHX_ "Can't exec %s", ipath);
                }
 #endif
@@ -2726,6 +2729,14 @@ Perl_yylex(pTHX)
                            }
                            d = moreswitches(d);
                        } while (d);
+                       if (PL_doswitches && !switches_done) {
+                           int argc = PL_origargc;
+                           char **argv = PL_origargv;
+                           do {
+                               argc--,argv++;
+                           } while (argc && argv[0][0] == '-' && argv[0][1]);
+                           init_argv_symbols(argc,argv);
+                       }
                        if ((PERLDB_LINE && !oldpdb) ||
                            ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
                              /* if we have already added "LINE: while (<>) {",
@@ -2939,8 +2950,6 @@ Perl_yylex(pTHX)
        PL_tokenbuf[0] = '%';
        s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
        if (!PL_tokenbuf[1]) {
-           if (s == PL_bufend)
-               yyerror("Final % should be \\% or %name");
            PREREF('%');
        }
        PL_pending_ident = '%';
@@ -3088,6 +3097,7 @@ Perl_yylex(pTHX)
            PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
        else
            PL_expect = XTERM;
+       s = skipspace(s);
        TOKEN('(');
     case ';':
        CLINE;
@@ -3209,12 +3219,17 @@ Perl_yylex(pTHX)
                            || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
                                && !isALNUM(*t))))
                    {
+                       /* skip q//-like construct */
                        char *tmps;
                        char open, close, term;
                        I32 brackets = 1;
 
                        while (t < PL_bufend && isSPACE(*t))
                            t++;
+                       /* check for q => */
+                       if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
+                           OPERATOR(HASHBRACK);
+                       }
                        term = *t;
                        open = term;
                        if (term && (tmps = strchr("([{< )]}> )]}>",term)))
@@ -3227,7 +3242,7 @@ Perl_yylex(pTHX)
                                else if (*t == open)
                                    break;
                            }
-                       else
+                       else {
                            for (t++; t < PL_bufend; t++) {
                                if (*t == '\\' && t+1 < PL_bufend)
                                    t++;
@@ -3236,8 +3251,13 @@ Perl_yylex(pTHX)
                                else if (*t == open)
                                    brackets++;
                            }
+                       }
+                       t++;
                    }
-                   t++;
+                   else
+                       /* skip plain q word */
+                       while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+                            t += UTF8SKIP(t);
                }
                else if (isALNUM_lazy_if(t,UTF)) {
                    t += UTF8SKIP(t);
@@ -3537,9 +3557,7 @@ Perl_yylex(pTHX)
                    }
                }
                else {
-                   GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
-                   if (gv && GvCVu(gv))
-                       PL_expect = XTERM;      /* e.g. print $fh subr() */
+                   PL_expect = XTERM;          /* e.g. print $fh subr() */
                }
            }
            else if (isDIGIT(*s))
@@ -3563,8 +3581,6 @@ Perl_yylex(pTHX)
        PL_tokenbuf[0] = '@';
        s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
        if (!PL_tokenbuf[1]) {
-           if (s == PL_bufend)
-               yyerror("Final @ should be \\@ or @name");
            PREREF('@');
        }
        if (PL_lex_state == LEX_NORMAL)
@@ -4074,6 +4090,8 @@ Perl_yylex(pTHX)
                            TERM(FUNC0SUB);
                        if (strEQ(proto, "$"))
                            OPERATOR(UNIOPSUB);
+                       while (*proto == ';')
+                           proto++;
                        if (*proto == '&' && *s == '{') {
                            sv_setpv(PL_subname, PL_curstash ? 
                                        "__ANON__" : "__ANON__::__ANON__");
@@ -6687,8 +6705,12 @@ retval:
        Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
     }
     SvREFCNT_dec(herewas);
-    if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
-       SvUTF8_on(tmpstr);
+    if (!IN_BYTES) {
+       if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+           SvUTF8_on(tmpstr);
+       else if (PL_encoding)
+           sv_recode_to_utf8(tmpstr, PL_encoding);
+    }
     PL_lex_stuff = tmpstr;
     yylval.ival = op_type;
     return s;
@@ -6972,7 +6994,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
                    goto read_more_line;
                else {
                    /* handle quoted delimiters */
-                   if (*(svlast-1) == '\\') {
+                   if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
                        char *t;
                        for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
                            t--;
@@ -7936,3 +7958,89 @@ utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 }
 #endif
 
+/*
+Returns a pointer to the next character after the parsed
+vstring, as well as updating the passed in sv.
+
+Function must be called like
+
+       sv = NEWSV(92,5);
+       s = scan_vstring(s,sv);
+
+The sv should already be large enough to store the vstring
+passed in, for performance reasons.
+
+*/
+
+char *
+Perl_scan_vstring(pTHX_ char *s, SV *sv)
+{
+    char *pos = s;
+    char *start = s;
+    if (*pos == 'v') pos++;  /* get past 'v' */
+    while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
+       pos++;
+    if ( *pos != '.') {
+       /* this may not be a v-string if followed by => */
+       char *next = pos;
+       while (next < PL_bufend && isSPACE(*next))
+           ++next;
+       if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
+           /* return string not v-string */
+           sv_setpvn(sv,(char *)s,pos-s);
+           return pos;
+       }
+    }
+
+    if (!isALPHA(*pos)) {
+       UV rev;
+       U8 tmpbuf[UTF8_MAXLEN+1];
+       U8 *tmpend;
+
+       if (*s == 'v') s++;  /* get past 'v' */
+
+       sv_setpvn(sv, "", 0);
+
+       for (;;) {
+           rev = 0;
+           {
+               /* this is atoi() that tolerates underscores */
+               char *end = pos;
+               UV mult = 1;
+               while (--end >= s) {
+                   UV orev;
+                   if (*end == '_')
+                       continue;
+                   orev = rev;
+                   rev += (*end - '0') * mult;
+                   mult *= 10;
+                   if (orev > rev && ckWARN_d(WARN_OVERFLOW))
+                       Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                   "Integer overflow in decimal number");
+               }
+           }
+#ifdef EBCDIC
+           if (rev > 0x7FFFFFFF)
+                Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
+#endif
+           /* Append native character for the rev point */
+           tmpend = uvchr_to_utf8(tmpbuf, rev);
+           sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
+           if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+                SvUTF8_on(sv);
+           if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
+                s = ++pos;
+           else {
+                s = pos;
+                break;
+           }
+           while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
+                pos++;
+       }
+       SvPOK_on(sv);
+       sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
+       SvRMAGICAL_on(sv);
+    }
+    return s;
+}
+