This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl 4.0 patch 3: Patch 1 continued
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 2b88b1a..29ee126 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,4 +1,4 @@
-/* $Header: toke.c,v 3.0.1.9 90/08/13 22:37:25 lwall Locked $
+/* $RCSfile: toke.c,v $$Revision: 4.0.1.1 $$Date: 91/04/12 09:18:18 $
  *
  *    Copyright (c) 1989, Larry Wall
  *
@@ -6,55 +6,11 @@
  *    as specified in the README file that comes with the perl 3.0 kit.
  *
  * $Log:       toke.c,v $
- * Revision 3.0.1.9  90/08/13  22:37:25  lwall
- * patch28: defined(@array) and defined(%array) didn't work right
+ * Revision 4.0.1.1  91/04/12  09:18:18  lwall
+ * patch1: perl -de "print" wouldn't stop at the first statement
  * 
- * Revision 3.0.1.8  90/08/09  05:39:58  lwall
- * patch19: added require operator
- * patch19: added -x switch to extract script from input trash
- * patch19: bare @name didn't add array to symbol table
- * patch19: Added __LINE__ and __FILE__ tokens
- * patch19: Added __END__ token
- * patch19: Numeric literals are now stored only in floating point
- * patch19: some support for FPS compiler misfunction
- * patch19: "\\$foo" not handled right
- * patch19: program and data can now both come from STDIN
- * patch19: "here" strings caused warnings about uninitialized variables
- * 
- * Revision 3.0.1.7  90/03/27  16:32:37  lwall
- * patch16: MSDOS support
- * patch16: formats didn't work inside eval
- * patch16: final semicolon in program wasn't optional with -p or -n
- * 
- * Revision 3.0.1.6  90/03/12  17:06:36  lwall
- * patch13: last semicolon of program is now optional, just for Randal
- * patch13: added splice operator: @oldelems = splice(@array,$offset,$len,LIST)
- * 
- * Revision 3.0.1.5  90/02/28  18:47:06  lwall
- * patch9: return grandfathered to never be function call
- * patch9: non-existent perldb.pl now gives reasonable error message
- * patch9: perl can now start up other interpreters scripts
- * patch9: line numbers were bogus during certain portions of foreach evaluation
- * patch9: null hereis core dumped
- * 
- * Revision 3.0.1.4  89/12/21  20:26:56  lwall
- * patch7: -d switch incompatible with -p or -n
- * patch7: " ''$foo'' " didn't parse right
- * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
- * 
- * Revision 3.0.1.3  89/11/17  15:43:15  lwall
- * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
- * patch5: } misadjusted expection of subsequent term or operator
- * patch5: y/abcde// didn't work
- * 
- * Revision 3.0.1.2  89/11/11  05:04:42  lwall
- * patch2: fixed a CLINE macro conflict
- * 
- * Revision 3.0.1.1  89/10/26  23:26:21  lwall
- * patch1: disambiguated word after "sort" better
- * 
- * Revision 3.0  89/10/18  15:32:33  lwall
- * 3.0 baseline
+ * Revision 4.0  91/03/20  01:42:14  lwall
+ * 4.0 baseline.
  * 
  */
 
 #include "perl.h"
 #include "perly.h"
 
-char *reparse;         /* if non-null, scanreg found ${foo[$bar]} */
+#ifdef I_FCNTL
+#include <fcntl.h>
+#endif
+#ifdef I_SYS_FILE
+#include <sys/file.h>
+#endif
+
+/* which backslash sequences to keep in m// or s// */
+
+static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";
+
+char *reparse;         /* if non-null, scanident found ${foo[$bar]} */
+
+void checkcomma();
 
 #ifdef CLINE
 #undef CLINE
@@ -79,13 +48,15 @@ char *reparse;              /* if non-null, scanreg found ${foo[$bar]} */
 #define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
 #define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
 #define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
+#define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
 #define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
+#define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
+#define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
 #define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
 #define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
 #define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
 #define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
 #define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
-#define LFUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)LFUNC4)
 #define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
 #define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
 #define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
@@ -106,7 +77,7 @@ char *reparse;               /* if non-null, scanreg found ${foo[$bar]} */
 /* This does similarly for list operators, merely by pretending that the
  * paren came before the listop rather than after.
  */
-#define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
+#define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
        (*s = META('('), bufptr = oldbufptr, '(') : \
        (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
 /* grandfather return to old style */
@@ -150,6 +121,7 @@ lop(f,s)
 int f;
 char *s;
 {
+    CLINE;
     if (*s != '(')
        s = skipspace(s);
     if (*s == '(') {
@@ -192,7 +164,7 @@ yylex()
        if ((*s & 127) == '(')
            *s++ = '(';
        else
-           warn("Unrecognized character \\%03o ignored", *s++);
+           warn("Unrecognized character \\%03o ignored", *s++ & 255);
        goto retry;
     }
 #endif
@@ -201,7 +173,7 @@ yylex()
        if ((*s & 127) == '(')
            *s++ = '(';
        else
-           warn("Unrecognized character \\%03o ignored", *s++);
+           warn("Unrecognized character \\%03o ignored", *s++ & 255);
        goto retry;
     case 4:
     case 26:
@@ -215,10 +187,17 @@ yylex()
            firstline = FALSE;
            if (minus_n || minus_p || perldb) {
                str_set(linestr,"");
-               if (perldb)
-                   str_cat(linestr, "require 'perldb.pl';");
+               if (perldb) {
+                   char *getenv();
+                   char *pdb = getenv("PERLDB");
+
+                   str_cat(linestr, pdb ? pdb : "require 'perldb.pl'");
+                   str_cat(linestr, ";");
+               }
                if (minus_n || minus_p) {
                    str_cat(linestr,"line: while (<>) {");
+                   if (minus_l)
+                       str_cat(linestr,"chop;");
                    if (minus_a)
                        str_cat(linestr,"@F=split(' ');");
                }
@@ -242,13 +221,15 @@ yylex()
        do {
            if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
              fake_eof:
-               if (preprocess)
-                   (void)mypclose(rsfp);
-               else if (rsfp == stdin)
-                   clearerr(stdin);
-               else
-                   (void)fclose(rsfp);
-               rsfp = Nullfp;
+               if (rsfp) {
+                   if (preprocess)
+                       (void)mypclose(rsfp);
+                   else if (rsfp == stdin)
+                       clearerr(stdin);
+                   else
+                       (void)fclose(rsfp);
+                   rsfp = Nullfp;
+               }
                if (minus_n || minus_p) {
                    str_set(linestr,minus_p ? ";}continue{print" : "");
                    str_cat(linestr,";}");
@@ -269,7 +250,7 @@ yylex()
            STR *str = Str_new(85,0);
 
            str_sset(str,linestr);
-           astore(lineary,(int)curcmd->c_line,str);
+           astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
        }
 #ifdef DEBUG
        if (firstline) {
@@ -316,7 +297,7 @@ yylex()
            }
        }
        goto retry;
-    case ' ': case '\t': case '\f':
+    case ' ': case '\t': case '\f': case '\r': case 013:
        s++;
        goto retry;
     case '#':
@@ -332,9 +313,9 @@ yylex()
                s[strlen(s)-1] = '\0';  /* wipe out trailing quote */
            }
            if (*s)
-               filename = savestr(s);
+               curcmd->c_filestab = fstab(s);
            else
-               filename = origfilename;
+               curcmd->c_filestab = fstab(origfilename);
            oldoldbufptr = oldbufptr = s = str_get(linestr);
        }
        /* FALL THROUGH */
@@ -345,6 +326,13 @@ yylex()
                s++;
            if (s < d)
                s++;
+           if (perldb) {
+               STR *str = Str_new(85,0);
+
+               str_nset(str,linestr->str_ptr, s - linestr->str_ptr);
+               astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
+               str_chop(linestr, s);
+           }
            if (in_format) {
                bufptr = s;
                yylval.formval = load_format();
@@ -387,6 +375,9 @@ yylex()
            case 't': FTST(O_FTTTY);
            case 'T': FTST(O_FTTEXT);
            case 'B': FTST(O_FTBINARY);
+           case 'M': stabent("\024",TRUE); FTST(O_FTMTIME);
+           case 'A': stabent("\024",TRUE); FTST(O_FTATIME);
+           case 'C': stabent("\024",TRUE); FTST(O_FTCTIME);
            default:
                s -= 2;
                break;
@@ -414,7 +405,7 @@ yylex()
 
     case '*':
        if (expectterm) {
-           s = scanreg(s,bufend,tokenbuf);
+           s = scanident(s,bufend,tokenbuf);
            yylval.stabval = stabent(tokenbuf,TRUE);
            TERM(STAR);
        }
@@ -426,7 +417,7 @@ yylex()
        MOP(O_MULTIPLY);
     case '%':
        if (expectterm) {
-           s = scanreg(s,bufend,tokenbuf);
+           s = scanident(s,bufend,tokenbuf);
            yylval.stabval = hadd(stabent(tokenbuf,TRUE));
            TERM(HSH);
        }
@@ -507,8 +498,13 @@ yylex()
        tmp = *s++;
        if (tmp == '<')
            OPERATOR(LS);
-       if (tmp == '=')
+       if (tmp == '=') {
+           tmp = *s++;
+           if (tmp == '>')
+               EOP(O_NCMP);
+           s--;
            ROP(O_LE);
+       }
        s--;
        ROP(O_LT);
     case '>':
@@ -534,12 +530,12 @@ yylex()
     case '$':
        if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
            s++;
-           s = scanreg(s,bufend,tokenbuf);
+           s = scanident(s,bufend,tokenbuf);
            yylval.stabval = aadd(stabent(tokenbuf,TRUE));
            TERM(ARYLEN);
        }
        d = s;
-       s = scanreg(s,bufend,tokenbuf);
+       s = scanident(s,bufend,tokenbuf);
        if (reparse) {          /* turn ${foo[bar]} into ($foo[bar]) */
          do_reparse:
            s[-1] = ')';
@@ -553,7 +549,7 @@ yylex()
 
     case '@':
        d = s;
-       s = scanreg(s,bufend,tokenbuf);
+       s = scanident(s,bufend,tokenbuf);
        if (reparse)
            goto do_reparse;
        yylval.stabval = aadd(stabent(tokenbuf,TRUE));
@@ -600,17 +596,41 @@ yylex()
                if (d[2] == 'L')
                    (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
                else
-                   strcpy(tokenbuf, filename);
+                   strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
                arg[1].arg_type = A_SINGLE;
                arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
                TERM(RSTRING);
            }
-           else if (strEQ(d,"__END__"))
+           else if (strEQ(d,"__END__")) {
+#ifndef TAINT
+               STAB *stab;
+               int fd;
+
+               if (stab = stabent("DATA",FALSE)) {
+                   stab->str_pok |= SP_MULTI;
+                   stab_io(stab) = stio_new();
+                   stab_io(stab)->ifp = rsfp;
+#if defined(HAS_FCNTL) && defined(F_SETFD)
+                   fd = fileno(rsfp);
+                   fcntl(fd,F_SETFD,fd >= 3);
+#endif
+                   if (preprocess)
+                       stab_io(stab)->type = '|';
+                   else if (rsfp == stdin)
+                       stab_io(stab)->type = '-';
+                   else
+                       stab_io(stab)->type = '<';
+                   rsfp = Nullfp;
+               }
+#endif
                goto fake_eof;
+           }
        }
        break;
     case 'a': case 'A':
        SNARFWORD;
+       if (strEQ(d,"alarm"))
+           UNI(O_ALARM);
        if (strEQ(d,"accept"))
            FOP22(O_ACCEPT);
        if (strEQ(d,"atan2"))
@@ -637,6 +657,10 @@ yylex()
            FOP(O_CLOSE);
        if (strEQ(d,"closedir"))
            FOP(O_CLOSEDIR);
+       if (strEQ(d,"cmp"))
+           EOP(O_SCMP);
+       if (strEQ(d,"caller"))
+           UNI(O_CALLER);
        if (strEQ(d,"crypt")) {
 #ifdef FCRYPT
            init_des();
@@ -701,7 +725,7 @@ yylex()
            HFUN(O_EACH);
        if (strEQ(d,"exec")) {
            set_csh();
-           LOP(O_EXEC);
+           LOP(O_EXEC_OP);
        }
        if (strEQ(d,"endhostent"))
            FUN0(O_EHOSTENT);
@@ -834,7 +858,7 @@ yylex()
            OPERATOR(IF);
        }
        if (strEQ(d,"index"))
-           FUN2(O_INDEX);
+           FUN2x(O_INDEX);
        if (strEQ(d,"int"))
            UNI(O_INT);
        if (strEQ(d,"ioctl"))
@@ -890,8 +914,22 @@ yylex()
            else
                RETURN(1);      /* force error */
        }
-       if (strEQ(d,"mkdir"))
-           FUN2(O_MKDIR);
+       switch (d[1]) {
+       case 'k':
+           if (strEQ(d,"mkdir"))
+               FUN2(O_MKDIR);
+           break;
+       case 's':
+           if (strEQ(d,"msgctl"))
+               FUN3(O_MSGCTL);
+           if (strEQ(d,"msgget"))
+               FUN2(O_MSGGET);
+           if (strEQ(d,"msgrcv"))
+               FUN5(O_MSGRCV);
+           if (strEQ(d,"msgsnd"))
+               FUN3(O_MSGSND);
+           break;
+       }
        break;
     case 'n': case 'N':
        SNARFWORD;
@@ -944,6 +982,10 @@ yylex()
            s = scanstr(s-2);
            TERM(RSTRING);
        }
+       if (strEQ(d,"qx")) {
+           s = scanstr(s-2);
+           TERM(RSTRING);
+       }
        break;
     case 'r': case 'R':
        SNARFWORD;
@@ -964,7 +1006,7 @@ yylex()
        if (strEQ(d,"rmdir"))
            UNI(O_RMDIR);
        if (strEQ(d,"rindex"))
-           FUN2(O_RINDEX);
+           FUN2x(O_RINDEX);
        if (strEQ(d,"read"))
            FOP3(O_READ);
        if (strEQ(d,"readdir"))
@@ -996,7 +1038,11 @@ yylex()
        switch (d[1]) {
        case 'a':
        case 'b':
+           break;
        case 'c':
+           if (strEQ(d,"scalar"))
+               UNI(O_SCALAR);
+           break;
        case 'd':
            break;
        case 'e':
@@ -1004,6 +1050,12 @@ yylex()
                OPERATOR(SSELECT);
            if (strEQ(d,"seek"))
                FOP3(O_SEEK);
+           if (strEQ(d,"semctl"))
+               FUN4(O_SEMCTL);
+           if (strEQ(d,"semget"))
+               FUN3(O_SEMGET);
+           if (strEQ(d,"semop"))
+               FUN2(O_SEMOP);
            if (strEQ(d,"send"))
                FOP3(O_SEND);
            if (strEQ(d,"setpgrp"))
@@ -1033,6 +1085,14 @@ yylex()
        case 'h':
            if (strEQ(d,"shift"))
                TERM(SHIFT);
+           if (strEQ(d,"shmctl"))
+               FUN3(O_SHMCTL);
+           if (strEQ(d,"shmget"))
+               FUN3(O_SHMGET);
+           if (strEQ(d,"shmread"))
+               FUN4(O_SHMREAD);
+           if (strEQ(d,"shmwrite"))
+               FUN4(O_SHMWRITE);
            if (strEQ(d,"shutdown"))
                FOP2(O_SHUTDOWN);
            break;
@@ -1107,7 +1167,7 @@ yylex()
            break;
        case 'u':
            if (strEQ(d,"substr"))
-               FUN3(O_SUBSTR);
+               FUN2x(O_SUBSTR);
            if (strEQ(d,"sub")) {
                subline = curcmd->c_line;
                d = bufend;
@@ -1144,6 +1204,10 @@ yylex()
                FUN2(O_SYMLINK);
            if (strEQ(d,"syscall"))
                LOP(O_SYSCALL);
+           if (strEQ(d,"sysread"))
+               FOP3(O_SYSREAD);
+           if (strEQ(d,"syswrite"))
+               FOP3(O_SYSWRITE);
            break;
        case 'z':
            break;
@@ -1215,6 +1279,8 @@ yylex()
            LOP(O_WARN);
        if (strEQ(d,"wait"))
            FUN0(O_WAIT);
+       if (strEQ(d,"waitpid"))
+           FUN2(O_WAITPID);
        if (strEQ(d,"wantarray")) {
            yylval.arg = op_new(1);
            yylval.arg->arg_type = O_ITEM;
@@ -1259,31 +1325,31 @@ yylex()
     return (CLINE, bufptr = s, (int)WORD);
 }
 
-int
+void
 checkcomma(s,what)
 register char *s;
 char *what;
 {
-    char *word;
+    char *someword;
 
     if (*s == '(')
        s++;
     while (s < bufend && isascii(*s) && isspace(*s))
        s++;
     if (isascii(*s) && (isalpha(*s) || *s == '_')) {
-       word = s++;
+       someword = s++;
        while (isalpha(*s) || isdigit(*s) || *s == '_')
            s++;
        while (s < bufend && isspace(*s))
            s++;
        if (*s == ',') {
            *s = '\0';
-           word = instr(
+           someword = instr(
              "tell eof times getlogin wait length shift umask getppid \
              cos exp int log rand sin sqrt ord wantarray",
-             word);
+             someword);
            *s = ',';
-           if (word)
+           if (someword)
                return;
            fatal("No comma allowed after %s", what);
        }
@@ -1291,7 +1357,7 @@ char *what;
 }
 
 char *
-scanreg(s,send,dest)
+scanident(s,send,dest)
 register char *s;
 register char *send;
 char *dest;
@@ -1345,8 +1411,8 @@ char *dest;
        else
            d[1] = '\0';
     }
-    if (*d == '^' && !isspace(*s))
-       *d = *s++ & 31;
+    if (*d == '^' && (isupper(*s) || index("[\\]^_?",*s)))
+       *d = *s++ ^ 64;
     return s;
 }
 
@@ -1380,7 +1446,7 @@ int len;
            e = d;
            break;
        case '\\':
-           if (d[1] && index("wWbB0123456789sSdD",d[1])) {
+           if (d[1] && index("wWbB0123456789sSdDlLuUE",d[1])) {
                e = d;
                break;
            }
@@ -1399,6 +1465,12 @@ int len;
            case 'r':
                *d = '\r';
                break;
+           case 'e':
+               *d = '\033';
+               break;
+           case 'a':
+               *d = '\007';
+               break;
            }
            /* FALL THROUGH */
        default:
@@ -1428,6 +1500,7 @@ register char *s;
     register char *e;
     int len;
     SPAT savespat;
+    STR *str = Str_new(93,0);
 
     Newz(801,spat,1,SPAT);
     spat->spat_next = curstash->tbl_spatroot;  /* link into spat list */
@@ -1445,8 +1518,9 @@ register char *s;
     default:
        fatal("panic: scanpat");
     }
-    s = cpytill(tokenbuf,s,bufend,s[-1],&len);
+    s = str_append_till(str,s,bufend,s[-1],patleave);
     if (s >= bufend) {
+       str_free(str);
        yyerror("Search pattern not terminated");
        yylval.arg = Nullarg;
        return s;
@@ -1463,8 +1537,9 @@ register char *s;
            spat->spat_flags |= SPAT_KEEP;
        }
     }
-    e = tokenbuf + len;
-    for (d=tokenbuf; d < e; d++) {
+    len = str->str_cur;
+    e = str->str_ptr + len;
+    for (d = str->str_ptr; d < e; d++) {
        if (*d == '\\')
            d++;
        else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
@@ -1474,19 +1549,18 @@ register char *s;
            spat->spat_runtime = arg = op_new(1);
            arg->arg_type = O_ITEM;
            arg[1].arg_type = A_DOUBLE;
-           arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
-           arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
-           d = scanreg(d,bufend,buf);
+           arg[1].arg_ptr.arg_str = str_smake(str);
+           d = scanident(d,bufend,buf);
            (void)stabent(buf,TRUE);            /* make sure it's created */
            for (; d < e; d++) {
                if (*d == '\\')
                    d++;
                else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
-                   d = scanreg(d,bufend,buf);
+                   d = scanident(d,bufend,buf);
                    (void)stabent(buf,TRUE);
                }
                else if (*d == '@') {
-                   d = scanreg(d,bufend,buf);
+                   d = scanident(d,bufend,buf);
                    if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
                      strEQ(buf,"SIG") || strEQ(buf,"INC"))
                        (void)stabent(buf,TRUE);
@@ -1501,8 +1575,8 @@ register char *s;
 #else
        (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
 #endif
-    if (*tokenbuf == '^') {
-       spat->spat_short = scanconst(tokenbuf+1,len-1);
+    if (*str->str_ptr == '^') {
+       spat->spat_short = scanconst(str->str_ptr+1,len-1);
        if (spat->spat_short) {
            spat->spat_slen = spat->spat_short->str_cur;
            if (spat->spat_slen == len - 1)
@@ -1511,7 +1585,7 @@ register char *s;
     }
     else {
        spat->spat_flags |= SPAT_SCANFIRST;
-       spat->spat_short = scanconst(tokenbuf,len);
+       spat->spat_short = scanconst(str->str_ptr,len);
        if (spat->spat_short) {
            spat->spat_slen = spat->spat_short->str_cur;
            if (spat->spat_slen == len)
@@ -1520,7 +1594,7 @@ register char *s;
     }  
     if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
        fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
-       spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
+       spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
            spat->spat_flags & SPAT_FOLD);
                /* Note that this regexp can still be used if someone says
                 * something like /a/ && s//b/;  so we can't delete it.
@@ -1535,11 +1609,12 @@ register char *s;
 #endif
        if (spat->spat_short)
            fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
-       spat->spat_regexp = regcomp(tokenbuf,tokenbuf+len,
-           spat->spat_flags & SPAT_FOLD,1);
+       spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
+           spat->spat_flags & SPAT_FOLD);
        hoistmust(spat);
     }
   got_pat:
+    str_free(str);
     yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
     return s;
 }
@@ -1552,37 +1627,41 @@ register char *s;
     register char *d;
     register char *e;
     int len;
+    STR *str = Str_new(93,0);
 
     Newz(802,spat,1,SPAT);
     spat->spat_next = curstash->tbl_spatroot;  /* link into spat list */
     curstash->tbl_spatroot = spat;
 
-    s = cpytill(tokenbuf,s+1,bufend,*s,&len);
+    s = str_append_till(str,s+1,bufend,*s,patleave);
     if (s >= bufend) {
+       str_free(str);
        yyerror("Substitution pattern not terminated");
        yylval.arg = Nullarg;
        return s;
     }
-    e = tokenbuf + len;
-    for (d=tokenbuf; d < e; d++) {
-       if ((*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') ||
-           (*d == '@' && d[-1] != '\\')) {
+    len = str->str_cur;
+    e = str->str_ptr + len;
+    for (d = str->str_ptr; d < e; d++) {
+       if (*d == '\\')
+           d++;
+       else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
+           *d == '@' ) {
            register ARG *arg;
 
            spat->spat_runtime = arg = op_new(1);
            arg->arg_type = O_ITEM;
            arg[1].arg_type = A_DOUBLE;
-           arg[1].arg_ptr.arg_str = str_make(tokenbuf,len);
-           arg[1].arg_ptr.arg_str->str_u.str_hash = curstash;
-           d = scanreg(d,bufend,buf);
+           arg[1].arg_ptr.arg_str = str_smake(str);
+           d = scanident(d,bufend,buf);
            (void)stabent(buf,TRUE);            /* make sure it's created */
            for (; *d; d++) {
                if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
-                   d = scanreg(d,bufend,buf);
+                   d = scanident(d,bufend,buf);
                    (void)stabent(buf,TRUE);
                }
                else if (*d == '@' && d[-1] != '\\') {
-                   d = scanreg(d,bufend,buf);
+                   d = scanident(d,bufend,buf);
                    if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
                      strEQ(buf,"SIG") || strEQ(buf,"INC"))
                        (void)stabent(buf,TRUE);
@@ -1591,21 +1670,21 @@ register char *s;
            goto get_repl;              /* skip compiling for now */
        }
     }
-    if (*tokenbuf == '^') {
-       spat->spat_short = scanconst(tokenbuf+1,len-1);
+    if (*str->str_ptr == '^') {
+       spat->spat_short = scanconst(str->str_ptr+1,len-1);
        if (spat->spat_short)
            spat->spat_slen = spat->spat_short->str_cur;
     }
     else {
        spat->spat_flags |= SPAT_SCANFIRST;
-       spat->spat_short = scanconst(tokenbuf,len);
+       spat->spat_short = scanconst(str->str_ptr,len);
        if (spat->spat_short)
            spat->spat_slen = spat->spat_short->str_cur;
     }
-    d = nsavestr(tokenbuf,len);
 get_repl:
     s = scanstr(s);
     if (s >= bufend) {
+       str_free(str);
        yyerror("Substitution replacement not terminated");
        yylval.arg = Nullarg;
        return s;
@@ -1632,10 +1711,10 @@ get_repl:
            s++;
            if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
                spat->spat_repl[1].arg_type = A_SINGLE;
-           spat->spat_repl = fixeval(make_op(O_EVAL,2,
+           spat->spat_repl = make_op(O_EVAL,2,
                spat->spat_repl,
                Nullarg,
-               Nullarg));
+               Nullarg);
            spat->spat_flags &= ~SPAT_CONST;
        }
        if (*s == 'g') {
@@ -1660,11 +1739,12 @@ get_repl:
     if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
        fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
     if (!spat->spat_runtime) {
-       spat->spat_regexp = regcomp(d,d+len,spat->spat_flags & SPAT_FOLD,1);
+       spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
+         spat->spat_flags & SPAT_FOLD);
        hoistmust(spat);
-       Safefree(d);
     }
     yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
+    str_free(str);
     return s;
 }
 
@@ -1709,7 +1789,7 @@ int *retlen;
 
     while (s < send && d - t <= 256) {
        if (s[1] == '-' && s+2 < send) {
-           for (i = s[0]; i <= s[2]; i++)
+           for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
                *d++ = i;
            s += 3;
        }
@@ -1729,14 +1809,17 @@ register char *s;
        l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
     register char *t;
     register char *r;
-    register char *tbl;
+    register short *tbl;
     register int i;
     register int j;
     int tlen, rlen;
+    int squash;
+    int delete;
+    int complement;
 
-    Newz(803,tbl,256,char);
+    New(803,tbl,256,short);
     arg[2].arg_type = A_NULL;
-    arg[2].arg_ptr.arg_cval = tbl;
+    arg[2].arg_ptr.arg_cval = (char*) tbl;
     s = scanstr(s);
     if (s >= bufend) {
        yyerror("Translation pattern not terminated");
@@ -1745,25 +1828,64 @@ register char *s;
     }
     t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
        yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
-    free_arg(yylval.arg);
+    arg_free(yylval.arg);
     s = scanstr(s-1);
     if (s >= bufend) {
        yyerror("Translation replacement not terminated");
        yylval.arg = Nullarg;
        return s;
     }
+    complement = delete = squash = 0;
+    while (*s == 'c' || *s == 'd' || *s == 's') {
+       if (*s == 'c')
+           complement = 1;
+       else if (*s == 'd')
+           delete = 2;
+       else
+           squash = 1;
+       s++;
+    }
     r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
        yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
-    free_arg(yylval.arg);
+    arg_free(yylval.arg);
+    arg[2].arg_len = delete|squash;
     yylval.arg = arg;
-    if (!*r) {
+    if (!rlen && !delete) {
        Safefree(r);
        r = t; rlen = tlen;
     }
-    for (i = 0, j = 0; i < tlen; i++,j++) {
-       if (j >= rlen)
-           --j;
-       tbl[t[i] & 0377] = r[j];
+    if (complement) {
+       Zero(tbl, 256, short);
+       for (i = 0; i < tlen; i++)
+           tbl[t[i] & 0377] = -1;
+       for (i = 0, j = 0; i < 256; i++) {
+           if (!tbl[i]) {
+               if (j >= rlen) {
+                   if (delete)
+                       tbl[i] = -2;
+                   else
+                       tbl[i] = r[j-1];
+               }
+               else
+                   tbl[i] = r[j++];
+           }
+       }
+    }
+    else {
+       for (i = 0; i < 256; i++)
+           tbl[i] = -1;
+       for (i = 0, j = 0; i < tlen; i++,j++) {
+           if (j >= rlen) {
+               if (delete) {
+                   if (tbl[t[i] & 0377] == -1)
+                       tbl[t[i] & 0377] = -2;
+                   continue;
+               }
+               --j;
+           }
+           if (tbl[t[i] & 0377] == -1)
+               tbl[t[i] & 0377] = r[j] & 0377;
+       }
     }
     if (r != t)
        Safefree(r);
@@ -1785,7 +1907,7 @@ register char *s;
     bool hereis = FALSE;
     STR *herewas;
     STR *str;
-    char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
+    char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
     int len;
 
     arg = op_new(1);
@@ -1802,7 +1924,7 @@ register char *s;
        goto snarf_it;
     case '0':
        {
-           long i;
+           unsigned long i;
            int shift;
 
            arg[1].arg_type = A_SINGLE;
@@ -1936,7 +2058,6 @@ register char *s;
            arg[1].arg_ptr.arg_stab = stab = genstab();
            stab_io(stab) = stio_new();
            stab_val(stab) = str_make(d,len);
-           stab_val(stab)->str_u.str_hash = curstash;
            Safefree(d);
            set_csh();
        }
@@ -1950,10 +2071,6 @@ register char *s;
            }
            else {
                arg[1].arg_type = A_READ;
-#ifdef NOTDEF
-               if (rsfp == stdin && (strEQ(d,"stdin") || strEQ(d,"STDIN")))
-                   yyerror("Can't get both program and data from <STDIN>");
-#endif
                arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
                if (!stab_io(arg[1].arg_ptr.arg_stab))
                    stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
@@ -1972,6 +2089,10 @@ register char *s;
            s++;
            goto do_double;
        }
+       if (*s == 'x') {
+           s++;
+           goto do_back;
+       }
        /* FALL THROUGH */
     case '\'':
       do_single:
@@ -2003,7 +2124,7 @@ register char *s;
                multi_open = multi_close = '<';
            else {
                multi_open = term;
-               if (tmps = index("([{< )]}> )]}>",term))
+               if (term && (tmps = index("([{< )]}> )]}>",term)))
                    term = tmps[5];
                multi_close = term;
            }
@@ -2045,7 +2166,8 @@ register char *s;
                    STR *str = Str_new(88,0);
 
                    str_sset(str,linestr);
-                   astore(lineary,(int)curcmd->c_line,str);
+                   astore(stab_xarray(curcmd->c_filestab),
+                     (int)curcmd->c_line,str);
                }
                bufend = linestr->str_ptr + linestr->str_cur;
                if (hereis) {
@@ -2085,6 +2207,8 @@ register char *s;
                    makesingle = FALSE; /* force interpretation */
                }
                else if (*s == '\\' && s+1 < send) {
+                   if (index("lLuUE",s[1]))
+                       makesingle = FALSE;
                    s++;
                }
                s++;
@@ -2094,7 +2218,7 @@ register char *s;
                if ((*s == '$' && s+1 < send &&
                    (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
                    (*s == '@' && s+1 < send) ) {
-                   len = scanreg(s,send,tokenbuf) - s;
+                   len = scanident(s,send,tokenbuf) - s;
                    if (*s == '$' || strEQ(tokenbuf,"ARGV")
                      || strEQ(tokenbuf,"ENV")
                      || strEQ(tokenbuf,"SIG")
@@ -2114,16 +2238,19 @@ register char *s;
                        continue;
                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
-                       *d = *s++ - '0';
-                       if (s < send && *s && index("01234567",*s)) {
-                           *d <<= 3;
-                           *d += *s++ - '0';
-                       }
-                       if (s < send && *s && index("01234567",*s)) {
-                           *d <<= 3;
-                           *d += *s++ - '0';
-                       }
-                       d++;
+                       *d++ = scanoct(s, 3, &len);
+                       s += len;
+                       continue;
+                   case 'x':
+                       *d++ = scanhex(++s, 2, &len);
+                       s += len;
+                       continue;
+                   case 'c':
+                       s++;
+                       *d = *s++;
+                       if (islower(*d))
+                           *d = toupper(*d);
+                       *d++ ^= 64;
                        continue;
                    case 'b':
                        *d++ = '\b';
@@ -2140,6 +2267,12 @@ register char *s;
                    case 't':
                        *d++ = '\t';
                        break;
+                   case 'e':
+                       *d++ = '\033';
+                       break;
+                   case 'a':
+                       *d++ = '\007';
+                       break;
                    }
                    s++;
                    continue;
@@ -2151,8 +2284,6 @@ register char *s;
            if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
                    arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
 
-           tmpstr->str_u.str_hash = curstash;  /* so interp knows package */
-
            tmpstr->str_cur = d - tmpstr->str_ptr;
            arg[1].arg_ptr.arg_str = tmpstr;
            s = tmps;
@@ -2180,14 +2311,8 @@ load_format()
 
     Zero(&froot, 1, FCMD);
     s = bufptr;
-    while (s < bufend || (s = str_gets(linestr,rsfp, 0)) != Nullch) {
+    while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
        curcmd->c_line++;
-       if (perldb) {
-           STR *tmpstr = Str_new(89,0);
-
-           str_sset(tmpstr,linestr);
-           astore(lineary,(int)curcmd->c_line,tmpstr);
-       }
        if (in_eval && !rsfp) {
            eol = index(s,'\n');
            if (!eol++)
@@ -2195,9 +2320,18 @@ load_format()
        }
        else
            eol = bufend = linestr->str_ptr + linestr->str_cur;
-       if (strnEQ(s,".\n",2)) {
-           bufptr = s;
-           return froot.f_next;
+       if (perldb) {
+           STR *tmpstr = Str_new(89,0);
+
+           str_nset(tmpstr, s, eol-s);
+           astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
+       }
+       if (*s == '.') {
+           for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
+           if (*t == '\n') {
+               bufptr = s;
+               return froot.f_next;
+           }
        }
        if (*s == '#') {
            s = eol;
@@ -2254,7 +2388,35 @@ load_format()
                while (*s == '|')
                    s++;
                break;
+           case '#':
+           case '.':
+               /* Catch the special case @... and handle it as a string
+                  field. */
+               if (*s == '.' && s[1] == '.') {
+                   goto default_format;
+               }
+               fcmd->f_type = F_DECIMAL;
+               {
+                   char *p;
+
+                   /* Read a format in the form @####.####, where either group
+                      of ### may be empty, or the final .### may be missing. */
+                   while (*s == '#')
+                       s++;
+                   if (*s == '.') {
+                       s++;
+                       p = s;
+                       while (*s == '#')
+                           s++;
+                       fcmd->f_decimals = s-p;
+                       fcmd->f_flags |= FC_DP;
+                   } else {
+                       fcmd->f_decimals = 0;
+                   }
+               }
+               break;
            default:
+           default_format:
                fcmd->f_type = F_LEFT;
                break;
            }
@@ -2267,15 +2429,10 @@ load_format()
        }
        if (flinebeg) {
          again:
-           if (s >= bufend && (s = str_gets(linestr, rsfp, 0)) == Nullch)
+           if (s >= bufend &&
+             (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
                goto badform;
            curcmd->c_line++;
-           if (perldb) {
-               STR *tmpstr = Str_new(90,0);
-
-               str_sset(tmpstr,linestr);
-               astore(lineary,(int)curcmd->c_line,tmpstr);
-           }
            if (in_eval && !rsfp) {
                eol = index(s,'\n');
                if (!eol++)
@@ -2283,6 +2440,13 @@ load_format()
            }
            else
                eol = bufend = linestr->str_ptr + linestr->str_cur;
+           if (perldb) {
+               STR *tmpstr = Str_new(90,0);
+
+               str_nset(tmpstr, s, eol-s);
+               astore(stab_xarray(curcmd->c_filestab),
+                   (int)curcmd->c_line,tmpstr);
+           }
            if (strnEQ(s,".\n",2)) {
                bufptr = s;
                yyerror("Missing values line");
@@ -2320,7 +2484,7 @@ load_format()
                    case '$':
                        str_ncat(str, t, s - t);
                        t = s;
-                       s = scanreg(s,eol,tokenbuf);
+                       s = scanident(s,eol,tokenbuf);
                        str_ncat(str, t, s - t);
                        t = s;
                        if (s < eol && *s && index("$'\"",*s))