3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
10 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
15 #include "../netware/clibstuf.h"
17 #include "../patchlevel.h"
20 #include "../unicode_constants.h"
21 #define DELETE_CHAR DELETE_NATIVE
/* Forward declarations.  oper1..oper5 take a node type followed by one to
 * five integer arguments; walk returns a STR pointer; savestr and cpy2
 * work on plain C strings.  usage() is only declared for the OS2, WIN32
 * and NETWARE builds.
 */
29 int oper1(int type, int arg1);
30 int oper2(int type, int arg1, int arg2);
31 int oper3(int type, int arg1, int arg2, int arg3);
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
34 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
36 char *savestr(char *str);
37 char *cpy2(register char *to, register char *from, register int delim);
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
/* Body of the usage message (the function header is not among the visible
 * lines).  It prints a banner built from PERL_REVISION and PERL_VERSION,
 * the command-line synopsis for myname, and a description of each switch.
 */
46 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48 printf("\n -D<number> sets debugging flags."
49 "\n -F<character> the awk script to translate is always invoked with"
51 "\n -n<fieldlist> specifies the names of the input fields if input does"
52 "\n not have to be split into an array."
53 "\n -<number> causes a2p to assume that input will always have that"
60 #pragma message disable (mainparm) /* We have the envp in main(). */
/* Program entry point.  In the visible lines it parses the leading
 * command-line switches, opens the awk script, reports a fatal error if
 * translation hit syntax errors, can print the ops array for debugging,
 * runs prewalk() and walk() over the tree at root, and assembles the
 * output text starting from STARTPERL and an "eval 'exec ...'" preamble.
 */
64 main(register int argc, register const char **argv, register const char **env)
72 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
/* linestr is the line buffer; bufptr is pointed into it further down */
76 linestr = str_new(80);
77 str = str_new(0); /* first used for -I flags */
/* step over the program name, then examine each remaining argument */
78 for (argc--,argv++; argc; argc--,argv++) {
/* true for an argument that does not begin with '-' or is exactly "-" */
79 if (argv[0][0] != '-' || !argv[0][1])
/* numeric text after a two-character switch becomes the debug flags;
 * bit 0 of debug also drives yydebug */
84 debug = atoi(argv[0]+2);
86 yydebug = (debug & 1);
/* a digit directly after the dash: the whole number becomes maxfld */
90 case '0': case '1': case '2': case '3': case '4':
91 case '5': case '6': case '7': case '8': case '9':
92 maxfld = atoi(argv[0]+1);
/* namelist keeps a saved copy of the text after a two-character switch */
99 namelist = savestr(argv[0]+2);
110 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
111 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
114 fatal("Unrecognized switch: %s\n",argv[0]);
/* no argument left after the switches */
122 if (argv[0] == NULL) {
123 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
124 if ( isatty(fileno(stdin)) )
/* remember the script name; the name "-" is tested for separately */
129 filename = savestr(argv[0]);
131 if (strEQ(filename,"-"))
136 rsfp = fopen(argv[0],"r");
138 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
142 bufptr = str_get(linestr);
146 /* now parse the report spec */
149 fatal("Translation aborted due to syntax errors.\n");
/* debugging listing of ops: index, type, len and opname, then either the
 * string value or the integer value of the following cells */
159 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
161 printf("\t\"%s\"\n",ops[i].cval),i++;
164 printf("\t%d",ops[i].ival),i++;
174 /* first pass to look for numeric variables */
176 prewalk(0,0,root,&i);
178 /* second pass to produce new program */
180 tmpstr = walk(0,0,root,&i,P_MIN);
/* start the output with STARTPERL followed by a shell "eval 'exec'" stanza */
181 str = str_make(STARTPERL);
182 str_cat(str, "\neval 'exec ");
184 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
185 if $running_under_some_shell;\n\
186 # this emulates #! processing on NIH machines.\n\
187 # (remove #! line above if indigestible)\n\n");
189 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
191 " # process any FOO=bar switches\n\n");
192 if (do_opens && opens) {
/* append the text produced by walk() to the output string */
197 str_scat(str,tmpstr);
/* closing note to the user: checkers is the count of "#???" lines */
206 "Please check my work on the %d line%s I've marked with \"#???\".\n",
207 checkers, checkers == 1 ? "" : "s" );
209 "The operation I've selected may be wrong for the operand types.\n");
212 /* ANSI C requires main to return a value; returning one also silences the VC++ warnings */
/* Tokenizer exit helpers.  Each one stores the scan position s back into
 * bufptr before returning:
 *   RETURN(retval) - returns retval
 *   XTERM(retval)  - also sets expectterm to TRUE
 *   XOP(retval)    - also sets expectterm to FALSE
 *   ID(x)          - sets yylval to string(x,0), sets expectterm to FALSE
 *                    and returns idtype
 */
216 #define RETURN(retval) return (bufptr = s,retval)
217 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
218 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
219 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
/* Tokenizer body (its function header lies outside the visible lines).
 * It scans from position s in linestr, refilling linestr from rsfp with
 * str_gets(), and records token text in yylval through string().  The long
 * strEQ chain compares the scanned word d against awk keywords and builtins
 * and against a list of Perl words; apart from sprintf, the action taken on
 * a match is not among the visible lines.
 */
234 fprintf(stderr,"Tokener at %s",s);
236 fprintf(stderr,"Tokener at %s\n",s);
/* unknown input character: reported and ignored */
242 "Unrecognized character %c in file %s line %d--ignoring.\n",
/* a backslash followed by something other than end of line */
247 if (*s && *s != '\n') {
248 yyerror("Ignoring spurious backslash");
253 s = str_get(linestr);
/* read the next line into linestr; a NULL result is handled specially */
258 if ((s = str_gets(linestr, rsfp)) == NULL) {
262 s = str_get(linestr);
273 yylval = string(s,0);
/* look past any whitespace that follows the current character */
298 for (d = s + 1; isSPACE(*d); d++) ;
/* operator tokens: yylval records the operator's spelling */
308 yylval = string("~",1);
326 yylval = string("**=",3);
328 yylval = string(s-1,2);
/* skip blanks and tabs, then test for the word "getline" */
346 while (*s == ' ' || *s == '\t')
348 if (strnEQ(s,"getline",7))
356 yylval = string("==",2);
360 yylval = string("=",1);
366 yylval = string("!=",2);
370 yylval = string("!~",2);
379 yylval = string("<=",2);
388 yylval = string(">>",2);
392 yylval = string(">=",2);
/* next line is one line of a multi-line macro (note its trailing backslash) */
400 while (isWORDCHAR(*s)) \
420 for (d = s; isDIGIT(*s); s++) ;
421 yylval = string(d,s-d);
/* a run of word characters; split_to_array is switched on in this branch */
427 for (d = s; isWORDCHAR(*s); )
429 split_to_array = TRUE;
432 yylval = string(d,s-d);
437 case '/': /* may either be division or pattern */
444 yylval = string("/=",2);
450 case '0': case '1': case '2': case '3': case '4':
451 case '5': case '6': case '7': case '8': case '9': case '.':
/* delimited string: copy up to the delimiter s[-1] into tokenbuf */
456 s = cpy2(tokenbuf,s,s[-1]);
458 fatal("String not terminated:\n%s",str_get(linestr));
460 yylval = string(tokenbuf,0);
/* word lookup: d is compared against each known name in turn */
465 if (strEQ(d,"ARGV")) {
466 yylval=numary(string("ARGV",0));
469 if (strEQ(d,"atan2")) {
476 if (strEQ(d,"break"))
478 if (strEQ(d,"BEGIN"))
483 if (strEQ(d,"continue"))
485 if (strEQ(d,"cos")) {
489 if (strEQ(d,"close")) {
494 if (strEQ(d,"chdir"))
496 else if (strEQ(d,"crypt"))
498 else if (strEQ(d,"chop"))
500 else if (strEQ(d,"chmod"))
502 else if (strEQ(d,"chown"))
509 if (strEQ(d,"delete"))
520 if (strEQ(d,"exit")) {
524 if (strEQ(d,"exp")) {
528 if (strEQ(d,"elsif"))
530 else if (strEQ(d,"eq"))
532 else if (strEQ(d,"eval"))
534 else if (strEQ(d,"eof"))
536 else if (strEQ(d,"each"))
538 else if (strEQ(d,"exec"))
/* FS seen exactly once and we are inside BEGIN: skip whitespace, step over
 * one character, skip whitespace again, and test for a quoted string that
 * holds a single character */
545 if (saw_FS == 1 && in_begin) {
546 for (d = s; *d && isSPACE(*d); d++) ;
548 for (d++; *d && isSPACE(*d); d++) ;
549 if (*d == '"' && d[2] == '"')
557 else if (strEQ(d,"function"))
559 if (strEQ(d,"FILENAME"))
561 if (strEQ(d,"foreach"))
563 else if (strEQ(d,"format"))
565 else if (strEQ(d,"fork"))
567 else if (strEQ(d,"fh"))
572 if (strEQ(d,"getline"))
578 else if (strEQ(d,"gt"))
580 else if (strEQ(d,"goto"))
582 else if (strEQ(d,"gmtime"))
596 if (strEQ(d,"index")) {
599 if (strEQ(d,"int")) {
613 else if (strEQ(d,"kill"))
618 if (strEQ(d,"length")) {
622 if (strEQ(d,"log")) {
628 else if (strEQ(d,"local"))
630 else if (strEQ(d,"lt"))
632 else if (strEQ(d,"le"))
/* the Perl builtin is spelled "localtime"; this entry used to read
 * "locatime" and so could never match the real name */
634 else if (strEQ(d,"localtime"))
636 else if (strEQ(d,"link"))
641 if (strEQ(d,"match")) {
650 do_chop = do_split = split_to_array = TRUE;
651 if (strEQ(d,"next")) {
660 if (strEQ(d,"ORS")) {
664 if (strEQ(d,"OFS")) {
668 if (strEQ(d,"OFMT")) {
673 else if (strEQ(d,"ord"))
675 else if (strEQ(d,"oct"))
680 if (strEQ(d,"print")) {
683 if (strEQ(d,"printf")) {
688 else if (strEQ(d,"pop"))
700 if (strEQ(d,"rand")) {
704 if (strEQ(d,"return"))
706 if (strEQ(d,"reset"))
708 else if (strEQ(d,"redo"))
710 else if (strEQ(d,"rename"))
715 if (strEQ(d,"split")) {
718 if (strEQ(d,"substr")) {
723 if (strEQ(d,"sprintf")) {
724 /* In old awk, { print sprintf("str%sg"),"in" } prints
725 * "string"; in new awk, "in" is not considered an argument to
726 * sprintf, so the statement breaks. To support both, the
727 * grammar treats arguments to SPRINTF_OLD like old awk,
728 * SPRINTF_NEW like new. Here we return the appropriate one.
730 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
732 if (strEQ(d,"sqrt")) {
736 if (strEQ(d,"SUBSEP")) {
739 if (strEQ(d,"sin")) {
743 if (strEQ(d,"srand")) {
747 if (strEQ(d,"system")) {
753 else if (strEQ(d,"shift"))
755 else if (strEQ(d,"select"))
757 else if (strEQ(d,"seek"))
759 else if (strEQ(d,"stat"))
761 else if (strEQ(d,"study"))
763 else if (strEQ(d,"sleep"))
765 else if (strEQ(d,"symlink"))
767 else if (strEQ(d,"sort"))
774 else if (strEQ(d,"tell"))
776 else if (strEQ(d,"time"))
778 else if (strEQ(d,"times"))
783 if (strEQ(d,"until"))
785 else if (strEQ(d,"unless"))
787 else if (strEQ(d,"umask"))
789 else if (strEQ(d,"unshift"))
791 else if (strEQ(d,"unlink"))
793 else if (strEQ(d,"utime"))
798 if (strEQ(d,"values"))
803 if (strEQ(d,"while"))
805 if (strEQ(d,"write"))
807 else if (strEQ(d,"wait"))
/* Scans a search pattern that begins at s.  In the visible lines it is a
 * fatal error when no pattern is found or when the pattern is not
 * terminated, bracket groups ('[' up to ']') get their own inner loop, and
 * the text gathered in tokenbuf is stored in yylval via string(tokenbuf,0).
 */
827 scanpat(register char *s)
835 fatal("Search pattern not found:\n%s",str_get(linestr));
/* walk the pattern text; d advances in step with s */
839 for (; *s; s++,d++) {
843 else if (s[1] == '\\')
845 else if (s[1] == '[')
/* bracket group: the do/while below runs until ']' or end of string */
848 else if (*s == '[') {
/* a backslash that is followed by another character */
851 if (*s == '\\' && s[1])
/* a '/' inside the group, or a '-' sitting directly before the ']' */
853 if (*s == '/' || (*s == '-' && s[1] == ']'))
856 } while (*s && *s != ']');
865 fatal("Search pattern not terminated:\n%s",str_get(linestr));
867 yylval = string(tokenbuf,0);
/* Parser error hook: writes the message s to stderr, followed by a file
 * name and a line number (the arguments supplying them are not among the
 * visible lines).
 */
872 yyerror(const char *s)
874 fprintf(stderr,"%s in file %s at line %d\n",
/* Scans a numeric literal that begins at s.  The visible lines handle a
 * leading digit or '.', two runs of digits, and an exponent introduced by
 * 'e' or 'E' when the next character is a sign or a digit.  The collected
 * text in tokenbuf is stored in yylval via string(tokenbuf,0).
 */
879 scannum(register char *s)
884 case '1': case '2': case '3': case '4': case '5':
885 case '6': case '7': case '8': case '9': case '0' : case '.':
887 while (isDIGIT(*s)) {
893 while (isDIGIT(*s)) {
/* exponent marker, accepted only when a sign or digit follows it */
900 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
902 if (*s == '+' || *s == '-')
908 yylval = string(tokenbuf,0);
/* Appends an OSTRING node to ops.  The first cell holds OSTRING + (1<<8);
 * the next cell's cval is a freshly allocated, NUL-terminated copy of the
 * first len bytes of ptr.  The fatal() message shows ops is limited to
 * OPSMAX cells.
 * NOTE(review): callers in this file pass len == 0 together with
 * NUL-terminated text (for example string(tokenbuf,0)); how that case is
 * handled is not among the visible lines -- confirm.
 */
915 string(const char *ptr, int len)
919 ops[mop++].ival = OSTRING + (1<<8);
/* len+1 bytes: the copied text plus the terminator written two lines down */
922 ops[mop].cval = (char *) safemalloc(len+1);
923 strncpy(ops[mop].cval,ptr,len);
924 ops[mop++].cval[len] = '\0';
926 fatal("Recompile a2p with larger OPSMAX\n");
/* Fragment whose function header is not among the visible lines; by
 * analogy with oper1..oper5 it looks like the zero-argument node builder
 * (TODO confirm).  It stores the bare type in the next ops cell; the
 * fatal() messages show that type may not exceed 255 and that ops is
 * limited to OPSMAX cells.
 */
936 fatal("type > 255 (%d)\n",type);
937 ops[mop++].ival = type;
939 fatal("Recompile a2p with larger OPSMAX\n");
/* Appends a one-argument node to ops: a first cell holding type + (1<<8),
 * followed by arg1.  The fatal() messages show that type may not exceed
 * 255 and that ops is limited to OPSMAX cells; the tests guarding those
 * calls and the return value are not among the visible lines.
 * NOTE(review): the OPSMAX message comes after the stores -- confirm the
 * array cannot be overrun before that check runs.
 */
944 oper1(int type, int arg1)
949 fatal("type > 255 (%d)\n",type);
950 ops[mop++].ival = type + (1<<8);
951 ops[mop++].ival = arg1;
953 fatal("Recompile a2p with larger OPSMAX\n");
/* Appends a two-argument node to ops: a first cell holding type + (2<<8),
 * followed by arg1 and arg2 in that order.  The fatal() messages show that
 * type may not exceed 255 and that ops is limited to OPSMAX cells; the
 * tests guarding those calls and the return value are not among the
 * visible lines.
 * NOTE(review): the OPSMAX message comes after the stores -- confirm the
 * array cannot be overrun before that check runs.
 */
958 oper2(int type, int arg1, int arg2)
963 fatal("type > 255 (%d)\n",type);
964 ops[mop++].ival = type + (2<<8);
965 ops[mop++].ival = arg1;
966 ops[mop++].ival = arg2;
968 fatal("Recompile a2p with larger OPSMAX\n");
/* Appends a three-argument node to ops: a first cell holding
 * type + (3<<8), followed by arg1, arg2 and arg3 in that order.  The
 * fatal() messages show that type may not exceed 255 and that ops is
 * limited to OPSMAX cells; the tests guarding those calls and the return
 * value are not among the visible lines.
 * NOTE(review): the OPSMAX message comes after the stores -- confirm the
 * array cannot be overrun before that check runs.
 */
973 oper3(int type, int arg1, int arg2, int arg3)
978 fatal("type > 255 (%d)\n",type);
979 ops[mop++].ival = type + (3<<8);
980 ops[mop++].ival = arg1;
981 ops[mop++].ival = arg2;
982 ops[mop++].ival = arg3;
984 fatal("Recompile a2p with larger OPSMAX\n");
/* Appends a four-argument node to ops: a first cell holding
 * type + (4<<8), followed by arg1 through arg4 in order.  The fatal()
 * messages show that type may not exceed 255 and that ops is limited to
 * OPSMAX cells; the tests guarding those calls and the return value are
 * not among the visible lines.
 * NOTE(review): the OPSMAX message comes after the stores -- confirm the
 * array cannot be overrun before that check runs.
 */
989 oper4(int type, int arg1, int arg2, int arg3, int arg4)
994 fatal("type > 255 (%d)\n",type);
995 ops[mop++].ival = type + (4<<8);
996 ops[mop++].ival = arg1;
997 ops[mop++].ival = arg2;
998 ops[mop++].ival = arg3;
999 ops[mop++].ival = arg4;
1001 fatal("Recompile a2p with larger OPSMAX\n");
/* Appends a five-argument node to ops: a first cell holding
 * type + (5<<8), followed by arg1 through arg5 in order.  The fatal()
 * messages show that type may not exceed 255 and that ops is limited to
 * OPSMAX cells; the tests guarding those calls and the return value are
 * not among the visible lines.
 * NOTE(review): the OPSMAX message comes after six stores -- confirm the
 * array cannot be overrun before that check runs.
 */
1006 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1011 fatal("type > 255 (%d)\n",type);
1012 ops[mop++].ival = type + (5<<8);
1013 ops[mop++].ival = arg1;
1014 ops[mop++].ival = arg2;
1015 ops[mop++].ival = arg3;
1016 ops[mop++].ival = arg4;
1017 ops[mop++].ival = arg5;
1019 fatal("Recompile a2p with larger OPSMAX\n");
/* Debug printer for the ops node at index branch (the function header is
 * not among the visible lines; the recursive call below names it dump).
 * An OSTRING node prints as its index and quoted string.  Any other node
 * prints "(index opname len" and then recurses into each of its len
 * argument cells.  The two depth-counted loops presumably produce
 * indentation -- their bodies are not visible.
 */
1032 type = ops[branch].ival;
1035 for (i=depth; i; i--)
1037 if (type == OSTRING) {
1038 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1041 printf("(%-5d%s %d\n",branch,opname[type],len);
/* argument cells sit at branch+1 .. branch+len */
1043 for (i=1; i<=len; i++)
1044 dump(ops[branch+i].ival);
1046 for (i=depth; i; i--)
/* Returns an OBLOCK node for arg.  In the visible branches: when the low
 * 8 bits of arg's first cell are not OBLOCK, a new OBLOCK node is built
 * from arg and maybe; when arg is an OBLOCK whose upper bits (>> 8) are
 * below 2, a new OBLOCK node is built from the value in cell arg+1 and
 * maybe.  The remaining branches are not among the visible lines.
 */
1053 bl(int arg, int maybe)
1057 else if ((ops[arg].ival & 255) != OBLOCK)
1058 return oper2(OBLOCK,arg,maybe);
1059 else if ((ops[arg].ival >> 8) < 2)
1060 return oper2(OBLOCK,ops[arg+1].ival,maybe);
/* Fragment of a pass over the text in str->str_ptr (the function header is
 * not among the visible lines).  It looks for "; " directly before a
 * newline, and for a newline followed by whitespace that reaches another
 * newline; what is done on a match is not visible here.
 */
1071 for (s = str->str_ptr; *s; s++) {
1072 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1076 else if (*s == '\n') {
/* t runs forward over whitespace, then backs up to the last newline in it */
1077 for (t = s+1; isSPACE(*t & 127); t++) ;
1079 while (isSPACE(*t & 127) && *t != '\n') t--;
/* true when that newline is not the one immediately after s */
1080 if (*t == '\n' && t-s > 1) {
/* Fragment of the routine that writes out the text in str->str_ptr (the
 * function header is not among the visible lines).  It tracks a column
 * position, reacts to lines longer than 78 columns by searching backwards
 * in tokenbuf for a break point, can tag a line with the "#???" marker,
 * tests for awk "#!" lines, and writes tokenbuf to stdout with fputs.
 */
1093 char *d, *s, *t, *e;
1098 for (s = str->str_ptr; *s; s++) {
1107 else if (*s == '\t')
1109 if (pos > 78) { /* split a long line? */
/* t steps over the whitespace at the start of tokenbuf */
1112 for (t = tokenbuf; isSPACE(*t & 127); t++) {
/* backward searches for a break point: a space preceded by ';', then by
 * "||", then by "&&", then by ',', and finally any space */
1119 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1123 while (d > tokenbuf &&
1124 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1129 while (d > tokenbuf &&
1130 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1135 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1140 while (d > tokenbuf && *d != ' ')
1150 if (d[-1] != ';' && !(newpos % 4)) {
1156 newpos += strlen(t);
1171 for (t = tokenbuf; *t; t++) {
/* overwrite the final character of t with a tab, "#???" and a newline */
1175 strcpy(t+strlen(t)-1, "\t#???\n");
/* awk interpreter lines, with or without a space after "#!" */
1181 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1183 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1186 fputs(tokenbuf,stdout);
/* Fragment whose function header is not among the visible lines.  It walks
 * the node arg to obtain a key string and stores the value "1" under that
 * key in symtab.
 */
1195 key = walk(0,0,arg,&dummy,P_MIN);
1197 hstore(symtab,key->str_ptr,str_make("1"));
/* Records the arguments found under node arg in curarghash.  An OCOMMA
 * node is handled by recursing into the nodes referenced at arg+1 and
 * arg+3; an OVAR node stores str under the variable's name; any other node
 * type is a fatal "panic".
 */
1203 rememberargs(int arg)
1210 type = ops[arg].ival & 255;
1211 if (type == OCOMMA) {
1212 rememberargs(ops[arg+1].ival);
1213 rememberargs(ops[arg+3].ival);
1215 else if (type == OVAR) {
/* the name is the cval one cell past the node that cell arg+1 points to */
1217 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1220 fatal("panic: unknown argument type %d, line %d\n",type,line);
/* Fragment whose function header is not among the visible lines (the panic
 * message names it aryrefarg).  It requires arg to be an OSTRING node,
 * treating anything else as a fatal "panic", and looks that node's string
 * up in curarghash.
 */
1227 int type = ops[arg].ival & 255;
1230 if (type != OSTRING)
1231 fatal("panic: aryrefarg %d, line %d\n",type,line);
1232 str = hfetch(curarghash,ops[arg+1].cval);
/* Walks the argument list under node arg, counting arguments onward from
 * prevargs.  An OCOMMA node recurses into the nodes referenced at arg+1
 * and arg+3, threading the running count through numargs.  An OVAR node is
 * looked up in curarghash; when its entry is "*" the entry is cleared, the
 * node is retagged as OSTAR, and an entry is stored under the key
 * "<function name>:<prevargs>".  Any other node type is a fatal "panic".
 */
1239 fixfargs(int name, int arg, int prevargs)
1247 type = ops[arg].ival & 255;
1248 if (type == OCOMMA) {
1249 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1250 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1252 else if (type == OVAR) {
1253 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1254 if (strEQ(str_get(str),"*")) {
1257 str_set(str,""); /* in case another routine has this */
/* replace the low 8 bits (the node type) with OSTAR, keeping the rest */
1258 ops[arg].ival &= ~255;
1259 ops[arg].ival |= OSTAR;
/* NOTE(review): tmpbuf's declaration is not among the visible lines;
 * confirm it is large enough for the name, ':' and the decimal count */
1260 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1261 fprintf(stderr,"Adding %s\n",tmpbuf);
1264 hstore(curarghash,tmpbuf,str);
1266 numargs = prevargs + 1;
1269 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1270 type,prevargs+1,line);
/* Walks the argument list under node arg for a call to name, counting
 * arguments onward from prevargs.  An OCOMMA node recurses into the nodes
 * referenced at arg+1 and arg+3.  Otherwise the key "<name>:<prevargs>" is
 * looked up in curarghash; when that entry is "*", an OVAR or OSTAR node
 * is retagged as OSTAR and, in the visible lines, the alternative is the
 * fatal "Can't pass expression by reference" error.
 */
1275 fixrargs(char *name, int arg, int prevargs)
1283 type = ops[arg].ival & 255;
1284 if (type == OCOMMA) {
1285 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1286 numargs = fixrargs(name,ops[arg+3].ival,numargs);
/* room for name, ':', the digits of prevargs (three characters allowed per
 * byte of the int) and the terminator */
1289 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1290 sprintf(tmpbuf,"%s:%d",name,prevargs);
1291 str = hfetch(curarghash,tmpbuf);
1293 if (str && strEQ(str->str_ptr,"*")) {
1294 if (type == OVAR || type == OSTAR) {
/* replace the low 8 bits (the node type) with OSTAR, keeping the rest */
1295 ops[arg].ival &= ~255;
1296 ops[arg].ival |= OSTAR;
1299 fatal("Can't pass expression by reference as arg %d of %s\n",
1302 numargs = prevargs + 1;