3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
10 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
15 #include "../netware/clibstuf.h"
17 #include "../patchlevel.h"
20 #include "../unicode_constants.h"
21 #define DELETE_CHAR DEL_NATIVE
29 int oper1(int type, int arg1); /* append a node to ops[]: header word type + (1<<8), then arg1; type > 255 is fatal */
30 int oper2(int type, int arg1, int arg2); /* same layout, header word type + (2<<8), followed by arg1, arg2 */
31 int oper3(int type, int arg1, int arg2, int arg3); /* same layout, header word type + (3<<8), followed by arg1..arg3 */
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4); /* same layout, header word type + (4<<8), followed by arg1..arg4 */
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5); /* same layout, header word type + (5<<8), followed by arg1..arg5 */
34 STR *walk(int useval, int level, int node, int *numericptr, int minprec); /* tree walker returning a STR; main calls walk(0,0,root,&i,P_MIN) as its second pass to produce the new program */
36 char *savestr(char *str); /* main uses it to keep argv text (namelist, filename); presumably returns a private copy -- TODO confirm against definition */
37 char *cpy2(char *to, char *from, int delim); /* tokener calls cpy2(tokenbuf,s,s[-1]) for quoted strings; presumably copies up to delim and returns the new scan position -- TODO confirm */
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
46 printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47 printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48 printf("\n -D<number> sets debugging flags."
49 "\n -F<character> the awk script to translate is always invoked with"
51 "\n -n<fieldlist> specifies the names of the input fields if input does"
52 "\n not have to be split into an array."
53 "\n -<number> causes a2p to assume that input will always have that"
60 main(int argc, const char **argv)
68 fnInitGpfGlobals(); /* For importing the CLIB calls in place of Watcom calls */
72 linestr = str_new(80);
73 str = str_new(0); /* first used for -I flags */
74 for (argc--,argv++; argc; argc--,argv++) {
75 if (argv[0][0] != '-' || !argv[0][1])
80 debug = atoi(argv[0]+2);
82 yydebug = (debug & 1);
86 case '0': case '1': case '2': case '3': case '4':
87 case '5': case '6': case '7': case '8': case '9':
88 maxfld = atoi(argv[0]+1);
95 namelist = savestr(argv[0]+2);
106 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
107 fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
110 fatal("Unrecognized switch: %s\n",argv[0]);
118 if (argv[0] == NULL) {
119 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
120 if ( isatty(fileno(stdin)) )
125 filename = savestr(argv[0]);
127 if (strEQ(filename,"-"))
132 rsfp = fopen(argv[0],"r");
134 fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
138 bufptr = str_get(linestr);
142 /* now parse the report spec */
145 fatal("Translation aborted due to syntax errors.\n");
155 printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
157 printf("\t\"%s\"\n",ops[i].cval),i++;
160 printf("\t%d",ops[i].ival),i++;
170 /* first pass to look for numeric variables */
172 prewalk(0,0,root,&i);
174 /* second pass to produce new program */
176 tmpstr = walk(0,0,root,&i,P_MIN);
177 str = str_make(STARTPERL);
178 str_cat(str, "\neval 'exec ");
180 str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
181 if $running_under_some_shell;\n\
182 # this emulates #! processing on NIH machines.\n\
183 # (remove #! line above if indigestible)\n\n");
185 "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
187 " # process any FOO=bar switches\n\n");
188 if (do_opens && opens) {
193 str_scat(str,tmpstr);
202 "Please check my work on the %d line%s I've marked with \"#???\".\n",
203 checkers, checkers == 1 ? "" : "s" );
205 "The operation I've selected may be wrong for the operand types.\n");
208 /* ANSI C requires a return here; this also silences the VC++ warnings. */
212 #define RETURN(retval) return (bufptr = s,retval)
213 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
214 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
215 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
230 fprintf(stderr,"Tokener at %s",s);
232 fprintf(stderr,"Tokener at %s\n",s);
238 "Unrecognized character %c in file %s line %d--ignoring.\n",
243 if (*s && *s != '\n') {
244 yyerror("Ignoring spurious backslash");
249 s = str_get(linestr);
254 if ((s = str_gets(linestr, rsfp)) == NULL) {
258 s = str_get(linestr);
269 yylval = string(s,0);
294 for (d = s + 1; isSPACE(*d); d++) ;
304 yylval = string("~",1);
322 yylval = string("**=",3);
324 yylval = string(s-1,2);
342 while (*s == ' ' || *s == '\t')
344 if (strnEQ(s,"getline",7))
352 yylval = string("==",2);
356 yylval = string("=",1);
362 yylval = string("!=",2);
366 yylval = string("!~",2);
375 yylval = string("<=",2);
384 yylval = string(">>",2);
388 yylval = string(">=",2);
396 while (isWORDCHAR(*s)) \
416 for (d = s; isDIGIT(*s); s++) ;
417 yylval = string(d,s-d);
423 for (d = s; isWORDCHAR(*s); )
425 split_to_array = TRUE;
428 yylval = string(d,s-d);
433 case '/': /* may either be division or pattern */
440 yylval = string("/=",2);
446 case '0': case '1': case '2': case '3': case '4':
447 case '5': case '6': case '7': case '8': case '9': case '.':
452 s = cpy2(tokenbuf,s,s[-1]);
454 fatal("String not terminated:\n%s",str_get(linestr));
456 yylval = string(tokenbuf,0);
461 if (strEQ(d,"ARGV")) {
462 yylval=numary(string("ARGV",0));
465 if (strEQ(d,"atan2")) {
472 if (strEQ(d,"break"))
474 if (strEQ(d,"BEGIN"))
479 if (strEQ(d,"continue"))
481 if (strEQ(d,"cos")) {
485 if (strEQ(d,"close")) {
490 if (strEQ(d,"chdir"))
492 else if (strEQ(d,"crypt"))
494 else if (strEQ(d,"chop"))
496 else if (strEQ(d,"chmod"))
498 else if (strEQ(d,"chown"))
505 if (strEQ(d,"delete"))
516 if (strEQ(d,"exit")) {
520 if (strEQ(d,"exp")) {
524 if (strEQ(d,"elsif"))
526 else if (strEQ(d,"eq"))
528 else if (strEQ(d,"eval"))
530 else if (strEQ(d,"eof"))
532 else if (strEQ(d,"each"))
534 else if (strEQ(d,"exec"))
541 if (saw_FS == 1 && in_begin) {
542 for (d = s; *d && isSPACE(*d); d++) ;
544 for (d++; *d && isSPACE(*d); d++) ;
545 if (*d == '"' && d[2] == '"')
553 else if (strEQ(d,"function"))
555 if (strEQ(d,"FILENAME"))
557 if (strEQ(d,"foreach"))
559 else if (strEQ(d,"format"))
561 else if (strEQ(d,"fork"))
563 else if (strEQ(d,"fh"))
568 if (strEQ(d,"getline"))
574 else if (strEQ(d,"gt"))
576 else if (strEQ(d,"goto"))
578 else if (strEQ(d,"gmtime"))
592 if (strEQ(d,"index")) {
595 if (strEQ(d,"int")) {
609 else if (strEQ(d,"kill"))
614 if (strEQ(d,"length")) {
618 if (strEQ(d,"log")) {
624 else if (strEQ(d,"local"))
626 else if (strEQ(d,"lt"))
628 else if (strEQ(d,"le"))
630 else if (strEQ(d,"localtime")) /* Perl builtin name, in the same family as "gmtime" */
632 else if (strEQ(d,"link"))
637 if (strEQ(d,"match")) {
646 do_chop = do_split = split_to_array = TRUE;
647 if (strEQ(d,"next")) {
656 if (strEQ(d,"ORS")) {
660 if (strEQ(d,"OFS")) {
664 if (strEQ(d,"OFMT")) {
669 else if (strEQ(d,"ord"))
671 else if (strEQ(d,"oct"))
676 if (strEQ(d,"print")) {
679 if (strEQ(d,"printf")) {
684 else if (strEQ(d,"pop"))
696 if (strEQ(d,"rand")) {
700 if (strEQ(d,"return"))
702 if (strEQ(d,"reset"))
704 else if (strEQ(d,"redo"))
706 else if (strEQ(d,"rename"))
711 if (strEQ(d,"split")) {
714 if (strEQ(d,"substr")) {
719 if (strEQ(d,"sprintf")) {
720 /* In old awk, { print sprintf("str%sg"),"in" } prints
721 * "string"; in new awk, "in" is not considered an argument to
722 * sprintf, so the statement breaks. To support both, the
723 * grammar treats arguments to SPRINTF_OLD like old awk,
724 * SPRINTF_NEW like new. Here we return the appropriate one.
726 XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
728 if (strEQ(d,"sqrt")) {
732 if (strEQ(d,"SUBSEP")) {
735 if (strEQ(d,"sin")) {
739 if (strEQ(d,"srand")) {
743 if (strEQ(d,"system")) {
749 else if (strEQ(d,"shift"))
751 else if (strEQ(d,"select"))
753 else if (strEQ(d,"seek"))
755 else if (strEQ(d,"stat"))
757 else if (strEQ(d,"study"))
759 else if (strEQ(d,"sleep"))
761 else if (strEQ(d,"symlink"))
763 else if (strEQ(d,"sort"))
770 else if (strEQ(d,"tell"))
772 else if (strEQ(d,"time"))
774 else if (strEQ(d,"times"))
779 if (strEQ(d,"until"))
781 else if (strEQ(d,"unless"))
783 else if (strEQ(d,"umask"))
785 else if (strEQ(d,"unshift"))
787 else if (strEQ(d,"unlink"))
789 else if (strEQ(d,"utime"))
794 if (strEQ(d,"values"))
799 if (strEQ(d,"while"))
801 if (strEQ(d,"write"))
803 else if (strEQ(d,"wait"))
831 fatal("Search pattern not found:\n%s",str_get(linestr));
835 for (; *s; s++,d++) {
839 else if (s[1] == '\\')
841 else if (s[1] == '[')
844 else if (*s == '[') {
847 if (*s == '\\' && s[1])
849 if (*s == '/' || (*s == '-' && s[1] == ']'))
852 } while (*s && *s != ']');
861 fatal("Search pattern not terminated:\n%s",str_get(linestr));
863 yylval = string(tokenbuf,0);
868 yyerror(const char *s)
870 fprintf(stderr,"%s in file %s at line %d\n",
880 case '1': case '2': case '3': case '4': case '5':
881 case '6': case '7': case '8': case '9': case '0' : case '.':
883 while (isDIGIT(*s)) {
889 while (isDIGIT(*s)) {
896 if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
898 if (*s == '+' || *s == '-')
904 yylval = string(tokenbuf,0);
911 string(const char *ptr, int len)
915 ops[mop++].ival = OSTRING + (1<<8);
918 ops[mop].cval = (char *) safemalloc(len+1);
919 strncpy(ops[mop].cval,ptr,len);
920 ops[mop++].cval[len] = '\0';
922 fatal("Recompile a2p with larger OPSMAX\n");
932 fatal("type > 255 (%d)\n",type);
933 ops[mop++].ival = type;
935 fatal("Recompile a2p with larger OPSMAX\n");
940 oper1(int type, int arg1)
945 fatal("type > 255 (%d)\n",type);
946 ops[mop++].ival = type + (1<<8);
947 ops[mop++].ival = arg1;
949 fatal("Recompile a2p with larger OPSMAX\n");
954 oper2(int type, int arg1, int arg2)
959 fatal("type > 255 (%d)\n",type);
960 ops[mop++].ival = type + (2<<8);
961 ops[mop++].ival = arg1;
962 ops[mop++].ival = arg2;
964 fatal("Recompile a2p with larger OPSMAX\n");
969 oper3(int type, int arg1, int arg2, int arg3)
974 fatal("type > 255 (%d)\n",type);
975 ops[mop++].ival = type + (3<<8);
976 ops[mop++].ival = arg1;
977 ops[mop++].ival = arg2;
978 ops[mop++].ival = arg3;
980 fatal("Recompile a2p with larger OPSMAX\n");
985 oper4(int type, int arg1, int arg2, int arg3, int arg4)
990 fatal("type > 255 (%d)\n",type);
991 ops[mop++].ival = type + (4<<8);
992 ops[mop++].ival = arg1;
993 ops[mop++].ival = arg2;
994 ops[mop++].ival = arg3;
995 ops[mop++].ival = arg4;
997 fatal("Recompile a2p with larger OPSMAX\n");
1002 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1007 fatal("type > 255 (%d)\n",type);
1008 ops[mop++].ival = type + (5<<8);
1009 ops[mop++].ival = arg1;
1010 ops[mop++].ival = arg2;
1011 ops[mop++].ival = arg3;
1012 ops[mop++].ival = arg4;
1013 ops[mop++].ival = arg5;
1015 fatal("Recompile a2p with larger OPSMAX\n");
1028 type = ops[branch].ival;
1031 for (i=depth; i; i--)
1033 if (type == OSTRING) {
1034 printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1037 printf("(%-5d%s %d\n",branch,opname[type],len);
1039 for (i=1; i<=len; i++)
1040 dump(ops[branch+i].ival);
1042 for (i=depth; i; i--)
1049 bl(int arg, int maybe)
1053 else if ((ops[arg].ival & 255) != OBLOCK)
1054 return oper2(OBLOCK,arg,maybe);
1055 else if ((ops[arg].ival >> 8) < 2)
1056 return oper2(OBLOCK,ops[arg+1].ival,maybe);
1067 for (s = str->str_ptr; *s; s++) {
1068 if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1072 else if (*s == '\n') {
1073 for (t = s+1; isSPACE(*t & 127); t++) ;
1075 while (isSPACE(*t & 127) && *t != '\n') t--;
1076 if (*t == '\n' && t-s > 1) {
1089 char *d, *s, *t, *e;
1094 for (s = str->str_ptr; *s; s++) {
1103 else if (*s == '\t')
1105 if (pos > 78) { /* split a long line? */
1108 for (t = tokenbuf; isSPACE(*t & 127); t++) {
1115 while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1119 while (d > tokenbuf &&
1120 (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1125 while (d > tokenbuf &&
1126 (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1131 while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1136 while (d > tokenbuf && *d != ' ')
1146 if (d[-1] != ';' && !(newpos % 4)) {
1152 newpos += strlen(t);
1167 for (t = tokenbuf; *t; t++) {
1171 strcpy(t+strlen(t)-1, "\t#???\n");
1177 if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1179 if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1182 fputs(tokenbuf,stdout);
1191 key = walk(0,0,arg,&dummy,P_MIN);
1193 hstore(symtab,key->str_ptr,str_make("1"));
1199 rememberargs(int arg)
1206 type = ops[arg].ival & 255;
1207 if (type == OCOMMA) {
1208 rememberargs(ops[arg+1].ival);
1209 rememberargs(ops[arg+3].ival);
1211 else if (type == OVAR) {
1213 hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1216 fatal("panic: unknown argument type %d, line %d\n",type,line);
1223 int type = ops[arg].ival & 255;
1226 if (type != OSTRING)
1227 fatal("panic: aryrefarg %d, line %d\n",type,line);
1228 str = hfetch(curarghash,ops[arg+1].cval);
1235 fixfargs(int name, int arg, int prevargs)
1243 type = ops[arg].ival & 255;
1244 if (type == OCOMMA) {
1245 numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1246 numargs = fixfargs(name,ops[arg+3].ival,numargs);
1248 else if (type == OVAR) {
1249 str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1250 if (strEQ(str_get(str),"*")) {
1253 str_set(str,""); /* in case another routine has this */
1254 ops[arg].ival &= ~255;
1255 ops[arg].ival |= OSTAR;
1256 sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1257 fprintf(stderr,"Adding %s\n",tmpbuf);
1260 hstore(curarghash,tmpbuf,str);
1262 numargs = prevargs + 1;
1265 fatal("panic: unknown argument type %d, arg %d, line %d\n",
1266 type,prevargs+1,line);
1271 fixrargs(char *name, int arg, int prevargs)
1279 type = ops[arg].ival & 255;
1280 if (type == OCOMMA) {
1281 numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1282 numargs = fixrargs(name,ops[arg+3].ival,numargs);
1285 char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1286 sprintf(tmpbuf,"%s:%d",name,prevargs);
1287 str = hfetch(curarghash,tmpbuf);
1289 if (str && strEQ(str->str_ptr,"*")) {
1290 if (type == OVAR || type == OSTAR) {
1291 ops[arg].ival &= ~255;
1292 ops[arg].ival |= OSTAR;
1295 fatal("Can't pass expression by reference as arg %d of %s\n",
1298 numargs = prevargs + 1;