Commit | Line | Data |
---|---|---|
6294c161 DM |
1 | #!/usr/bin/perl |
2 | # | |
3 | # Regenerate (overwriting only if changed): | |
4 | # | |
5 | # regnodes.h | |
6 | # | |
7 | # from information stored in | |
8 | # | |
9 | # regcomp.sym | |
10 | # regexp.h | |
11 | # | |
12 | # Accepts the standard regen_lib -q and -v args. | |
13 | # | |
14 | # This script is normally invoked from regen.pl. | |
15 | ||
36bb303b NC |
16 | BEGIN { |
17 | # Get function prototypes | |
9ad884cb | 18 | require 'regen_lib.pl'; |
36bb303b | 19 | } |
d09b2d29 | 20 | #use Fatal qw(open close rename chmod unlink); |
03363afd YO |
21 | use strict; |
22 | use warnings; | |
23 | ||
# Parse regcomp.sym into parallel arrays indexed from 1.
#
# Comments (from '#' to end of line) and blank lines are ignored.  A line
# consisting only of dashes separates the two sections of the file:
#
#   * before it, each line describes one regop:
#         NAME  TYPE, CODE, ARGS, FLAGS, LONGJ ; free-text description
#     which fills @name, @type, @code, @args, @flags, @longj and @rest;
#
#   * after it, each line describes the states of one type:
#         TYPE  name[,name...][:SPECIAL] ...
#     where SPECIAL is either a count N (suffixes 1..N are appended) or
#     'FAIL' (both "name" and "name_fail" are generated).  Only @name,
#     @type and @rest are filled for states.
#
# $lastregop ends up as the index of the last regop, $tot as the index of
# the last entry overall (regops plus states).
open my $desc_fh, '<', 'regcomp.sym'
    or die "Can't read regcomp.sym: $!";

my $ind = 0;
my (@name,@rest,@type,@code,@args,@flags,@longj);
my ($desc,$lastregop);
while (my $line = <$desc_fh>) {
    $line =~ s/#.*$//;
    next if $line =~ /^\s*$/;
    $line =~ s/\s*\z//;

    # The dashed separator marks the end of the regop section.
    if ($line =~ /^-+\s*$/) {
        $lastregop = $ind;
        next;
    }

    if (!$lastregop) {
        # Regop definition.  Fail loudly on a malformed line instead of
        # silently recording undefined fields.
        $ind++;
        ($name[$ind], $desc, $rest[$ind])
            = $line =~ /^(\S+)\s+([^\t]+)\s*;\s*(.*)/
            or die "Can't parse regcomp.sym line $.: '$line'";
        ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
            = split /[,\s]\s*/, $desc;
        next;
    }

    # State definition.
    my ($state_type, @lists) = split /\s+/, $line;
    die "No list? $state_type" if !@lists;
    foreach my $list (@lists) {
        my ($names, $special) = split /:/, $list, 2;
        $special ||= "";

        my @suffix;
        if (!$special) {
            @suffix = ("");
        } elsif ($special =~ /\A\d+\z/) {
            # Anchored so that only a pure count is accepted here.
            @suffix = (1 .. $special);
        } elsif ($special eq 'FAIL') {
            @suffix = ("", "_fail");
        } else {
            die "unknown :type ':$special'";
        }

        foreach my $state (split /,/, $names) {
            # 'resume' states are named resume_TYPE, all others TYPE_name.
            my $real = $state eq 'resume'
                     ? "resume_$state_type"
                     : "${state_type}_$state";
            foreach my $suffix (@suffix) {
                $ind++;
                $name[$ind] = "$real$suffix";
                $type[$ind] = $state_type;
                $rest[$ind] = "state for $state_type";
            }
        }
    }
}
close $desc_fh;

# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ($width,$rwidth,$twidth)=(22,12,9);

# With no separator line every entry is a regop.
$lastregop ||= $ind;
my $tot = $ind;
die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
    if $lastregop>256;
d09b2d29 | 81 | |
# process_flags($flag, $varname, $comment)
#
# Return the C source text declaring and defining the U8 table
# PL_<$varname>, listing the name of every regop (indices 1..$lastregop)
# whose entry in @flags equals $flag, followed by a terminating 0.
#
#   $flag    - flag value to select on (compared with 'eq')
#   $varname - suffix of the generated C identifier
#   $comment - optional text placed before the generated code
#
# Reads the file-scoped @name, @flags and $lastregop.  It iterates with
# its own index so the file-scoped $ind counter is left untouched.
sub process_flags {
    my ($flag, $varname, $comment) = @_;
    $comment = '' unless defined $comment;

    my @selected = map  { $name[$_] }
                   grep { $flags[$_] && $flags[$_] eq $flag }
                   1 .. $lastregop;

    my $out_string = join ', ', @selected, 0;
    # Break the list after a comma once a line holds up to 70 characters.
    $out_string =~ s/(.{1,70},) /$1\n /g;
    return $comment . <<"EOP";
#ifndef DOINIT
EXTCONST U8 PL_${varname}[];
#else
EXTCONST U8 PL_${varname}[] = {
$out_string
};
#endif /* DOINIT */

EOP
}
104 | ||
# Output is written to a scratch file; it is handed to
# rename_if_different() once generation has finished.
my $tmp_h = 'regnodes.h-new';

# Remove the scratch file first if one already exists.
-f $tmp_h and unlink $tmp_h;

my $out = safer_open($tmp_h);

# Banner plus the two range macros.  The text is a printf format in which
# every "%*s" consumes a (width, string) pair; the negative width
# left-justifies the macro name.
{
    my $banner_format = <<EOP;
/* -*- buffer-read-only: t -*-
!!!!!!! DO NOT EDIT THIS FILE !!!!!!!
This file is built by regcomp.pl from regcomp.sym.
Any changes made here will be lost!
*/

/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP
    printf {$out} $banner_format,
        -$width, 'REGNODE_MAX',        $lastregop - 1,
        -$width, 'REGMATCH_STATE_MAX', $tot - 1;
}
d09b2d29 | 127 | |
24b23f37 YO |
128 | |
# Emit one #define per regop.  Entries are stored from index 1, but the
# generated opcode numbers start at 0, hence the offset.
for my $regop (1 .. $lastregop) {
    my $opcode = $regop - 1;
    printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
        -$width, $name[$regop], $opcode, $opcode, $rest[$regop];
}

# States follow the regops and are numbered relative to REGNODE_MAX.
print $out "\t/* ------------ States ------------- */\n";
for my $state ($lastregop + 1 .. $tot) {
    printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
        -$width, $name[$state], $state - $lastregop, $rest[$state];
}
139 | ||
# Open the PL_regkind[] table ...
print {$out} <<EOP;

/* PL_regkind[] What type of regop or state is this. */

#ifndef DOINIT
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP

# ... and write one row per regop and state: the entry's type, with its
# name alongside as a comment.
for ($ind = 1; $ind <= $tot; $ind++) {
    my $kind_entry = $type[$ind] . ',';
    printf {$out} "\t%*s\t/* %*s */\n",
        -($twidth + 1), $kind_entry, -$width, $name[$ind];

    # Mark where the regops end, but only if any states follow.
    if ($ind == $lastregop && $lastregop != $tot) {
        print {$out} "\t/* ------------ States ------------- */\n";
    }
}
157 | ||
# Close the previous table and open regarglen[].
print {$out} <<EOP;
};
#endif

/* regarglen[] - How large is the argument part of the node (in regnodes) */

#ifdef REG_COMP_C
static const U8 regarglen[] = {
EOP

# One row per regop: an EXTRA_SIZE() expression for regops that declare an
# argument structure in regcomp.sym, 0 otherwise.
for ($ind = 1; $ind <= $lastregop; $ind++) {
    my $arg_size = $args[$ind]
                 ? "EXTRA_SIZE(struct regnode_$args[$ind])"
                 : 0;

    printf {$out} "\t%*s\t/* %*s */\n",
        -37, $arg_size . ',', -$rwidth, $name[$ind];
}
176 | ||
# Close regarglen[] and open reg_off_by_arg[].
print {$out} <<EOP;
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node */

static const char reg_off_by_arg[] = {
EOP

# One row per regop: the LONGJ field from regcomp.sym, or 0 when the
# regop has none.
for ($ind = 1; $ind <= $lastregop; $ind++) {
    my $offset_arg = $longj[$ind] ? $longj[$ind] : 0;

    printf {$out} "\t%d,\t/* %*s */\n",
        $offset_arg, -$rwidth, $name[$ind];
}
192 | ||
# Close reg_off_by_arg[] (and the REG_COMP_C section) and open PL_reg_name[].
print $out <<EOP;
};

#endif /* REG_COMP_C */

/* reg_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_name[];
#else
EXTCONST char * const PL_reg_name[] = {
EOP

# One quoted name per regop and state, annotated with its number.  Regops
# are numbered from 0; once the last regop has been written, the numbers
# shown are relative to REGNODE_MAX instead.
my $ofs = 1;
my $sym = "";
for my $entry (1 .. $tot) {
    # $sym is passed as an argument rather than interpolated into the
    # format, so it can never be misread as a conversion.
    printf $out "\t%*s\t/* %s%#04x */\n",
        -3-$width, qq("$name[$entry]",), $sym, $entry - $ofs;

    if ($entry == $lastregop and $lastregop != $tot) {
        print $out "\t/* ------------ States ------------- */\n";
        $ofs = $lastregop;
        $sym = 'REGNODE_MAX +';
    }
}
221 | ||
# Close PL_reg_name[] and open PL_reg_extflags_name[].
print $out <<EOP;
};
#endif /* DOINIT */

/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

# Collect every "#define RXf_<name> 0x<hex>" from regexp.h.
#   %rxfv    maps flag name  => value
#   %reverse maps flag value => name
#   $val     accumulates all bits seen so far
# Two flags that share any bit are a fatal error.
open my $fh,"<","regexp.h" or die "Can't read regexp.h: $!";
my %rxfv;
my $val = 0;
my %reverse;
while (my $line = <$fh>) {
    next unless $line =~ /#define\s+(RXf_\w+)\s+(0x[A-F\d]+)/i;

    # hex() converts the literal without running it as code.
    my ($flag_name, $newval) = ($1, hex $2);

    if (my $overlap = $val & $newval) {
        # Name every earlier flag that shares a bit with this one; an exact
        # lookup by value would miss partial overlaps.
        my $clashes = join ', ',
                      map  { $reverse{$_} }
                      grep { ($_ + 0) & $newval }
                      sort { $a <=> $b } keys %reverse;
        die sprintf "Both %s and %s use %08X", $flag_name, $clashes, $overlap;
    }

    $val |= $newval;
    $rxfv{$flag_name}  = $newval;
    $reverse{$newval}  = $flag_name;
}
close $fh;

my %vrxf=reverse %rxfv;
printf $out "\t/* Bits in extflags defined: %032b */\n",$val;

# One row per bit, lowest first: the flag name with its RXf_ (and optional
# PMf_) prefix removed, or a placeholder for bits no flag uses.
for my $bit (0..31) {
    my $mask = 1 << $bit;
    my $n = $vrxf{$mask} || "UNUSED_BIT_$bit";
    $n =~ s/^RXf_(PMf_)?//;
    printf $out qq(\t%-20s/* 0x%08x */\n),
        qq("$n",), $mask;
}
257 | ||
# Close PL_reg_extflags_name[].
print {$out} <<EOP;
};
#endif /* DOINIT */

EOP

# Append the two flag tables built by process_flags(), each preceded by
# its explanatory C comment, then the closing editor hint.
{
    my $varies_note = <<'EOC';
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC
    my $simple_note = <<'EOC';
/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC

    print {$out} process_flags('V', 'varies', $varies_note);
    print {$out} process_flags('S', 'simple', $simple_note);
    print {$out} "/* ex: set ro: */\n";
}

safer_close($out);

# Hand the finished scratch file over for comparison with regnodes.h.
rename_if_different($tmp_h, 'regnodes.h');