Commit | Line | Data |
---|---|---|
916e4025 | 1 | #!/usr/bin/perl -w |
6294c161 DM |
2 | # |
3 | # Regenerate (overwriting only if changed): | |
4 | # | |
5 | # regnodes.h | |
6 | # | |
7 | # from information stored in | |
8 | # | |
9 | # regcomp.sym | |
10 | # regexp.h | |
11 | # | |
12 | # Accepts the standard regen_lib -q and -v args. | |
13 | # | |
14 | # This script is normally invoked from regen.pl. | |
15 | ||
36bb303b NC |
16 | BEGIN { |
17 | # Get function prototypes | |
af001346 | 18 | require 'regen/regen_lib.pl'; |
36bb303b | 19 | } |
03363afd | 20 | use strict; |
03363afd | 21 | |
# ---------------------------------------------------------------------------
# Read regcomp.sym and fill the file-scoped tables used by the rest of the
# script.
#
# The file has two sections, separated by a line consisting only of dashes:
#
#   * Before the separator every non-blank line describes one regop:
#         NAME  DESC ; REST
#     DESC is split on commas/whitespace into (in order) the entries stored
#     in @type, @code, @args, @flags and @longj; REST goes to @rest.
#
#   * After the separator every line describes match states:
#         TYPE  names[:special] ...
#     Each comma separated name becomes "TYPE_name" ("resume" becomes
#     "resume_TYPE").  A ':special' part containing a digit expands to the
#     numeric suffixes 1..special, ':FAIL' expands to "" and "_fail".
#
# On exit $lastregop is the number of regops, $tot the number of regops plus
# states, and $ind equals $tot.
# ---------------------------------------------------------------------------
open my $desc_fh, '<', 'regcomp.sym'
    or die "Can't read regcomp.sym: $!";

my $ind = 0;
my (@name,@rest,@type,@code,@args,@flags,@longj);
my ($desc,$lastregop);
while (<$desc_fh>) {
    s/#.*$//;                   # strip comments
    next if /^\s*$/;
    chomp; # No \z in 5.004
    s/\s*$//;
    if (/^-+\s*$/) {
        # Separator line: everything read so far was a regop.
        $lastregop= $ind;
        next;
    }
    unless ($lastregop) {
        # Regop section.  The list assignment yields the number of captured
        # fields, so a line that does not match is reported instead of
        # silently leaving undefined table entries behind.
        ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+)\s*;\s*(.*)/
            or die "Malformed regop definition at regcomp.sym line $.: '$_'";
        ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
            = split /[,\s]\s*/, $desc;
        ++$ind;
    } else {
        # State section.
        my ($type,@lists)=split /\s+/, $_;
        die "No list? $type" if !@lists;
        foreach my $list (@lists) {
            my ($names,$special)=split /:/, $list , 2;
            $special ||= "";
            foreach my $name (split /,/,$names) {
                my $real= $name eq 'resume'
                        ? "resume_$type"
                        : "${type}_$name";
                my @suffix;
                if (!$special) {
                    @suffix=("");
                } elsif ($special=~/\d/) {
                    @suffix=(1..$special);
                } elsif ($special eq 'FAIL') {
                    @suffix=("","_fail");
                } else {
                    die "unknown :type ':$special'";
                }
                foreach my $suffix (@suffix) {
                    $name[$ind]="$real$suffix";
                    $type[$ind]=$type;
                    $rest[$ind]="state for $type";
                    ++$ind;
                }
            }
        }

    }
}
# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ($width,$rwidth,$twidth)=(22,12,9);
$lastregop ||= $ind;            # no separator seen: everything is a regop
my $tot = $ind;
close $desc_fh;
die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
    if $lastregop>256;
d09b2d29 | 80 | |
# process_flags($flag, $varname, $comment)
#
# Build the C text describing which regops carry a given flag.
#
#   $flag     flag value to look for in @flags (compared with 'eq')
#   $varname  stem used for the generated identifiers: PL_<varname>[],
#             PL_<varname>_bitmask[] and the REGNODE_<VARNAME>() macro
#   $comment  optional text placed in front of the generated code
#
# Returns one string holding $comment followed by the macro, the list of
# matching regop names (terminated by 0) and the bitmask table.  Reads the
# file-scoped @flags, @name and $lastregop; it does not modify any of them
# and uses its own loop index rather than the file-scoped $ind.
sub process_flags {
    my ($flag, $varname, $comment) = @_;
    $comment = '' unless defined $comment;

    my @selected;       # names of the regops that have $flag set
    my @mask_bytes;     # bitmask, one integer per byte of the C table

    # The bit layout mirrors the arithmetic of the generated C macro:
    # regop N lives in byte (N >> 3), bit (N & 7).
    my $op = 0;
    do {
        my $set = $flags[$op] && $flags[$op] eq $flag ? 1 : 0;

        # Touch the byte even when the bit is clear so the table always
        # covers every regop.
        $mask_bytes[$op >> 3] ||= 0;
        $mask_bytes[$op >> 3] |= $set << ($op & 7);

        push @selected, $name[$op] if $set;
    } while (++$op < $lastregop);

    my $out_string = join ', ', @selected, 0;
    # Wrap the name list so that no line gets overly long.
    $out_string =~ s/(.{1,70},) /$1\n /g;

    my $out_mask = join ', ', map { sprintf "0x%02X", $_ } @mask_bytes;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))

#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
$out_string
};
#endif /* DOINIT */

#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
$out_mask
};
#endif /* DOINIT */
EOP
}
127 | ||
# Open regnodes.h for regeneration (via regen_lib's open_new) and write the
# numeric definitions: the two limits first, then one #define per regop,
# then one #define per match state expressed relative to REGNODE_MAX.
my $out = open_new(
    'regnodes.h', '>',
    { by => 'regen/regcomp.pl', from => 'regcomp.sym' },
);

my $limits_format = <<EOP;
/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP
printf {$out} $limits_format,
    -$width, 'REGNODE_MAX',        $lastregop - 1,
    -$width, 'REGMATCH_STATE_MAX', $tot - 1;

# Regops: absolute numbers, with the hex value repeated in the comment.
$ind = 0;
while ($ind < $lastregop) {
    printf {$out} "#define\t%*s\t%d\t/* %#04x %s */\n",
        -$width, $name[$ind], $ind, $ind, $rest[$ind];
    ++$ind;
}

print {$out} "\t/* ------------ States ------------- */\n";

# States: $ind carries on from where the regop loop stopped.
while ($ind < $tot) {
    my $offset = $ind - $lastregop + 1;
    printf {$out} "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
        -$width, $name[$ind], $offset, $rest[$ind];
    $ind++;
}
151 | ||
# Emit PL_regkind[]: one entry per regop and per state, holding the value
# recorded in @type for it, with the entry's name as a trailing comment.
print {$out} <<EOP;

/* PL_regkind[] What type of regop or state is this. */

#ifndef DOINIT
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP

$ind = 0;
while (1) {
    my $entry = "$type[$ind],";
    printf {$out} "\t%*s\t/* %*s */\n",
        -1-$twidth, $entry, -$width, $name[$ind];

    # Mark the boundary between regops and states, if there are any states.
    if ($lastregop != $tot and $ind + 1 == $lastregop) {
        print {$out} "\t/* ------------ States ------------- */\n";
    }

    # Test at the bottom: the body always runs at least once.
    last if ++$ind >= $tot;
}
d09b2d29 | 169 | |
# Close PL_regkind[] and emit the two tables that are only compiled when
# REG_COMP_C is defined.  Both have one entry per regop (states excluded).
print {$out} <<EOP;
};
#endif

/* regarglen[] - How large is the argument part of the node (in regnodes) */

#ifdef REG_COMP_C
static const U8 regarglen[] = {
EOP

# regarglen[]: 0 when the regop has no argument structure recorded in @args,
# otherwise an EXTRA_SIZE() expression naming that structure.
$ind = 0;
while (1) {
    my $size = $args[$ind]
             ? "EXTRA_SIZE(struct regnode_$args[$ind])"
             : 0;

    printf {$out} "\t%*s\t/* %*s */\n",
        -37, "$size,", -$rwidth, $name[$ind];

    # Test at the bottom: the body always runs at least once.
    last if ++$ind >= $lastregop;
}

print {$out} <<EOP;
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node */

static const char reg_off_by_arg[] = {
EOP

# reg_off_by_arg[]: the value recorded in @longj, or 0 when there is none.
$ind = 0;
while (1) {
    my $offset_arg = $longj[$ind] || 0;

    printf {$out} "\t%d,\t/* %*s */\n",
        $offset_arg, -$rwidth, $name[$ind];

    last if ++$ind >= $lastregop;
}
d09b2d29 | 204 | |
# Close the REG_COMP_C-only tables and emit PL_reg_name[]: the name of every
# regop and state as a C string.  The trailing comment gives the regop's
# number in hex; for states it gives the offset relative to REGNODE_MAX.
print $out <<EOP;
};

#endif /* REG_COMP_C */

/* reg_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_name[];
#else
EXTCONST char * const PL_reg_name[] = {
EOP

$ind = 0;
my $ofs = 0;    # subtracted from $ind to get the number shown in the comment
my $sym = "";   # prefix of that number; set once the states are reached
do {
    printf $out "\t%*s\t/* $sym%#04x */\n",
        -3-$width, qq("$name[$ind]",), $ind - $ofs;

    # After the last regop switch to state numbering, provided there are
    # states at all.
    if ($ind + 1 == $lastregop and $lastregop != $tot) {
        print $out "\t/* ------------ States ------------- */\n";
        $ofs = $lastregop - 1;
        $sym = 'REGNODE_MAX +';
    }
} while (++$ind < $tot);
9b155405 | 233 | |
# Close PL_reg_name[] and emit PL_reg_extflags_name[]: one string per bit
# (0..31) of the extflags word, derived from the RXf_* #defines found in
# op_reg_common.h and regexp.h.
print $out <<EOP;
};
#endif /* DOINIT */

/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

my %rxfv;           # bit define name => numeric value
my %definitions;    # Remember what the symbol definitions are
my $val = 0;        # union of all bits seen so far
foreach my $file ("op_reg_common.h", "regexp.h") {
    open my $header_fh, '<', $file or die "Can't read $file: $!";
    while (<$header_fh>) {

        # optional leading '_'.  Return symbol in $1, and strip it from
        # rest of line
        next unless s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi;
        chomp;
        my $define = $1;
        s: / \s* \* .*? \* \s* / : :x;    # Replace comments by a blank

        # Replace any prior defined symbols by their values
        foreach my $key (keys %definitions) {
            s/\b$key\b/$definitions{$key}/g;
        }

        # Remove the U suffix from unsigned int literals
        s/\b([0-9]+)U\b/$1/g;

        # Get numeric definition.  What is left of the line is evaluated as
        # a Perl expression; the text comes from the project's own headers.
        my $newval = eval $_;
        my $eval_error = $@;

        $definitions{$define} = $newval;

        next unless $_ =~ /<</;     # Bit defines use left shift

        # A bit define that cannot be evaluated would otherwise be recorded
        # with an undefined value and never show up in the table; say so.
        unless (defined $newval) {
            warn "Can't evaluate '$_' for $define in $file"
               . ($eval_error ? ": $eval_error" : "\n");
            next;
        }

        if (my $clash = $val & $newval) {
            # Name every earlier define sharing a bit, so that partial
            # overlaps are reported properly as well.
            my @holders = grep { $rxfv{$_} & $newval } sort keys %rxfv;
            die sprintf "Both $define and %s use %08X",
                join(', ', @holders), $clash;
        }
        $val |= $newval;
        $rxfv{$define} = $newval;
    }
    close $header_fh;
}
my %vrxf=reverse %rxfv;     # numeric value => bit define name
printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val;
for (0..31) {
    my $power_of_2 = 2**$_;
    my $n=$vrxf{$power_of_2};
    if (! $n) {

        # Here, there was no name that matched exactly the bit.  It could be
        # either that it is unused, or the name matches multiple bits.
        if (! ($val & $power_of_2)) {
            $n = "UNUSED_BIT_$_";
        }
        else {

            # Here, must be because it matches multiple bits.  Look through
            # all possibilities until find one that matches this one.  Use
            # that name, and all the bits it matches.  The names are sorted
            # so that the choice does not depend on hash ordering.
            foreach my $name (sort keys %rxfv) {
                if ($rxfv{$name} & $power_of_2) {
                    $n = $name;
                    $power_of_2 = $rxfv{$name};
                    last;
                }
            }
        }
    }
    $n=~s/^RXf_(PMf_)?//;
    printf $out qq(\t%-20s/* 0x%08x */\n),
        qq("$n",),$power_of_2;
}

print $out <<EOP;
};
#endif /* DOINIT */

EOP
319 | ||
# Emit the per-flag tables produced by process_flags(), each preceded by its
# explanatory C comment, then hand the finished file to regen_lib's
# read_only_bottom_close_and_rename().
my $varies_comment = <<'EOC';
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC

my $simple_comment = <<'EOC';

/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC

for my $flag_class (
    [ 'V', 'varies', $varies_comment ],
    [ 'S', 'simple', $simple_comment ],
) {
    print {$out} process_flags(@$flag_class);
}

read_only_bottom_close_and_rename($out);