| 1 | #!/usr/bin/perl -w |
| 2 | # |
| 3 | # Regenerate (overwriting only if changed): |
| 4 | # |
| 5 | # mg_names.inc |
| 6 | # mg_raw.h |
| 7 | # mg_vtable.h |
| 8 | # pod/perlguts.pod |
| 9 | # |
| 10 | # from information stored in this file. pod/perlguts.pod is not completely |
| 11 | # regenerated. Only the magic table is replaced; the other parts remain |
| 12 | # untouched. |
| 13 | # |
| 14 | # Accepts the standard regen_lib -q and -v args. |
| 15 | # |
| 16 | # This script is normally invoked from regen.pl. |
| 17 | |
| 18 | use strict; |
| 19 | require 5.004; |
| 20 | |
| 21 | BEGIN { |
| 22 | # Get function prototypes |
| 23 | require './regen/regen_lib.pl'; |
| 24 | } |
| 25 | |
| 26 | my %mg = |
| 27 | ( |
| 28 | sv => { char => "\0", vtable => 'sv', readonly_acceptable => 1, |
| 29 | desc => 'Special scalar variable' }, |
| 30 | # overload, or type "A" magic, used to be here. Hence overloaded is |
| 31 | # often called AMAGIC internally, even though it does not use "A" |
| 32 | # magic any more. |
| 33 | overload_table => { char => 'c', vtable => 'ovrld', |
| 34 | desc => 'Holds overload table (AMT) on stash' }, |
| 35 | bm => { char => 'B', vtable => 'regexp', value_magic => 1, |
| 36 | readonly_acceptable => 1, |
| 37 | desc => 'Boyer-Moore (fast string search)' }, |
| 38 | regdata => { char => 'D', vtable => 'regdata', |
| 39 | desc => "Regex match position data\n(\@+ and \@- vars)" }, |
| 40 | regdatum => { char => 'd', vtable => 'regdatum', |
| 41 | desc => 'Regex match position data element' }, |
| 42 | env => { char => 'E', vtable => 'env', desc => '%ENV hash' }, |
| 43 | envelem => { char => 'e', vtable => 'envelem', |
| 44 | desc => '%ENV hash element' }, |
| 45 | fm => { char => 'f', vtable => 'regexp', value_magic => 1, |
| 46 | readonly_acceptable => 1, desc => "Formline ('compiled' format)" }, |
| 47 | regex_global => { char => 'g', vtable => 'mglob', value_magic => 1, |
| 48 | readonly_acceptable => 1, desc => 'm//g target' }, |
| 49 | hints => { char => 'H', vtable => 'hints', desc => '%^H hash' }, |
| 50 | hintselem => { char => 'h', vtable => 'hintselem', |
| 51 | desc => '%^H hash element' }, |
| 52 | isa => { char => 'I', vtable => 'isa', desc => '@ISA array' }, |
| 53 | isaelem => { char => 'i', vtable => 'isaelem', |
| 54 | desc => '@ISA array element' }, |
| 55 | nkeys => { char => 'k', vtable => 'nkeys', value_magic => 1, |
| 56 | desc => 'scalar(keys()) lvalue' }, |
| 57 | dbfile => { char => 'L', |
| 58 | desc => 'Debugger %_<filename' }, |
| 59 | dbline => { char => 'l', vtable => 'dbline', |
| 60 | desc => 'Debugger %_<filename element' }, |
| 61 | shared => { char => 'N', desc => 'Shared between threads', |
| 62 | unknown_to_sv_magic => 1 }, |
| 63 | shared_scalar => { char => 'n', desc => 'Shared between threads', |
| 64 | unknown_to_sv_magic => 1 }, |
| 65 | collxfrm => { char => 'o', vtable => 'collxfrm', value_magic => 1, |
| 66 | desc => 'Locale transformation' }, |
| 67 | tied => { char => 'P', vtable => 'pack', |
| 68 | value_magic => 1, # treat as value, so 'local @tied' isn't tied |
| 69 | desc => 'Tied array or hash' }, |
| 70 | tiedelem => { char => 'p', vtable => 'packelem', |
| 71 | desc => 'Tied array or hash element' }, |
| 72 | tiedscalar => { char => 'q', vtable => 'packelem', |
| 73 | desc => 'Tied scalar or handle' }, |
| 74 | qr => { char => 'r', vtable => 'regexp', value_magic => 1, |
| 75 | readonly_acceptable => 1, desc => 'Precompiled qr// regex' }, |
| 76 | sig => { char => 'S', desc => '%SIG hash' }, |
| 77 | sigelem => { char => 's', vtable => 'sigelem', |
| 78 | desc => '%SIG hash element' }, |
| 79 | taint => { char => 't', vtable => 'taint', value_magic => 1, |
| 80 | desc => 'Taintedness' }, |
| 81 | uvar => { char => 'U', vtable => 'uvar', |
| 82 | desc => 'Available for use by extensions' }, |
| 83 | uvar_elem => { char => 'u', desc => 'Reserved for use by extensions', |
| 84 | unknown_to_sv_magic => 1 }, |
| 85 | vec => { char => 'v', vtable => 'vec', value_magic => 1, |
| 86 | desc => 'vec() lvalue' }, |
| 87 | vstring => { char => 'V', value_magic => 1, |
| 88 | desc => 'SV was vstring literal' }, |
| 89 | utf8 => { char => 'w', vtable => 'utf8', value_magic => 1, |
| 90 | desc => 'Cached UTF-8 information' }, |
| 91 | substr => { char => 'x', vtable => 'substr', value_magic => 1, |
| 92 | desc => 'substr() lvalue' }, |
| 93 | defelem => { char => 'y', vtable => 'defelem', value_magic => 1, |
| 94 | desc => "Shadow \"foreach\" iterator variable /\nsmart parameter vivification" }, |
| 95 | nonelem => { char => 'Y', vtable => 'nonelem', value_magic => 1, |
| 96 | desc => "Array element that does not exist" }, |
| 97 | arylen => { char => '#', vtable => 'arylen', value_magic => 1, |
| 98 | desc => 'Array length ($#ary)' }, |
| 99 | pos => { char => '.', vtable => 'pos', value_magic => 1, |
| 100 | desc => 'pos() lvalue' }, |
| 101 | backref => { char => '<', vtable => 'backref', value_magic => 1, |
| 102 | readonly_acceptable => 1, desc => 'For weak ref data' }, |
| 103 | symtab => { char => ':', value_magic => 1, |
| 104 | desc => 'Extra data for symbol tables' }, |
| 105 | rhash => { char => '%', value_magic => 1, |
| 106 | desc => 'Extra data for restricted hashes' }, |
| 107 | arylen_p => { char => '@', value_magic => 1, |
| 108 | desc => 'To move arylen out of XPVAV' }, |
| 109 | ext => { char => '~', desc => 'Available for use by extensions', |
| 110 | readonly_acceptable => 1 }, |
| 111 | checkcall => { char => ']', value_magic => 1, vtable => 'checkcall', |
| 112 | desc => 'Inlining/mutation of call to this CV'}, |
| 113 | debugvar => { char => '*', desc => '$DB::single, signal, trace vars', |
| 114 | vtable => 'debugvar' }, |
| 115 | lvref => { char => '\\', vtable => 'lvref', |
| 116 | desc => "Lvalue reference constructor" }, |
| 117 | ); |
| 118 | |
| 119 | # These have a subtly different "namespace" from the magic types. |
| 120 | my %sig = |
| 121 | ( |
| 122 | 'sv' => {get => 'get', set => 'set'}, |
| 123 | 'env' => {set => 'set_all_env', clear => 'clear_all_env'}, |
| 124 | 'envelem' => {set => 'setenv', clear => 'clearenv'}, |
| 125 | 'sigelem' => {get => 'getsig', set => 'setsig', clear => 'clearsig', |
| 126 | cond => '#ifndef PERL_MICRO'}, |
| 127 | 'pack' => {len => 'sizepack', clear => 'wipepack'}, |
| 128 | 'packelem' => {get => 'getpack', set => 'setpack', clear => 'clearpack'}, |
| 129 | 'dbline' => {set => 'setdbline'}, |
| 130 | 'isa' => {set => 'setisa', clear => 'clearisa'}, |
| 131 | 'isaelem' => {set => 'setisa'}, |
| 132 | 'arylen' => {get => 'getarylen', set => 'setarylen', const => 1}, |
| 133 | 'arylen_p' => {clear => 'cleararylen_p', free => 'freearylen_p'}, |
| 134 | 'mglob' => {set => 'setmglob'}, |
| 135 | 'nkeys' => {get => 'getnkeys', set => 'setnkeys'}, |
| 136 | 'taint' => {get => 'gettaint', set => 'settaint'}, |
| 137 | 'substr' => {get => 'getsubstr', set => 'setsubstr'}, |
| 138 | 'vec' => {get => 'getvec', set => 'setvec'}, |
| 139 | 'pos' => {get => 'getpos', set => 'setpos'}, |
| 140 | 'uvar' => {get => 'getuvar', set => 'setuvar'}, |
| 141 | 'defelem' => {get => 'getdefelem', set => 'setdefelem'}, |
| 142 | 'nonelem' => {set => 'setnonelem'}, |
| 143 | 'regexp' => {set => 'setregexp', alias => [qw(bm fm)]}, |
| 144 | 'regdata' => {len => 'regdata_cnt'}, |
| 145 | 'regdatum' => {get => 'regdatum_get', set => 'regdatum_set'}, |
| 146 | 'backref' => {free => 'killbackrefs'}, |
| 147 | 'ovrld' => {free => 'freeovrld'}, |
| 148 | 'utf8' => {set => 'setutf8'}, |
| 149 | 'collxfrm' => {set => 'setcollxfrm', |
| 150 | cond => '#ifdef USE_LOCALE_COLLATE'}, |
| 151 | 'hintselem' => {set => 'sethint', clear => 'clearhint'}, |
| 152 | 'hints' => {clear => 'clearhints'}, |
| 153 | 'checkcall' => {copy => 'copycallchecker'}, |
| 154 | 'debugvar' => { set => 'setdebugvar', get => 'getdebugvar' }, |
| 155 | 'lvref' => {set => 'setlvref'}, |
| 156 | ); |
| 157 | |
| 158 | my ($vt, $raw, $names) = map { |
| 159 | open_new($_, '>', |
| 160 | { by => 'regen/mg_vtable.pl', file => $_, style => '*' }); |
| 161 | } 'mg_vtable.h', 'mg_raw.h', 'mg_names.inc'; |
| 162 | my $guts = open_new("pod/perlguts.pod", ">"); |
| 163 | |
| 164 | print $vt <<'EOH'; |
| 165 | /* These constants should be used in preference to raw characters |
| 166 | * when using magic. Note that some perl guts still assume |
| 167 | * certain character properties of these constants, namely that |
| 168 | * isUPPER() and toLOWER() may do useful mappings. |
| 169 | */ |
| 170 | |
| 171 | EOH |
| 172 | |
| 173 | # Of course, it would be *much* easier if we could output this table directly |
| 174 | # here and now. However, for our sins, we try to support EBCDIC, which wouldn't |
| 175 | # be *so* bad, except that there are (at least) 3 EBCDIC charset variants, and |
| 176 | # they don't agree on the code point for '~'. Which we use. Great. |
| 177 | # So we have to get the local build runtime to sort our table in character order |
| 178 | # (And of course, just to be helpful, in POSIX BC '~' is \xFF, so we can't even |
| 179 | # simplify the C code by assuming that the last element of the array is |
| 180 | # predictable) |
| 181 | |
| 182 | { |
| 183 | my $longest = 0; |
| 184 | foreach (keys %mg) { |
| 185 | $longest = length $_ if length $_ > $longest; |
| 186 | } |
| 187 | |
| 188 | my $longest_p1 = $longest + 1; |
| 189 | |
| 190 | my %mg_order; |
| 191 | while (my ($name, $data) = each %mg) { |
| 192 | my $byte = $data->{char}; |
| 193 | if ($byte =~ /[[:print:]]/) { |
| 194 | $data->{r_char} = $byte; # readable char |
| 195 | ($data->{c_char} = $byte) =~ s/([\\"])/\\$1/g; # for C strings |
| 196 | } |
| 197 | else { |
| 198 | $data->{c_char} = $data->{r_char} = '\\'.ord $byte; |
| 199 | } |
| 200 | $mg_order{(uc $byte) . $byte} = $name; |
| 201 | } |
| 202 | my @rows; |
| 203 | my @names; |
| 204 | foreach (sort keys %mg_order) { |
| 205 | my $name = $mg_order{$_}; |
| 206 | push @names, $name; |
| 207 | my $data = $mg{$name}; |
| 208 | my $i = ord $data->{char}; |
| 209 | unless ($data->{unknown_to_sv_magic}) { |
| 210 | my $value = $data->{vtable} |
| 211 | ? "want_vtbl_$data->{vtable}" : 'magic_vtable_max'; |
| 212 | $value .= ' | PERL_MAGIC_READONLY_ACCEPTABLE' |
| 213 | if $data->{readonly_acceptable}; |
| 214 | $value .= ' | PERL_MAGIC_VALUE_MAGIC' if $data->{value_magic}; |
| 215 | my $comment = "/* $name '$data->{r_char}' $data->{desc} */"; |
| 216 | $comment =~ s/([\\"])/\\$1/g; |
| 217 | $comment =~ tr/\n/ /; |
| 218 | print $raw qq{ { '$data->{c_char}', "$value",\n "$comment" },\n}; |
| 219 | } |
| 220 | |
| 221 | my $comment = $data->{desc}; |
| 222 | my $leader = ' ' x ($longest + 27); |
| 223 | $comment =~ s/\n/\n$leader/s; |
| 224 | printf $vt "#define PERL_MAGIC_%-${longest}s '%s' /* %s */\n", |
| 225 | $name, $data->{c_char}, $comment; |
| 226 | |
| 227 | my $char = $data->{r_char}; |
| 228 | $char =~ s/([\\"])/\\$1/g; |
| 229 | printf $names qq[\t{ PERL_MAGIC_%-${longest_p1}s "%s(%s)" },\n], |
| 230 | "$name,", $name, $char; |
| 231 | |
| 232 | push @rows, [(sprintf "%-2s PERL_MAGIC_%s", $data->{r_char},$name), |
| 233 | $data->{vtable} ? "vtbl_$data->{vtable}" : '(none)', |
| 234 | $data->{desc}]; |
| 235 | } |
| 236 | select +(select($guts), do { |
| 237 | my @header = ('(old-style char and macro)', 'MGVTBL', 'Type of magic'); |
| 238 | my @widths = (0, 0); |
| 239 | foreach my $row (@rows) { |
| 240 | for (0, 1) { |
| 241 | $widths[$_] = length $row->[$_] |
| 242 | if length $row->[$_] > $widths[$_]; |
| 243 | } |
| 244 | } |
| 245 | my $indent = ' '; |
| 246 | my $format |
| 247 | = sprintf "$indent%%-%ds%%-%ds%%s\n", $widths[0] + 1, $widths[1] + 1; |
| 248 | my $desc_wrap = |
| 249 | 79 - 7 - (length $indent) - $widths[0] - $widths[1] - 2; |
| 250 | |
| 251 | open my $oldguts, "<", "pod/perlguts.pod" |
| 252 | or die "$0 cannot open pod/perlguts.pod for reading: $!"; |
| 253 | while (<$oldguts>) { |
| 254 | print; |
| 255 | last if /^=for mg_vtable.pl begin/ |
| 256 | } |
| 257 | |
| 258 | print "\n", $indent . "mg_type\n"; |
| 259 | printf $format, @header; |
| 260 | printf $format, map {'-' x length $_} @header; |
| 261 | foreach (@rows) { |
| 262 | my ($type, $vtbl, $desc) = @$_; |
| 263 | $desc =~ tr/\n/ /; |
| 264 | my @cont; |
| 265 | if (length $desc > $desc_wrap) { |
| 266 | # If it's too long, first split on '(', if there. |
| 267 | # [Which, if there, is always short enough, currently. |
| 268 | # Make this more robust if that changes] |
| 269 | ($desc, @cont) = split /(?=\()/, $desc; |
| 270 | if (!@cont) { |
| 271 | ($desc, @cont) = $desc =~ /(.{1,$desc_wrap})(?: |\z)/g |
| 272 | } |
| 273 | } |
| 274 | printf $format, $type, $vtbl, $desc; |
| 275 | printf $format, '', '', $_ foreach @cont; |
| 276 | } |
| 277 | print "\n\n"; |
| 278 | |
| 279 | print "=for apidoc Amnh||PERL_MAGIC_$_\n" for @names; |
| 280 | print "\n"; |
| 281 | |
| 282 | while (<$oldguts>) { |
| 283 | last if /^=for mg_vtable.pl end/; |
| 284 | } |
| 285 | do { print } while <$oldguts>; |
| 286 | })[0]; |
| 287 | } |
| 288 | |
| 289 | my @names = sort keys %sig; |
| 290 | { |
| 291 | my $want = join ",\n ", (map {"want_vtbl_$_"} @names), 'magic_vtable_max'; |
| 292 | my $names = join qq{",\n "}, @names; |
| 293 | |
| 294 | print $vt <<"EOH"; |
| 295 | |
| 296 | enum { /* pass one of these to get_vtbl */ |
| 297 | $want |
| 298 | }; |
| 299 | |
| 300 | #ifdef DOINIT |
| 301 | EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] = { |
| 302 | "$names" |
| 303 | }; |
| 304 | #else |
| 305 | EXTCONST char * const PL_magic_vtable_names[magic_vtable_max]; |
| 306 | #endif |
| 307 | |
| 308 | EOH |
| 309 | } |
| 310 | |
| 311 | print $vt <<'EOH'; |
| 312 | /* These all need to be 0, not NULL, as NULL can be (void*)0, which is a |
| 313 | * pointer to data, whereas we're assigning pointers to functions, which are |
| 314 | * not the same beast. ANSI doesn't allow the assignment from one to the other. |
| 315 | * (although most, but not all, compilers are prepared to do it) |
| 316 | */ |
| 317 | |
| 318 | /* order is: |
| 319 | get |
| 320 | set |
| 321 | len |
| 322 | clear |
| 323 | free |
| 324 | copy |
| 325 | dup |
| 326 | local |
| 327 | */ |
| 328 | |
| 329 | #ifdef DOINIT |
| 330 | EXT_MGVTBL PL_magic_vtables[magic_vtable_max] = { |
| 331 | EOH |
| 332 | |
| 333 | my @vtable_names; |
| 334 | my @aliases; |
| 335 | |
| 336 | while (my $name = shift @names) { |
| 337 | my $data = $sig{$name}; |
| 338 | push @vtable_names, $name; |
| 339 | my @funcs = map { |
| 340 | $data->{$_} ? "Perl_magic_$data->{$_}" : 0; |
| 341 | } qw(get set len clear free copy dup local); |
| 342 | |
| 343 | $funcs[0] = "(int (*)(pTHX_ SV *, MAGIC *))" . $funcs[0] if $data->{const}; |
| 344 | my $funcs = join ", ", @funcs; |
| 345 | |
| 346 | # Because we can't have a , after the last {...} |
| 347 | my $comma = @names ? ',' : ''; |
| 348 | |
| 349 | print $vt "$data->{cond}\n" if $data->{cond}; |
| 350 | print $vt " { $funcs }$comma\n"; |
| 351 | print $vt <<"EOH" if $data->{cond}; |
| 352 | #else |
| 353 | { 0, 0, 0, 0, 0, 0, 0, 0 }$comma |
| 354 | #endif |
| 355 | EOH |
| 356 | foreach(@{$data->{alias}}) { |
| 357 | push @aliases, "#define want_vtbl_$_ want_vtbl_$name\n"; |
| 358 | push @vtable_names, $_; |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | print $vt <<'EOH'; |
| 363 | }; |
| 364 | #else |
| 365 | EXT_MGVTBL PL_magic_vtables[magic_vtable_max]; |
| 366 | #endif |
| 367 | |
| 368 | EOH |
| 369 | |
| 370 | print $vt (sort @aliases), "\n"; |
| 371 | |
| 372 | print $vt "#define PL_vtbl_$_ PL_magic_vtables[want_vtbl_$_]\n" |
| 373 | foreach sort @vtable_names; |
| 374 | |
| 375 | # 63, not 64, As we rely on the last possible value to mean "NULL vtable" |
| 376 | die "Too many vtable names" if @vtable_names > 63; |
| 377 | |
| 378 | read_only_bottom_close_and_rename($_) foreach $vt, $raw, $names; |
| 379 | close_and_rename($guts); |