This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regexp.h -reorder regexp to close x86-64 alignment holes
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
6294c161
DM
2#
3# Unconditionally regenerate:
4#
5# pod/perlintern.pod
6# pod/perlapi.pod
7#
8# from information stored in
9#
10# embed.fnc
11# plus all the .c and .h files listed in MANIFEST
12#
13# Has an optional arg, which is the directory to chdir to before reading
14# MANIFEST and *.[ch].
15#
52a9d53b 16# This script is invoked as part of 'make all'
151c3fe5 17#
f554dfc5
MH
18# '=head1' are the only headings looked for. If the first non-blank line after
19# the heading begins with a word character, it is considered to be the first
20# line of documentation that applies to the heading itself. That is, it is
21# output immediately after the heading, before the first function, and not
22# indented. The next input line that is a pod directive terminates this
23# heading-level documentation.
94bdecf9 24
1fcde0e9
KW
25# The meanings of the flags fields in embed.fnc and the source code is
26# documented at the top of embed.fnc.
27
56a0c332 28use strict;
a64c954a 29
7882b24a
NC
30if (@ARGV) {
31 my $workdir = shift;
32 chdir $workdir
33 or die "Couldn't chdir to '$workdir': $!";
34}
3d7c117d
MB
35require './regen/regen_lib.pl';
36require './regen/embed_lib.pl';
7882b24a 37
7b1f0a98
KW
38my @specialized_docs = sort qw( perlguts
39 perlxs
40 perlxstut
b87d9527 41 perlclib
a1a5a9c8 42 warnings
678f21a2 43 perlapio
31b83e34 44 perlcall
0f292d69 45 perlfilter
6a6f8717 46 perlmroapi
1a2a04a3 47 config.h
7b1f0a98 48 );
1a2a04a3
KW
49sub name_in_pod($) {
50 my $name = shift;
51 return "F<$name>" if $name =~ /\./;
52 return "L<$name>";
53}
54my $other_places_api = join " ", map { name_in_pod($_) } sort @specialized_docs, 'perlintern';
55my $other_places_intern = join " ", map { name_in_pod($_) } sort @specialized_docs, 'perlapi';
7b1f0a98 56
1a2a04a3 57@specialized_docs = map { name_in_pod($_) } sort @specialized_docs;
b87d9527
KW
58$specialized_docs[-1] =~ s/^/and /;
59my $specialized_docs = join ", ", @specialized_docs;
60
94bdecf9 61#
346f75ff 62# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
63# This is used to generate prototype headers under various configurations,
64# export symbols lists for different platforms, and macros to provide an
65# implicit interpreter context argument.
66#
67
6a235718 68my %docs;
df6bd76f 69my %seen;
5ce57792 70my %funcflags;
5ce57792 71my %missing;
94bdecf9
JH
72
73my $curheader = "Unknown section";
74
75sub autodoc ($$) { # parse a file and extract documentation info
76 my($fh,$file) = @_;
151c3fe5 77 my($in, $doc, $line, $header_doc);
f554dfc5
MH
78
79 # Count lines easier
80 my $get_next_line = sub { $line++; return <$fh> };
81
94bdecf9 82FUNC:
f554dfc5 83 while (defined($in = $get_next_line->())) {
94bdecf9
JH
84 if ($in=~ /^=head1 (.*)/) {
85 $curheader = $1;
151c3fe5 86
f554dfc5 87 # If the next non-space line begins with a word char, then it is
1c82f4a4 88 # the start of heading-level documentation.
20046047 89 if (defined($doc = $get_next_line->())) {
f554dfc5
MH
90 # Skip over empty lines
91 while ($doc =~ /^\s+$/) {
92 if (! defined($doc = $get_next_line->())) {
93 next FUNC;
94 }
95 }
96
151c3fe5
KW
97 if ($doc !~ /^\w/) {
98 $in = $doc;
99 redo FUNC;
100 }
101 $header_doc = $doc;
151c3fe5
KW
102
103 # Continue getting the heading-level documentation until read
104 # in any pod directive (or as a fail-safe, find a closing
105 # comment to this pod in a C language file
106HDR_DOC:
f554dfc5 107 while (defined($doc = $get_next_line->())) {
151c3fe5
KW
108 if ($doc =~ /^=\w/) {
109 $in = $doc;
110 redo FUNC;
111 }
151c3fe5
KW
112
113 if ($doc =~ m:^\s*\*/$:) {
114 warn "=cut missing? $file:$line:$doc";;
115 last HDR_DOC;
116 }
117 $header_doc .= $doc;
118 }
119 }
94bdecf9
JH
120 next FUNC;
121 }
df6bd76f
KW
122
123 # Parentheses are used to accept anything that looks like 'for
124 # apidoc', and later verify that things are the actual correct syntax.
125 my $apidoc_re = qr/^(\s*)(=?)(\s*)for(\s*)apidoc(\s*)(.*?)\s*\n/;
126
127 if ($in =~ /^=for comment/) {
128 $in = $get_next_line->();
129 if ($in =~ /skip apidoc/) { # Skips the next apidoc-like line
130 while (defined($in = $get_next_line->())) {
131 last if $in =~ $apidoc_re;
132 }
133 }
134 next FUNC;
135 }
136
137 if ($in =~ $apidoc_re) {
138 my $is_in_proper_form = length $1 == 0
139 && length $2 > 0
140 && length $3 == 0
141 && length $4 > 0
142 && length $5 > 0
143 && length $6 > 0;
144 my $proto_in_file = $6;
20046047
KE
145 my $proto = $proto_in_file;
146 $proto = "||$proto" unless $proto =~ /\|/;
147 my($flags, $ret, $name, @args) = split /\s*\|\s*/, $proto;
df6bd76f 148 $name && $is_in_proper_form or die <<EOS;
256dda50
TC
149Bad apidoc at $file line $.:
150 $in
151Expected:
152 =for apidoc flags|returntype|name|arg|arg|...
153 =for apidoc flags|returntype|name
154 =for apidoc name
155EOS
3dbfa774
KW
156 die "flag $1 is not legal (for function $name (from $file))"
157 if $flags =~ / ( [^AabCDdEefhiMmNnTOoPpRrSsUuWXx] ) /x;
6523e108
KW
158 next FUNC if $flags =~ /h/;
159
3dbfa774 160 die "'u' flag must also have 'm' flag' for $name" if $flags =~ /u/ && $flags !~ /m/;
0a60f600
KW
161 warn ("'$name' not \\w+ in '$proto_in_file' in $file")
162 if $flags !~ /N/ && $name !~ / ^ [_[:alpha:]] \w* $ /x;
df6bd76f
KW
163
164 if (exists $seen{$name}) {
165 die ("'$name' in $file was already documented in $seen{$name}");
166 }
167 else {
168 $seen{$name} = $file;
169 }
170
20046047 171 my $docs = "";
94bdecf9 172DOC:
20046047 173 while (defined($doc = $get_next_line->())) {
72d4186d
KW
174
175 # Other pod commands are considered part of the current
176 # function's docs, so can have lists, etc.
177 last DOC if $doc =~ /^=(cut|for\s+apidoc|head)/;
20046047
KE
178 if ($doc =~ m:^\*/$:) {
179 warn "=cut missing? $file:$line:$doc";;
180 last DOC;
72d4186d 181 }
20046047
KE
182 $docs .= $doc;
183 }
184 $docs = "\n$docs" if $docs and $docs !~ /^\n/;
5ce57792 185
20046047 186 # If the entry is also in embed.fnc, it should be defined
8902d554 187 # completely there, but not here
20046047
KE
188 my $embed_docref = delete $funcflags{$name};
189 if ($embed_docref and %$embed_docref) {
8902d554
KW
190 warn "embed.fnc entry overrides redundant information in"
191 . " '$proto_in_file' in $file" if $flags || $ret || @args;
192 $flags = $embed_docref->{'flags'};
5514c4f1
KW
193 warn "embed.fnc entry '$name' missing 'd' flag"
194 unless $flags =~ /d/;
6523e108 195 next FUNC if $flags =~ /h/;
8902d554 196 $ret = $embed_docref->{'retval'};
20046047 197 @args = @{$embed_docref->{args}};
5514c4f1
KW
198 } elsif ($flags !~ /m/) { # Not in embed.fnc, is missing if not a
199 # macro
20046047
KE
200 $missing{$name} = $file;
201 }
5ce57792 202
8902d554 203 my $inline_where = $flags =~ /A/ ? 'api' : 'guts';
5ce57792 204
20046047 205 if (exists $docs{$inline_where}{$curheader}{$name}) {
7a6610ca
DM
206 warn "$0: duplicate API entry for '$name' in $inline_where/$curheader\n";
207 next;
208 }
20046047
KE
209 $docs{$inline_where}{$curheader}{$name}
210 = [$flags, $docs, $ret, $file, @args];
5ce57792 211
151c3fe5
KW
212 # Create a special entry with an empty-string name for the
213 # heading-level documentation.
20046047 214 if (defined $header_doc) {
151c3fe5
KW
215 $docs{$inline_where}{$curheader}{""} = $header_doc;
216 undef $header_doc;
217 }
218
20046047
KE
219 if (defined $doc) {
220 if ($doc =~ /^=(?:for|head)/) {
221 $in = $doc;
222 redo FUNC;
223 }
224 } else {
225 warn "$file:$line:$in";
226 }
227 }
94bdecf9
JH
228 }
229}
230
231sub docout ($$$) { # output the docs for one function
232 my($fh, $name, $docref) = @_;
233 my($flags, $docs, $ret, $file, @args) = @$docref;
d8c40edc 234 $name =~ s/\s*$//;
94bdecf9 235
d4e99c76
KW
236 if ($flags =~ /D/) {
237 $docs = "\n\nDEPRECATED! It is planned to remove this function from a
238future release of Perl. Do not use it for new code; remove it from
239existing code.\n\n$docs";
240 }
241 else {
58a428bb
KW
242 $docs = "\n\nNOTE: this function is experimental and may change or be
243removed without notice.\n\n$docs" if $flags =~ /x/;
d4e99c76 244 }
54c193ae
KW
245
246 # Is Perl_, but no #define foo # Perl_foo
247 my $p = $flags =~ /p/ && $flags =~ /o/ && $flags !~ /M/;
248
94bdecf9 249 $docs .= "NOTE: the perl_ form of this function is deprecated.\n\n"
20046047 250 if $flags =~ /O/;
54c193ae
KW
251 if ($p) {
252 $docs .= "NOTE: this function must be explicitly called as Perl_$name";
d7cc3209 253 $docs .= " with an aTHX_ parameter" if $flags !~ /T/;
54c193ae
KW
254 $docs .= ".\n\n"
255 }
94bdecf9 256
d8c40edc 257 print $fh "=item $name\nX<$name>\n$docs";
94bdecf9
JH
258
259 if ($flags =~ /U/) { # no usage
8b5ff177 260 warn("U and s flags are incompatible") if $flags =~ /s/;
20046047 261 # nothing
05ca4832 262 } else {
8b5ff177 263 if ($flags =~ /n/) { # no args
1fcde0e9
KW
264 warn("n flag without m") unless $flags =~ /m/;
265 warn("n flag but apparently has args") if @args;
1ded1f42
KW
266 print $fh "\t$ret\t$name";
267 } else { # full usage
1ded1f42
KW
268 my $n = "Perl_"x$p . $name;
269 my $large_ret = length $ret > 7;
270 my $indent_size = 7+8 # nroff: 7 under =head + 8 under =item
271 +8+($large_ret ? 1 + length $ret : 8)
272 +length($n) + 1;
273 my $indent;
274 print $fh "\t$ret" . ($large_ret ? ' ' : "\t") . "$n(";
275 my $long_args;
276 for (@args) {
277 if ($indent_size + 2 + length > 79) {
278 $long_args=1;
279 $indent_size -= length($n) - 3;
280 last;
281 }
282 }
283 my $args = '';
2f4e6339 284 if ($flags !~ /T/ && ($p || ($flags =~ /m/ && $name =~ /^Perl_/))) {
1ded1f42
KW
285 $args = @args ? "pTHX_ " : "pTHX";
286 if ($long_args) { print $fh $args; $args = '' }
287 }
288 $long_args and print $fh "\n";
289 my $first = !$long_args;
290 while () {
291 if (!@args or
292 length $args
293 && $indent_size + 3 + length($args[0]) + length $args > 79
294 ) {
295 print $fh
296 $first ? '' : (
297 $indent //=
298 "\t".($large_ret ? " " x (1+length $ret) : "\t")
299 ." "x($long_args ? 4 : 1 + length $n)
300 ),
301 $args, (","x($args ne 'pTHX_ ') . "\n")x!!@args;
302 $args = $first = '';
303 }
304 @args or last;
305 $args .= ", "x!!(length $args && $args ne 'pTHX_ ')
306 . shift @args;
307 }
308 if ($long_args) { print $fh "\n", substr $indent, 0, -4 }
309 print $fh ")";
310 }
8b5ff177 311 print $fh ";" if $flags =~ /s/; # semicolon "dTHR;"
1ded1f42 312 print $fh "\n\n";
94bdecf9
JH
313 }
314 print $fh "=for hackers\nFound in file $file\n\n";
315}
316
f83c6033
KW
317sub sort_helper {
318 # Do a case-insensitive dictionary sort, with only alphabetics
319 # significant, falling back to using everything for determinancy
1354d57e 320 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
f83c6033
KW
321 || uc($a) cmp uc($b)
322 || $a cmp $b;
323}
324
7b73ff98 325sub output {
5a0155e6 326 my ($podname, $header, $dochash, $missing, $footer) = @_;
6a4c4cd4
DM
327 #
328 # strip leading '|' from each line which had been used to hide
329 # pod from pod checkers.
330 s/^\|//gm for $header, $footer;
331
7882b24a 332 my $fh = open_new("pod/$podname.pod", undef,
20046047 333 {by => "$0 extracting documentation",
f1f44974 334 from => 'the C source files'}, 1);
e0492643 335
7882b24a 336 print $fh $header;
e0492643 337
7b73ff98 338 my $key;
f83c6033 339 for $key (sort sort_helper keys %$dochash) {
20046047
KE
340 my $section = $dochash->{$key};
341 print $fh "\n=head1 $key\n\n";
151c3fe5
KW
342
343 # Output any heading-level documentation and delete so won't get in
344 # the way later
345 if (exists $section->{""}) {
346 print $fh $section->{""} . "\n";
347 delete $section->{""};
348 }
20046047 349 print $fh "=over 8\n\n";
151c3fe5 350
20046047
KE
351 for my $key (sort sort_helper keys %$section) {
352 docout($fh, $key, $section->{$key});
353 }
354 print $fh "\n=back\n";
7b73ff98
NC
355 }
356
5a0155e6 357 if (@$missing) {
a23e6e20 358 print $fh "\n=head1 Undocumented functions\n\n";
2616800a 359 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
474d0ac8 360The following functions have been flagged as part of the public API,
72d33970 361but are currently undocumented. Use them at your own risk, as the
ba4591a5
KW
362interfaces are subject to change. Functions that are not listed in this
363document are not intended for public use, and should NOT be used under any
364circumstances.
365
5a4fed09
KW
366If you feel you need to use one of these functions, first send email to
367L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
368that there is a good reason for the function not being documented, and it
369should be removed from this list; or it may just be that no one has gotten
370around to documenting it. In the latter case, you will be asked to submit a
371patch to document the function. Once your patch is accepted, it will indicate
372that the interface is stable (unless it is explicitly marked otherwise) and
373usable by you.
cf5f2f8f 374_EOB_
2616800a
FC
375The following functions are currently undocumented. If you use one of
376them, you may wish to consider creating and submitting documentation for
377it.
2616800a 378_EOB_
6a4c4cd4
DM
379 print $fh "\n=over\n\n";
380
cf5f2f8f
KW
381 for my $missing (sort @$missing) {
382 print $fh "=item $missing\nX<$missing>\n\n";
5a0155e6 383 }
cf5f2f8f
KW
384 print $fh "=back\n\n";
385}
7882b24a 386 print $fh $footer, "=cut\n";
5a0155e6 387
7882b24a 388 read_only_bottom_close_and_rename($fh);
cd093254
MM
389}
390
e8e591c9
NC
391foreach (@{(setup_embed())[0]}) {
392 next if @$_ < 2;
393 my ($flags, $retval, $func, @args) = @$_;
394 s/\b(?:NN|NULLOK)\b\s+//g for @args;
bc350081 395
5ce57792 396 $funcflags{$func} = {
20046047
KE
397 flags => $flags,
398 retval => $retval,
399 args => \@args,
400 };
5ce57792
NC
401}
402
5ce57792
NC
403# glob() picks up docs from extra .c or .h files that may be in unclean
404# development trees.
741c0772
NC
405open my $fh, '<', 'MANIFEST'
406 or die "Can't open MANIFEST: $!";
407while (my $line = <$fh>) {
b87d9527 408 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 409
1ae6ead9 410 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
411 $curheader = "Functions in file $file\n";
412 autodoc(\*F,$file);
413 close F or die "Error closing $file: $!\n";
414}
741c0772 415close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792
NC
416
417for (sort keys %funcflags) {
418 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 419 next if $funcflags{$_}{flags} =~ /h/;
5ce57792 420 warn "no docs for $_\n"
bc350081 421}
94bdecf9 422
5ce57792 423foreach (sort keys %missing) {
5ce57792 424 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
425}
426
5ce57792
NC
427# walk table providing an array of components in each line to
428# subroutine, printing the result
429
ff5af78d
KW
430# List of funcs in the public API that aren't also marked as core-only,
431# experimental nor deprecated.
b87d9527
KW
432my @missing_api = grep $funcflags{$_}{flags} =~ /A/
433 && $funcflags{$_}{flags} !~ /[xD]/
434 && !$docs{api}{$_}, keys %funcflags;
435output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
6a4c4cd4
DM
436|=encoding UTF-8
437|
438|=head1 NAME
439|
440|perlapi - autogenerated documentation for the perl public API
441|
442|=head1 DESCRIPTION
443|X<Perl API> X<API> X<api>
444|
b87d9527
KW
445|This file contains most of the documentation of the perl public API, as
446|generated by F<embed.pl>. Specifically, it is a listing of functions,
447|macros, flags, and variables that may be used by extension writers. Some
448|specialized items are instead documented in $specialized_docs.
449|
450|L<At the end|/Undocumented functions> is a list of functions which have yet
451|to be documented. Patches welcome! The interfaces of these are subject to
452|change without notice.
453|
454|Anything not listed here is not part of the public API, and should not be
455|used by extension writers at all. For these reasons, blindly using functions
456|listed in proto.h is to be avoided when writing extensions.
6a4c4cd4
DM
457|
458|In Perl, unlike C, a string of characters may generally contain embedded
459|C<NUL> characters. Sometimes in the documentation a Perl string is referred
460|to as a "buffer" to distinguish it from a C string, but sometimes they are
461|both just referred to as strings.
462|
463|Note that all Perl API global variables must be referenced with the C<PL_>
464|prefix. Again, those not listed here are not to be used by extension writers,
465|and can be changed or removed without notice; same with macros.
466|Some macros are provided for compatibility with the older,
467|unadorned names, but this support may be disabled in a future release.
468|
469|Perl was originally written to handle US-ASCII only (that is characters
470|whose ordinal numbers are in the range 0 - 127).
471|And documentation and comments may still use the term ASCII, when
472|sometimes in fact the entire range from 0 - 255 is meant.
473|
474|The non-ASCII characters below 256 can have various meanings, depending on
475|various things. (See, most notably, L<perllocale>.) But usually the whole
476|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
477|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
478|"Latin1" as referring just to the characters in the range 128 through 255, or
479|somethimes from 160 through 255.
480|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
481|
482|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
483|L<perlebcdic>). Most of the documentation (and even comments in the code)
484|ignore the EBCDIC possibility.
485|For almost all purposes the differences are transparent.
486|As an example, under EBCDIC,
487|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
488|whenever this documentation refers to C<utf8>
489|(and variants of that name, including in function names),
490|it also (essentially transparently) means C<UTF-EBCDIC>.
491|But the ordinals of characters differ between ASCII, EBCDIC, and
492|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
493|number of bytes than in UTF-8.
494|
495|The listing below is alphabetical, case insensitive.
496|
94bdecf9 497_EOB_
6a4c4cd4
DM
498|
499|=head1 AUTHORS
500|
501|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 502|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
503|
504|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
505|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
506|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
507|Stephen McCamant, and Gurusamy Sarathy.
508|
7b1f0a98 509|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
510|
511|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
512|
513|=head1 SEE ALSO
514|
7b1f0a98 515$other_places_api
94bdecf9
JH
516_EOE_
517
79fc8511
FC
518# List of non-static internal functions
519my @missing_guts =
9f589e47 520 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
5a0155e6 521
7b1f0a98 522output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
6a4c4cd4
DM
523|=head1 NAME
524|
525|perlintern - autogenerated documentation of purely B<internal>
20046047 526|Perl functions
6a4c4cd4
DM
527|
528|=head1 DESCRIPTION
529|X<internal Perl functions> X<interpreter functions>
530|
531|This file is the autogenerated documentation of functions in the
532|Perl interpreter that are documented using Perl's internal documentation
533|format but are not marked as part of the Perl API. In other words,
534|B<they are not for use in extensions>!
535|
7b1f0a98 536_EOB_
6a4c4cd4
DM
537|
538|=head1 AUTHORS
539|
540|The autodocumentation system was originally added to the Perl core by
541|Benjamin Stuhl. Documentation is by whoever was kind enough to
542|document their functions.
543|
544|=head1 SEE ALSO
545|
7b1f0a98
KW
546$other_places_intern
547_EOE_