This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
document the unary minus exception for barewords
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
6294c161
DM
2#
3# Unconditionally regenerate:
4#
5# pod/perlintern.pod
6# pod/perlapi.pod
7#
8# from information stored in
9#
10# embed.fnc
11# plus all the .c and .h files listed in MANIFEST
12#
13# Has an optional arg, which is the directory to chdir to before reading
14# MANIFEST and *.[ch].
15#
52a9d53b 16# This script is invoked as part of 'make all'
151c3fe5 17#
f554dfc5
MH
18# '=head1' are the only headings looked for. If the first non-blank line after
19# the heading begins with a word character, it is considered to be the first
20# line of documentation that applies to the heading itself. That is, it is
21# output immediately after the heading, before the first function, and not
22# indented. The next input line that is a pod directive terminates this
23# heading-level documentation.
94bdecf9 24
1fcde0e9
KW
25# The meanings of the flags fields in embed.fnc and the source code is
26# documented at the top of embed.fnc.
27
56a0c332 28use strict;
a64c954a 29
7882b24a
NC
30if (@ARGV) {
31 my $workdir = shift;
32 chdir $workdir
33 or die "Couldn't chdir to '$workdir': $!";
34}
3d7c117d
MB
35require './regen/regen_lib.pl';
36require './regen/embed_lib.pl';
7882b24a 37
7b1f0a98
KW
38my @specialized_docs = sort qw( perlguts
39 perlxs
40 perlxstut
b87d9527 41 perlclib
a1a5a9c8 42 warnings
678f21a2 43 perlapio
31b83e34 44 perlcall
0f292d69 45 perlfilter
6a6f8717 46 perlmroapi
1a2a04a3 47 config.h
7b1f0a98 48 );
1a2a04a3
KW
49sub name_in_pod($) {
50 my $name = shift;
51 return "F<$name>" if $name =~ /\./;
52 return "L<$name>";
53}
54my $other_places_api = join " ", map { name_in_pod($_) } sort @specialized_docs, 'perlintern';
55my $other_places_intern = join " ", map { name_in_pod($_) } sort @specialized_docs, 'perlapi';
7b1f0a98 56
1a2a04a3 57@specialized_docs = map { name_in_pod($_) } sort @specialized_docs;
b87d9527
KW
58$specialized_docs[-1] =~ s/^/and /;
59my $specialized_docs = join ", ", @specialized_docs;
60
94bdecf9 61#
346f75ff 62# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
63# This is used to generate prototype headers under various configurations,
64# export symbols lists for different platforms, and macros to provide an
65# implicit interpreter context argument.
66#
67
6a235718 68my %docs;
df6bd76f 69my %seen;
5ce57792 70my %funcflags;
5ce57792 71my %missing;
94bdecf9
JH
72
73my $curheader = "Unknown section";
74
75sub autodoc ($$) { # parse a file and extract documentation info
76 my($fh,$file) = @_;
151c3fe5 77 my($in, $doc, $line, $header_doc);
f554dfc5
MH
78
79 # Count lines easier
80 my $get_next_line = sub { $line++; return <$fh> };
81
94bdecf9 82FUNC:
f554dfc5 83 while (defined($in = $get_next_line->())) {
94bdecf9
JH
84 if ($in=~ /^=head1 (.*)/) {
85 $curheader = $1;
151c3fe5 86
f554dfc5 87 # If the next non-space line begins with a word char, then it is
1c82f4a4 88 # the start of heading-level documentation.
20046047 89 if (defined($doc = $get_next_line->())) {
f554dfc5
MH
90 # Skip over empty lines
91 while ($doc =~ /^\s+$/) {
92 if (! defined($doc = $get_next_line->())) {
93 next FUNC;
94 }
95 }
96
151c3fe5
KW
97 if ($doc !~ /^\w/) {
98 $in = $doc;
99 redo FUNC;
100 }
101 $header_doc = $doc;
151c3fe5
KW
102
103 # Continue getting the heading-level documentation until read
104 # in any pod directive (or as a fail-safe, find a closing
105 # comment to this pod in a C language file
106HDR_DOC:
f554dfc5 107 while (defined($doc = $get_next_line->())) {
151c3fe5
KW
108 if ($doc =~ /^=\w/) {
109 $in = $doc;
110 redo FUNC;
111 }
151c3fe5
KW
112
113 if ($doc =~ m:^\s*\*/$:) {
114 warn "=cut missing? $file:$line:$doc";;
115 last HDR_DOC;
116 }
117 $header_doc .= $doc;
118 }
119 }
94bdecf9
JH
120 next FUNC;
121 }
df6bd76f
KW
122
123 # Parentheses are used to accept anything that looks like 'for
124 # apidoc', and later verify that things are the actual correct syntax.
125 my $apidoc_re = qr/^(\s*)(=?)(\s*)for(\s*)apidoc(\s*)(.*?)\s*\n/;
126
127 if ($in =~ /^=for comment/) {
128 $in = $get_next_line->();
129 if ($in =~ /skip apidoc/) { # Skips the next apidoc-like line
130 while (defined($in = $get_next_line->())) {
131 last if $in =~ $apidoc_re;
132 }
133 }
134 next FUNC;
135 }
136
137 if ($in =~ $apidoc_re) {
138 my $is_in_proper_form = length $1 == 0
139 && length $2 > 0
140 && length $3 == 0
141 && length $4 > 0
142 && length $5 > 0
143 && length $6 > 0;
144 my $proto_in_file = $6;
20046047
KE
145 my $proto = $proto_in_file;
146 $proto = "||$proto" unless $proto =~ /\|/;
147 my($flags, $ret, $name, @args) = split /\s*\|\s*/, $proto;
df6bd76f 148 $name && $is_in_proper_form or die <<EOS;
256dda50
TC
149Bad apidoc at $file line $.:
150 $in
151Expected:
152 =for apidoc flags|returntype|name|arg|arg|...
153 =for apidoc flags|returntype|name
154 =for apidoc name
155EOS
3dbfa774
KW
156 die "flag $1 is not legal (for function $name (from $file))"
157 if $flags =~ / ( [^AabCDdEefhiMmNnTOoPpRrSsUuWXx] ) /x;
6523e108
KW
158 next FUNC if $flags =~ /h/;
159
3dbfa774 160 die "'u' flag must also have 'm' flag' for $name" if $flags =~ /u/ && $flags !~ /m/;
0a60f600
KW
161 warn ("'$name' not \\w+ in '$proto_in_file' in $file")
162 if $flags !~ /N/ && $name !~ / ^ [_[:alpha:]] \w* $ /x;
df6bd76f
KW
163
164 if (exists $seen{$name}) {
165 die ("'$name' in $file was already documented in $seen{$name}");
166 }
167 else {
168 $seen{$name} = $file;
169 }
170
20046047 171 my $docs = "";
94bdecf9 172DOC:
20046047 173 while (defined($doc = $get_next_line->())) {
72d4186d
KW
174
175 # Other pod commands are considered part of the current
176 # function's docs, so can have lists, etc.
177 last DOC if $doc =~ /^=(cut|for\s+apidoc|head)/;
20046047
KE
178 if ($doc =~ m:^\*/$:) {
179 warn "=cut missing? $file:$line:$doc";;
180 last DOC;
72d4186d 181 }
20046047
KE
182 $docs .= $doc;
183 }
184 $docs = "\n$docs" if $docs and $docs !~ /^\n/;
5ce57792 185
20046047 186 # If the entry is also in embed.fnc, it should be defined
8902d554 187 # completely there, but not here
20046047
KE
188 my $embed_docref = delete $funcflags{$name};
189 if ($embed_docref and %$embed_docref) {
8902d554
KW
190 warn "embed.fnc entry overrides redundant information in"
191 . " '$proto_in_file' in $file" if $flags || $ret || @args;
192 $flags = $embed_docref->{'flags'};
5514c4f1
KW
193 warn "embed.fnc entry '$name' missing 'd' flag"
194 unless $flags =~ /d/;
6523e108 195 next FUNC if $flags =~ /h/;
8902d554 196 $ret = $embed_docref->{'retval'};
20046047 197 @args = @{$embed_docref->{args}};
5514c4f1
KW
198 } elsif ($flags !~ /m/) { # Not in embed.fnc, is missing if not a
199 # macro
20046047
KE
200 $missing{$name} = $file;
201 }
5ce57792 202
8902d554 203 my $inline_where = $flags =~ /A/ ? 'api' : 'guts';
5ce57792 204
20046047 205 if (exists $docs{$inline_where}{$curheader}{$name}) {
7a6610ca
DM
206 warn "$0: duplicate API entry for '$name' in $inline_where/$curheader\n";
207 next;
208 }
20046047
KE
209 $docs{$inline_where}{$curheader}{$name}
210 = [$flags, $docs, $ret, $file, @args];
5ce57792 211
151c3fe5
KW
212 # Create a special entry with an empty-string name for the
213 # heading-level documentation.
20046047 214 if (defined $header_doc) {
151c3fe5
KW
215 $docs{$inline_where}{$curheader}{""} = $header_doc;
216 undef $header_doc;
217 }
218
20046047
KE
219 if (defined $doc) {
220 if ($doc =~ /^=(?:for|head)/) {
221 $in = $doc;
222 redo FUNC;
223 }
224 } else {
225 warn "$file:$line:$in";
226 }
227 }
94bdecf9
JH
228 }
229}
230
231sub docout ($$$) { # output the docs for one function
232 my($fh, $name, $docref) = @_;
233 my($flags, $docs, $ret, $file, @args) = @$docref;
d8c40edc 234 $name =~ s/\s*$//;
94bdecf9 235
d4e99c76 236 if ($flags =~ /D/) {
9164a823
KW
237 my $function = $flags =~ /n/ ? 'definition' : 'function';
238 $docs = "\n\nDEPRECATED! It is planned to remove this $function from a
d4e99c76
KW
239future release of Perl. Do not use it for new code; remove it from
240existing code.\n\n$docs";
241 }
242 else {
58a428bb
KW
243 $docs = "\n\nNOTE: this function is experimental and may change or be
244removed without notice.\n\n$docs" if $flags =~ /x/;
d4e99c76 245 }
54c193ae
KW
246
247 # Is Perl_, but no #define foo # Perl_foo
7d4ca04c
KW
248 my $p = (($flags =~ /p/ && $flags =~ /o/ && $flags !~ /M/)
249 || ($flags =~ /f/ && $flags !~ /T/)); # Can't handle threaded varargs
54c193ae 250
94bdecf9 251 $docs .= "NOTE: the perl_ form of this function is deprecated.\n\n"
20046047 252 if $flags =~ /O/;
54c193ae
KW
253 if ($p) {
254 $docs .= "NOTE: this function must be explicitly called as Perl_$name";
d7cc3209 255 $docs .= " with an aTHX_ parameter" if $flags !~ /T/;
54c193ae
KW
256 $docs .= ".\n\n"
257 }
94bdecf9 258
d8c40edc 259 print $fh "=item $name\nX<$name>\n$docs";
94bdecf9
JH
260
261 if ($flags =~ /U/) { # no usage
8b5ff177 262 warn("U and s flags are incompatible") if $flags =~ /s/;
20046047 263 # nothing
05ca4832 264 } else {
8b5ff177 265 if ($flags =~ /n/) { # no args
1fcde0e9
KW
266 warn("n flag without m") unless $flags =~ /m/;
267 warn("n flag but apparently has args") if @args;
1ded1f42
KW
268 print $fh "\t$ret\t$name";
269 } else { # full usage
1ded1f42
KW
270 my $n = "Perl_"x$p . $name;
271 my $large_ret = length $ret > 7;
272 my $indent_size = 7+8 # nroff: 7 under =head + 8 under =item
273 +8+($large_ret ? 1 + length $ret : 8)
274 +length($n) + 1;
275 my $indent;
276 print $fh "\t$ret" . ($large_ret ? ' ' : "\t") . "$n(";
277 my $long_args;
278 for (@args) {
279 if ($indent_size + 2 + length > 79) {
280 $long_args=1;
281 $indent_size -= length($n) - 3;
282 last;
283 }
284 }
285 my $args = '';
2f4e6339 286 if ($flags !~ /T/ && ($p || ($flags =~ /m/ && $name =~ /^Perl_/))) {
1ded1f42
KW
287 $args = @args ? "pTHX_ " : "pTHX";
288 if ($long_args) { print $fh $args; $args = '' }
289 }
290 $long_args and print $fh "\n";
291 my $first = !$long_args;
292 while () {
293 if (!@args or
294 length $args
295 && $indent_size + 3 + length($args[0]) + length $args > 79
296 ) {
297 print $fh
298 $first ? '' : (
299 $indent //=
300 "\t".($large_ret ? " " x (1+length $ret) : "\t")
301 ." "x($long_args ? 4 : 1 + length $n)
302 ),
303 $args, (","x($args ne 'pTHX_ ') . "\n")x!!@args;
304 $args = $first = '';
305 }
306 @args or last;
307 $args .= ", "x!!(length $args && $args ne 'pTHX_ ')
308 . shift @args;
309 }
310 if ($long_args) { print $fh "\n", substr $indent, 0, -4 }
311 print $fh ")";
312 }
8b5ff177 313 print $fh ";" if $flags =~ /s/; # semicolon "dTHR;"
1ded1f42 314 print $fh "\n\n";
94bdecf9
JH
315 }
316 print $fh "=for hackers\nFound in file $file\n\n";
317}
318
f83c6033
KW
319sub sort_helper {
320 # Do a case-insensitive dictionary sort, with only alphabetics
321 # significant, falling back to using everything for determinancy
1354d57e 322 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
f83c6033
KW
323 || uc($a) cmp uc($b)
324 || $a cmp $b;
325}
326
7b73ff98 327sub output {
5a0155e6 328 my ($podname, $header, $dochash, $missing, $footer) = @_;
6a4c4cd4
DM
329 #
330 # strip leading '|' from each line which had been used to hide
331 # pod from pod checkers.
332 s/^\|//gm for $header, $footer;
333
7882b24a 334 my $fh = open_new("pod/$podname.pod", undef,
20046047 335 {by => "$0 extracting documentation",
f1f44974 336 from => 'the C source files'}, 1);
e0492643 337
7882b24a 338 print $fh $header;
e0492643 339
7b73ff98 340 my $key;
f83c6033 341 for $key (sort sort_helper keys %$dochash) {
20046047
KE
342 my $section = $dochash->{$key};
343 print $fh "\n=head1 $key\n\n";
151c3fe5
KW
344
345 # Output any heading-level documentation and delete so won't get in
346 # the way later
347 if (exists $section->{""}) {
348 print $fh $section->{""} . "\n";
349 delete $section->{""};
350 }
20046047 351 print $fh "=over 8\n\n";
151c3fe5 352
20046047
KE
353 for my $key (sort sort_helper keys %$section) {
354 docout($fh, $key, $section->{$key});
355 }
356 print $fh "\n=back\n";
7b73ff98
NC
357 }
358
5a0155e6 359 if (@$missing) {
a23e6e20 360 print $fh "\n=head1 Undocumented functions\n\n";
2616800a 361 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
474d0ac8 362The following functions have been flagged as part of the public API,
72d33970 363but are currently undocumented. Use them at your own risk, as the
ba4591a5
KW
364interfaces are subject to change. Functions that are not listed in this
365document are not intended for public use, and should NOT be used under any
366circumstances.
367
5a4fed09
KW
368If you feel you need to use one of these functions, first send email to
369L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
370that there is a good reason for the function not being documented, and it
371should be removed from this list; or it may just be that no one has gotten
372around to documenting it. In the latter case, you will be asked to submit a
373patch to document the function. Once your patch is accepted, it will indicate
374that the interface is stable (unless it is explicitly marked otherwise) and
375usable by you.
cf5f2f8f 376_EOB_
2616800a
FC
377The following functions are currently undocumented. If you use one of
378them, you may wish to consider creating and submitting documentation for
379it.
2616800a 380_EOB_
6a4c4cd4
DM
381 print $fh "\n=over\n\n";
382
cf5f2f8f
KW
383 for my $missing (sort @$missing) {
384 print $fh "=item $missing\nX<$missing>\n\n";
5a0155e6 385 }
cf5f2f8f
KW
386 print $fh "=back\n\n";
387}
7882b24a 388 print $fh $footer, "=cut\n";
5a0155e6 389
7882b24a 390 read_only_bottom_close_and_rename($fh);
cd093254
MM
391}
392
e8e591c9
NC
393foreach (@{(setup_embed())[0]}) {
394 next if @$_ < 2;
395 my ($flags, $retval, $func, @args) = @$_;
396 s/\b(?:NN|NULLOK)\b\s+//g for @args;
bc350081 397
5ce57792 398 $funcflags{$func} = {
20046047
KE
399 flags => $flags,
400 retval => $retval,
401 args => \@args,
402 };
5ce57792
NC
403}
404
5ce57792
NC
405# glob() picks up docs from extra .c or .h files that may be in unclean
406# development trees.
741c0772
NC
407open my $fh, '<', 'MANIFEST'
408 or die "Can't open MANIFEST: $!";
409while (my $line = <$fh>) {
b87d9527 410 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 411
1ae6ead9 412 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
413 $curheader = "Functions in file $file\n";
414 autodoc(\*F,$file);
415 close F or die "Error closing $file: $!\n";
416}
741c0772 417close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792
NC
418
419for (sort keys %funcflags) {
420 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 421 next if $funcflags{$_}{flags} =~ /h/;
5ce57792 422 warn "no docs for $_\n"
bc350081 423}
94bdecf9 424
5ce57792 425foreach (sort keys %missing) {
5ce57792 426 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
427}
428
5ce57792
NC
429# walk table providing an array of components in each line to
430# subroutine, printing the result
431
ff5af78d
KW
432# List of funcs in the public API that aren't also marked as core-only,
433# experimental nor deprecated.
b87d9527
KW
434my @missing_api = grep $funcflags{$_}{flags} =~ /A/
435 && $funcflags{$_}{flags} !~ /[xD]/
436 && !$docs{api}{$_}, keys %funcflags;
437output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
6a4c4cd4
DM
438|=encoding UTF-8
439|
440|=head1 NAME
441|
442|perlapi - autogenerated documentation for the perl public API
443|
444|=head1 DESCRIPTION
445|X<Perl API> X<API> X<api>
446|
b87d9527
KW
447|This file contains most of the documentation of the perl public API, as
448|generated by F<embed.pl>. Specifically, it is a listing of functions,
449|macros, flags, and variables that may be used by extension writers. Some
450|specialized items are instead documented in $specialized_docs.
451|
452|L<At the end|/Undocumented functions> is a list of functions which have yet
453|to be documented. Patches welcome! The interfaces of these are subject to
454|change without notice.
455|
456|Anything not listed here is not part of the public API, and should not be
457|used by extension writers at all. For these reasons, blindly using functions
458|listed in proto.h is to be avoided when writing extensions.
6a4c4cd4
DM
459|
460|In Perl, unlike C, a string of characters may generally contain embedded
461|C<NUL> characters. Sometimes in the documentation a Perl string is referred
462|to as a "buffer" to distinguish it from a C string, but sometimes they are
463|both just referred to as strings.
464|
465|Note that all Perl API global variables must be referenced with the C<PL_>
466|prefix. Again, those not listed here are not to be used by extension writers,
467|and can be changed or removed without notice; same with macros.
468|Some macros are provided for compatibility with the older,
469|unadorned names, but this support may be disabled in a future release.
470|
471|Perl was originally written to handle US-ASCII only (that is characters
472|whose ordinal numbers are in the range 0 - 127).
473|And documentation and comments may still use the term ASCII, when
474|sometimes in fact the entire range from 0 - 255 is meant.
475|
476|The non-ASCII characters below 256 can have various meanings, depending on
477|various things. (See, most notably, L<perllocale>.) But usually the whole
478|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
479|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
480|"Latin1" as referring just to the characters in the range 128 through 255, or
481|somethimes from 160 through 255.
482|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
483|
484|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
485|L<perlebcdic>). Most of the documentation (and even comments in the code)
486|ignore the EBCDIC possibility.
487|For almost all purposes the differences are transparent.
488|As an example, under EBCDIC,
489|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
490|whenever this documentation refers to C<utf8>
491|(and variants of that name, including in function names),
492|it also (essentially transparently) means C<UTF-EBCDIC>.
493|But the ordinals of characters differ between ASCII, EBCDIC, and
494|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
495|number of bytes than in UTF-8.
496|
497|The listing below is alphabetical, case insensitive.
498|
94bdecf9 499_EOB_
6a4c4cd4
DM
500|
501|=head1 AUTHORS
502|
503|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 504|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
505|
506|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
507|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
508|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
509|Stephen McCamant, and Gurusamy Sarathy.
510|
7b1f0a98 511|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
512|
513|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
514|
515|=head1 SEE ALSO
516|
7b1f0a98 517$other_places_api
94bdecf9
JH
518_EOE_
519
79fc8511
FC
520# List of non-static internal functions
521my @missing_guts =
9f589e47 522 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
5a0155e6 523
7b1f0a98 524output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
6a4c4cd4
DM
525|=head1 NAME
526|
527|perlintern - autogenerated documentation of purely B<internal>
20046047 528|Perl functions
6a4c4cd4
DM
529|
530|=head1 DESCRIPTION
531|X<internal Perl functions> X<interpreter functions>
532|
533|This file is the autogenerated documentation of functions in the
534|Perl interpreter that are documented using Perl's internal documentation
535|format but are not marked as part of the Perl API. In other words,
536|B<they are not for use in extensions>!
537|
7b1f0a98 538_EOB_
6a4c4cd4
DM
539|
540|=head1 AUTHORS
541|
542|The autodocumentation system was originally added to the Perl core by
543|Benjamin Stuhl. Documentation is by whoever was kind enough to
544|document their functions.
545|
546|=head1 SEE ALSO
547|
7b1f0a98
KW
548$other_places_intern
549_EOE_