This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: /[[:lower:]]/i should match the same as /\p{Lower}/i
[perl5.git] / autodoc.pl
... / ...
CommitLineData
1#!/usr/bin/perl -w
2#
3# Unconditionally regenerate:
4#
5# pod/perlintern.pod
6# pod/perlapi.pod
7#
8# from information stored in
9#
10# embed.fnc
11# plus all the .c and .h files listed in MANIFEST
12#
13# Has an optional arg, which is the directory to chdir to before reading
14# MANIFEST and *.[ch].
15#
16# This script is normally invoked as part of 'make all', but is also
17# called from from regen.pl.
18#
19# '=head1' are the only headings looked for. If the next line after the
20# heading begins with a word character, it is considered to be the first line
21# of documentation that applies to the heading itself. That is, it is output
22# immediately after the heading, before the first function, and not indented.
23# The next input line that is a pod directive terminates this heading-level
24# documentation.
25
26use strict;
27
28#
29# See database of global and static function prototypes in embed.fnc
30# This is used to generate prototype headers under various configurations,
31# export symbols lists for different platforms, and macros to provide an
32# implicit interpreter context argument.
33#
34
35my %docs;
36my %funcflags;
37my %macro = (
38 ax => 1,
39 items => 1,
40 ix => 1,
41 svtype => 1,
42 );
43my %missing;
44
45my $curheader = "Unknown section";
46
47sub autodoc ($$) { # parse a file and extract documentation info
48 my($fh,$file) = @_;
49 my($in, $doc, $line, $header_doc);
50FUNC:
51 while (defined($in = <$fh>)) {
52 if ($in =~ /^#\s*define\s+([A-Za-z_][A-Za-z_0-9]+)\(/ &&
53 ($file ne 'embed.h' || $file ne 'proto.h')) {
54 $macro{$1} = $file;
55 next FUNC;
56 }
57 if ($in=~ /^=head1 (.*)/) {
58 $curheader = $1;
59
60 # If the next line begins with a word char, then is the start of
61 # heading-level documentation.
62 if (defined($doc = <$fh>)) {
63 if ($doc !~ /^\w/) {
64 $in = $doc;
65 redo FUNC;
66 }
67 $header_doc = $doc;
68 $line++;
69
70 # Continue getting the heading-level documentation until read
71 # in any pod directive (or as a fail-safe, find a closing
72 # comment to this pod in a C language file
73HDR_DOC:
74 while (defined($doc = <$fh>)) {
75 if ($doc =~ /^=\w/) {
76 $in = $doc;
77 redo FUNC;
78 }
79 $line++;
80
81 if ($doc =~ m:^\s*\*/$:) {
82 warn "=cut missing? $file:$line:$doc";;
83 last HDR_DOC;
84 }
85 $header_doc .= $doc;
86 }
87 }
88 next FUNC;
89 }
90 $line++;
91 if ($in =~ /^=for\s+apidoc\s+(.*?)\s*\n/) {
92 my $proto = $1;
93 $proto = "||$proto" unless $proto =~ /\|/;
94 my($flags, $ret, $name, @args) = split /\|/, $proto;
95 my $docs = "";
96DOC:
97 while (defined($doc = <$fh>)) {
98 $line++;
99 last DOC if $doc =~ /^=\w+/;
100 if ($doc =~ m:^\*/$:) {
101 warn "=cut missing? $file:$line:$doc";;
102 last DOC;
103 }
104 $docs .= $doc;
105 }
106 $docs = "\n$docs" if $docs and $docs !~ /^\n/;
107
108 # Check the consistency of the flags
109 my ($embed_where, $inline_where);
110 my ($embed_may_change, $inline_may_change);
111
112 my $docref = delete $funcflags{$name};
113 if ($docref and %$docref) {
114 $embed_where = $docref->{flags} =~ /A/ ? 'api' : 'guts';
115 $embed_may_change = $docref->{flags} =~ /M/;
116 } else {
117 $missing{$name} = $file;
118 }
119 if ($flags =~ /m/) {
120 $inline_where = $flags =~ /A/ ? 'api' : 'guts';
121 $inline_may_change = $flags =~ /x/;
122
123 if (defined $embed_where && $inline_where ne $embed_where) {
124 warn "Function '$name' inconsistency: embed.fnc says $embed_where, Pod says $inline_where";
125 }
126
127 if (defined $embed_may_change
128 && $inline_may_change ne $embed_may_change) {
129 my $message = "Function '$name' inconsistency: ";
130 if ($embed_may_change) {
131 $message .= "embed.fnc says 'may change', Pod does not";
132 } else {
133 $message .= "Pod says 'may change', embed.fnc does not";
134 }
135 warn $message;
136 }
137 } elsif (!defined $embed_where) {
138 warn "Unable to place $name!\n";
139 next;
140 } else {
141 $inline_where = $embed_where;
142 $flags .= 'x' if $embed_may_change;
143 @args = @{$docref->{args}};
144 $ret = $docref->{retval};
145 }
146
147 $docs{$inline_where}{$curheader}{$name}
148 = [$flags, $docs, $ret, $file, @args];
149
150 # Create a special entry with an empty-string name for the
151 # heading-level documentation.
152 if (defined $header_doc) {
153 $docs{$inline_where}{$curheader}{""} = $header_doc;
154 undef $header_doc;
155 }
156
157 if (defined $doc) {
158 if ($doc =~ /^=(?:for|head)/) {
159 $in = $doc;
160 redo FUNC;
161 }
162 } else {
163 warn "$file:$line:$in";
164 }
165 }
166 }
167}
168
169sub docout ($$$) { # output the docs for one function
170 my($fh, $name, $docref) = @_;
171 my($flags, $docs, $ret, $file, @args) = @$docref;
172 $name =~ s/\s*$//;
173
174 $docs .= "NOTE: this function is experimental and may change or be
175removed without notice.\n\n" if $flags =~ /x/;
176 $docs .= "NOTE: the perl_ form of this function is deprecated.\n\n"
177 if $flags =~ /p/;
178 $docs .= "NOTE: this function must be explicitly called as Perl_$name with an aTHX_ parameter.\n\n"
179 if $flags =~ /o/;
180
181 print $fh "=item $name\nX<$name>\n$docs";
182
183 if ($flags =~ /U/) { # no usage
184 # nothing
185 } elsif ($flags =~ /s/) { # semicolon ("dTHR;")
186 print $fh "\t\t$name;\n\n";
187 } elsif ($flags =~ /n/) { # no args
188 print $fh "\t$ret\t$name\n\n";
189 } else { # full usage
190 my $p = $flags =~ /o/; # no #define foo Perl_foo
191 my $n = "Perl_"x$p . $name;
192 my $large_ret = length $ret > 7;
193 my $indent_size = 7+8 # nroff: 7 under =head + 8 under =item
194 +8+($large_ret ? 1 + length $ret : 8)
195 +length($n) + 1;
196 my $indent;
197 print $fh "\t$ret" . ($large_ret ? ' ' : "\t") . "$n(";
198 my $long_args;
199 for (@args) {
200 if ($indent_size + 2 + length > 79) {
201 $long_args=1;
202 $indent_size -= length($n) - 3;
203 last;
204 }
205 }
206 my $args = '';
207 if ($p) {
208 $args = @args ? "pTHX_ " : "pTHX";
209 if ($long_args) { print $fh $args; $args = '' }
210 }
211 $long_args and print $fh "\n";
212 my $first = !$long_args;
213 while () {
214 if (!@args or
215 length $args
216 && $indent_size + 3 + length($args[0]) + length $args > 79
217 ) {
218 print $fh
219 $first ? '' : (
220 $indent //=
221 "\t".($large_ret ? " " x (1+length $ret) : "\t")
222 ." "x($long_args ? 4 : 1 + length $n)
223 ),
224 $args, (","x($args ne 'pTHX_ ') . "\n")x!!@args;
225 $args = $first = '';
226 }
227 @args or last;
228 $args .= ", "x!!(length $args && $args ne 'pTHX_ ')
229 . shift @args;
230 }
231 if ($long_args) { print $fh "\n", substr $indent, 0, -4 }
232 print $fh ")\n\n";
233 }
234 print $fh "=for hackers\nFound in file $file\n\n";
235}
236
237sub output {
238 my ($podname, $header, $dochash, $missing, $footer) = @_;
239 my $filename = "pod/$podname.pod";
240 open my $fh, '>', $filename or die "Can't open $filename: $!";
241
242 print $fh <<"_EOH_", $header;
243-*- buffer-read-only: t -*-
244
245!!!!!!! DO NOT EDIT THIS FILE !!!!!!!
246This file is built by $0 extracting documentation from the C source
247files.
248
249_EOH_
250
251 my $key;
252 # case insensitive sort, with fallback for determinacy
253 for $key (sort { uc($a) cmp uc($b) || $a cmp $b } keys %$dochash) {
254 my $section = $dochash->{$key};
255 print $fh "\n=head1 $key\n\n";
256
257 # Output any heading-level documentation and delete so won't get in
258 # the way later
259 if (exists $section->{""}) {
260 print $fh $section->{""} . "\n";
261 delete $section->{""};
262 }
263 print $fh "=over 8\n\n";
264
265 # Again, fallback for determinacy
266 for my $key (sort { uc($a) cmp uc($b) || $a cmp $b } keys %$section) {
267 docout($fh, $key, $section->{$key});
268 }
269 print $fh "\n=back\n";
270 }
271
272 if (@$missing) {
273 print $fh "\n=head1 Undocumented functions\n\n";
274 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
275The following functions have been flagged as part of the public API,
276but are currently undocumented. Use them at your own risk, as the
277interfaces are subject to change.
278
279If you use one of them, you may wish to consider creating and submitting
280documentation for it. If your patch is accepted, this will indicate that
281the interface is stable (unless it is explicitly marked otherwise).
282
283=over
284
285_EOB_
286The following functions are currently undocumented. If you use one of
287them, you may wish to consider creating and submitting documentation for
288it.
289
290=over
291
292_EOB_
293 for my $missing (sort @$missing) {
294 print $fh "=item $missing\nX<$missing>\n\n";
295 }
296 print $fh "=back\n\n";
297}
298
299print $fh $footer, <<'_EOF_';
300=cut
301
302 ex: set ro:
303_EOF_
304
305 close $fh or die "Can't close $filename: $!";
306}
307
308if (@ARGV) {
309 my $workdir = shift;
310 chdir $workdir
311 or die "Couldn't chdir to '$workdir': $!";
312}
313
314open IN, "embed.fnc" or die $!;
315
316while (<IN>) {
317 chomp;
318 next if /^:/;
319 while (s|\\\s*$||) {
320 $_ .= <IN>;
321 chomp;
322 }
323 s/\s+$//;
324 next if /^\s*(#|$)/;
325
326 my ($flags, $retval, $func, @args) = split /\s*\|\s*/, $_;
327
328 next unless $func;
329
330 s/\b(NN|NULLOK)\b\s+//g for @args;
331 $func =~ s/\t//g; # clean up fields from embed.pl
332 $retval =~ s/\t//;
333
334 $funcflags{$func} = {
335 flags => $flags,
336 retval => $retval,
337 args => \@args,
338 };
339}
340
341my $file;
342# glob() picks up docs from extra .c or .h files that may be in unclean
343# development trees.
344my $MANIFEST = do {
345 local ($/, *FH);
346 open FH, "MANIFEST" or die "Can't open MANIFEST: $!";
347 <FH>;
348};
349
350for $file (($MANIFEST =~ /^(\S+\.c)\t/gm), ($MANIFEST =~ /^(\S+\.h)\t/gm)) {
351 open F, "< $file" or die "Cannot open $file for docs: $!\n";
352 $curheader = "Functions in file $file\n";
353 autodoc(\*F,$file);
354 close F or die "Error closing $file: $!\n";
355}
356
357for (sort keys %funcflags) {
358 next unless $funcflags{$_}{flags} =~ /d/;
359 warn "no docs for $_\n"
360}
361
362foreach (sort keys %missing) {
363 next if $macro{$_};
364 # Heuristics for known not-a-function macros:
365 next if /^[A-Z]/;
366 next if /^dj?[A-Z]/;
367
368 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
369}
370
371# walk table providing an array of components in each line to
372# subroutine, printing the result
373
374# List of funcs in the public API that aren't also marked as experimental.
375my @missing_api = grep $funcflags{$_}{flags} =~ /A/ && $funcflags{$_}{flags} !~ /M/ && !$docs{api}{$_}, keys %funcflags;
376output('perlapi', <<'_EOB_', $docs{api}, \@missing_api, <<'_EOE_');
377=head1 NAME
378
379perlapi - autogenerated documentation for the perl public API
380
381=head1 DESCRIPTION
382X<Perl API> X<API> X<api>
383
384This file contains the documentation of the perl public API generated by
385embed.pl, specifically a listing of functions, macros, flags, and variables
386that may be used by extension writers. L<At the end|/Undocumented functions>
387is a list of functions which have yet to be documented. The interfaces of
388those are subject to change without notice. Any functions not listed here are
389not part of the public API, and should not be used by extension writers at
390all. For these reasons, blindly using functions listed in proto.h is to be
391avoided when writing extensions.
392
393Note that all Perl API global variables must be referenced with the C<PL_>
394prefix. Some macros are provided for compatibility with the older,
395unadorned names, but this support may be disabled in a future release.
396
397Perl was originally written to handle US-ASCII only (that is characters
398whose ordinal numbers are in the range 0 - 127).
399And documentation and comments may still use the term ASCII, when
400sometimes in fact the entire range from 0 - 255 is meant.
401
402Note that Perl can be compiled and run under EBCDIC (See L<perlebcdic>)
403or ASCII. Most of the documentation (and even comments in the code)
404ignore the EBCDIC possibility.
405For almost all purposes the differences are transparent.
406As an example, under EBCDIC,
407instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
408whenever this documentation refers to C<utf8>
409(and variants of that name, including in function names),
410it also (essentially transparently) means C<UTF-EBCDIC>.
411But the ordinals of characters differ between ASCII, EBCDIC, and
412the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy more bytes
413than in UTF-8.
414
415Also, on some EBCDIC machines, functions that are documented as operating on
416US-ASCII (or Basic Latin in Unicode terminology) may in fact operate on all
417256 characters in the EBCDIC range, not just the subset corresponding to
418US-ASCII.
419
420The listing below is alphabetical, case insensitive.
421
422_EOB_
423
424=head1 AUTHORS
425
426Until May 1997, this document was maintained by Jeff Okamoto
427<okamoto@corp.hp.com>. It is now maintained as part of Perl itself.
428
429With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
430Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
431Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
432Stephen McCamant, and Gurusamy Sarathy.
433
434API Listing originally by Dean Roehrich <roehrich@cray.com>.
435
436Updated to be autogenerated from comments in the source by Benjamin Stuhl.
437
438=head1 SEE ALSO
439
440L<perlguts>, L<perlxs>, L<perlxstut>, L<perlintern>
441
442_EOE_
443
444# List of non-static internal functions
445my @missing_guts =
446 grep $funcflags{$_}{flags} !~ /[As]/ && !$docs{guts}{$_}, keys %funcflags;
447
448output('perlintern', <<'END', $docs{guts}, \@missing_guts, <<'END');
449=head1 NAME
450
451perlintern - autogenerated documentation of purely B<internal>
452 Perl functions
453
454=head1 DESCRIPTION
455X<internal Perl functions> X<interpreter functions>
456
457This file is the autogenerated documentation of functions in the
458Perl interpreter that are documented using Perl's internal documentation
459format but are not marked as part of the Perl API. In other words,
460B<they are not for use in extensions>!
461
462END
463
464=head1 AUTHORS
465
466The autodocumentation system was originally added to the Perl core by
467Benjamin Stuhl. Documentation is by whoever was kind enough to
468document their functions.
469
470=head1 SEE ALSO
471
472L<perlguts>, L<perlapi>
473
474END