This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
win32/@makefiles: move the full perl config.h to full/
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
00d68f0d
KW
2
3use Text::Tabs;
a9a7c81e 4#
6294c161
DM
5# Unconditionally regenerate:
6#
7# pod/perlintern.pod
8# pod/perlapi.pod
9#
10# from information stored in
11#
12# embed.fnc
9b983db7 13# plus all the core .c, .h, and .pod files listed in MANIFEST
e10306bf
KW
14# plus %extra_input_pods
15
16my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
17
6294c161 18# Has an optional arg, which is the directory to chdir to before reading
9b983db7 19# MANIFEST and the files
6294c161 20#
52a9d53b 21# This script is invoked as part of 'make all'
151c3fe5 22#
9b983db7
KW
23# The generated pod consists of sections of related elements, functions,
24# macros, and variables. The keys of %valid_sections give the current legal
25# ones. Just add a new key to add a section.
26#
27# Throughout the files read by this script are lines like
28#
29# =for apidoc_section Section Name
3f620621 30# =for apidoc_section $section_name_variable
9b983db7
KW
31#
32# "Section Name" (after having been stripped of leading space) must be one of
3f620621
KW
33# the legal section names, or an error is thrown. $section_name_variable must
34# be one of the legal section name variables defined below; these expand to
35# legal section names. This form is used so that minor wording changes in
1e45e087 36# these titles can be confined to this file. All the names of the variables
3f620621
KW
37# end in '_scn'; this suffix is optional in the apidoc_section lines.
38#
39# All API elements defined between this line and the next 'apidoc_section'
40# line will go into the section "Section Name" (or $section_name_variable),
41# sorted by dictionary order within it. perlintern and perlapi are parallel
42# documents, each potentially with a section "Section Name". Each element is
43# marked as to which document it goes into. If there are none for a
44# particular section in perlapi, that section is omitted.
9b983db7
KW
45#
46# Also, in .[ch] files, there may be
47#
48# =head1 Section Name
49#
50# lines in comments. These are also used by this program to switch to section
51# "Section Name". The difference is that if there are any lines after the
52# =head1, inside the same comment, and before any =for apidoc-ish lines, they
53# are used as a heading for section "Section Name" (in both perlintern and
54# perlapi). This includes any =head[2-5]. If more than one '=head1 Section
55# Name' line has content, they appear in the generated pod in an undefined
3f620621 56# order. Note that you can't use a $section_name_variable in =head1 lines
9b983db7
KW
57#
58# The next =head1, =for apidoc_section, or file end terminates what goes into
59# the current section
60#
61# The %valid_sections hash below also can have header content, which will
62# appear before any =head1 content. The hash can also have footer content
63# content, which will appear at the end of the section, after all the
64# elements.
65#
66# The lines that define the actual functions, etc are documented in embed.fnc,
67# because they have flags which must be kept in sync with that file.
1fcde0e9 68
56a0c332 69use strict;
04adaf08 70use warnings;
a64c954a 71
3c21fb41
TC
72my $config_h = 'config.h';
73if (@ARGV >= 2 && $ARGV[0] eq "-c") {
74 shift;
75 $config_h = shift;
76}
77
fa5d437b
KW
78my $nroff_min_indent = 4; # for non-heading lines
79# 80 column terminal - 2 for pager adding 2 columns;
80my $max_width = 80 - 2 - $nroff_min_indent;
81my $standard_indent = 4; # Any additional indentations
e4f5ddf4 82
7882b24a
NC
83if (@ARGV) {
84 my $workdir = shift;
85 chdir $workdir
86 or die "Couldn't chdir to '$workdir': $!";
87}
3d7c117d
MB
88require './regen/regen_lib.pl';
89require './regen/embed_lib.pl';
7882b24a 90
a026e430 91my %described_elsewhere;
b87d9527 92
94bdecf9 93#
346f75ff 94# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
95# This is used to generate prototype headers under various configurations,
96# export symbols lists for different platforms, and macros to provide an
97# implicit interpreter context argument.
98#
99
6a235718 100my %docs;
df6bd76f 101my %seen;
5ce57792 102my %funcflags;
5ce57792 103my %missing;
00d68f0d
KW
104my %missing_macros;
105
106my $link_text = "Described in";
107
783469eb
KW
108my $description_indent = 4;
109my $usage_indent = 3; # + initial blank yields 4 total
110
3e294c2f 111my $AV_scn = 'AV Handling';
3951f999
KW
112my $callback_scn = 'Callback Functions';
113my $casting_scn = 'Casting';
114my $casing_scn = 'Character case changing';
115my $classification_scn = 'Character classification';
116my $names_scn = 'Character names';
117my $scope_scn = 'Compile-time scope hooks';
118my $compiler_scn = 'Compiler and Preprocessor information';
119my $directives_scn = 'Compiler directives';
120my $concurrency_scn = 'Concurrency';
240d081e 121my $COP_scn = 'COPs and Hint Hashes';
3951f999
KW
122my $CV_scn = 'CV Handling';
123my $custom_scn = 'Custom Operators';
d1b9805e
KW
124my $debugging_scn = 'Debugging';
125my $display_scn = 'Display functions';
9b1c4c45 126my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
3951f999
KW
127my $errno_scn = 'Errno';
128my $exceptions_scn = 'Exception Handling (simple) Macros';
129my $filesystem_scn = 'Filesystem configuration values';
b6f27e58 130my $filters_scn = 'Source Filters';
baa5caa0 131my $floating_scn = 'Floating point';
3951f999
KW
132my $genconfig_scn = 'General Configuration';
133my $globals_scn = 'Global Variables';
f3d866f2 134my $GV_scn = 'GV Handling and Stashes';
3951f999
KW
135my $hook_scn = 'Hook manipulation';
136my $HV_scn = 'HV Handling';
137my $io_scn = 'Input/Output';
a87f9c51 138my $io_formats_scn = 'I/O Formats';
baa5caa0 139my $integer_scn = 'Integer';
3951f999 140my $lexer_scn = 'Lexer interface';
3e294c2f 141my $locale_scn = 'Locales';
3951f999
KW
142my $magic_scn = 'Magic';
143my $memory_scn = 'Memory Management';
3e294c2f 144my $MRO_scn = 'MRO';
3951f999
KW
145my $multicall_scn = 'Multicall Functions';
146my $numeric_scn = 'Numeric Functions';
72957953
KW
147
148# Now combined, as unclear which functions go where, but separate names kept
149# to avoid 1) other code changes; 2) in case it seems better to split again
150my $optrees_scn = 'Optrees';
151my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
152my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
3951f999
KW
153my $pack_scn = 'Pack and Unpack';
154my $pad_scn = 'Pad Data Structures';
155my $password_scn = 'Password and Group access';
6ef63541 156my $reports_scn = 'Reports and Formats';
3951f999 157my $paths_scn = 'Paths to system commands';
3951f999
KW
158my $prototypes_scn = 'Prototype information';
159my $regexp_scn = 'REGEXP Functions';
160my $signals_scn = 'Signals';
161my $site_scn = 'Site configuration';
162my $sockets_scn = 'Sockets configuration values';
3951f999
KW
163my $stack_scn = 'Stack Manipulation Macros';
164my $string_scn = 'String Handling';
165my $SV_flags_scn = 'SV Flags';
166my $SV_scn = 'SV Handling';
d8e799d8 167my $tainting_scn = 'Tainting';
3951f999
KW
168my $time_scn = 'Time';
169my $typedefs_scn = 'Typedef names';
170my $unicode_scn = 'Unicode Support';
171my $utility_scn = 'Utility Functions';
172my $versioning_scn = 'Versioning';
173my $warning_scn = 'Warning and Dieing';
3e294c2f 174my $XS_scn = 'XS';
3951f999 175
55d5a9fe
KW
176# Kept separate at end
177my $undocumented_scn = 'Undocumented elements';
178
00d68f0d 179my %valid_sections = (
3e294c2f 180 $AV_scn => {},
3951f999
KW
181 $callback_scn => {},
182 $casting_scn => {},
183 $casing_scn => {},
184 $classification_scn => {},
185 $scope_scn => {},
186 $compiler_scn => {},
187 $directives_scn => {},
188 $concurrency_scn => {},
189 $COP_scn => {},
89222628
KW
190 $CV_scn => {
191 header => <<~'EOT',
192 This section documents functions to manipulate CVs which are
193 code-values, meaning subroutines. For more information, see
194 L<perlguts>.
195 EOT
196 },
197
3951f999 198 $custom_scn => {},
d1b9805e
KW
199 $debugging_scn => {},
200 $display_scn => {},
3951f999
KW
201 $embedding_scn => {},
202 $errno_scn => {},
203 $exceptions_scn => {},
204 $filesystem_scn => {
6927cd44
KW
205 header => <<~'EOT',
206 Also see L</List of capability HAS_foo symbols>.
207 EOT
208 },
b6f27e58 209 $filters_scn => {},
3951f999 210 $floating_scn => {
00d68f0d 211 header => <<~'EOT',
6927cd44
KW
212 Also L</List of capability HAS_foo symbols> lists capabilities
213 that arent in this section. For example C<HAS_ASINH>, for the
214 hyperbolic sine function.
215 EOT
216 },
3951f999 217 $genconfig_scn => {
00d68f0d
KW
218 header => <<~'EOT',
219 This section contains configuration information not otherwise
220 found in the more specialized sections of this document. At the
221 end is a list of C<#defines> whose name should be enough to tell
222 you what they do, and a list of #defines which tell you if you
223 need to C<#include> files to get the corresponding functionality.
224 EOT
225
014f1aa3 226 footer => <<~EOT,
00d68f0d
KW
227
228 =head2 List of capability C<HAS_I<foo>> symbols
229
6927cd44
KW
230 This is a list of those symbols that dont appear elsewhere in ths
231 document that indicate if the current platform has a certain
232 capability. Their names all begin with C<HAS_>. Only those
233 symbols whose capability is directly derived from the name are
234 listed here. All others have their meaning expanded out elsewhere
235 in this document. This (relatively) compact list is because we
236 think that the expansion would add little or no value and take up
237 a lot of space (because there are so many). If you think certain
238 ones should be expanded, send email to
014f1aa3 239 L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
00d68f0d
KW
240
241 Each symbol here will be C<#define>d if and only if the platform
242 has the capability. If you need more detail, see the
6927cd44
KW
243 corresponding entry in F<config.h>. For convenience, the list is
244 split so that the ones that indicate there is a reentrant version
245 of a capability are listed separately
00d68f0d
KW
246
247 __HAS_LIST__
248
6927cd44
KW
249 And, the reentrant capabilities:
250
251 __HAS_R_LIST__
252
00d68f0d
KW
253 Example usage:
254
014f1aa3 255 =over $standard_indent
00d68f0d
KW
256
257 #ifdef HAS_STRNLEN
258 use strnlen()
259 #else
260 use an alternative implementation
261 #endif
262
263 =back
264
265 =head2 List of C<#include> needed symbols
266
267 This list contains symbols that indicate if certain C<#include>
268 files are present on the platform. If your code accesses the
269 functionality that one of these is for, you will need to
270 C<#include> it if the symbol on this list is C<#define>d. For
271 more detail, see the corresponding entry in F<config.h>.
272
273 __INCLUDE_LIST__
274
275 Example usage:
276
014f1aa3 277 =over $standard_indent
00d68f0d
KW
278
279 #ifdef I_WCHAR
280 #include <wchar.h>
281 #endif
282
283 =back
284 EOT
285 },
3951f999
KW
286 $globals_scn => {},
287 $GV_scn => {},
288 $hook_scn => {},
289 $HV_scn => {},
290 $io_scn => {},
a87f9c51
KW
291 $io_formats_scn => {
292 header => <<~'EOT',
293 These are used for formatting the corresponding type For example,
294 instead of saying
295
296 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
297
298 use
299
300 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
301
302 This keeps you from having to know if, say an IV, needs to be
303 printed as C<%d>, C<%ld>, or something else.
304 EOT
305 },
3951f999
KW
306 $integer_scn => {},
307 $lexer_scn => {},
3e294c2f 308 $locale_scn => {},
3951f999
KW
309 $magic_scn => {},
310 $memory_scn => {},
3e294c2f 311 $MRO_scn => {},
3951f999
KW
312 $multicall_scn => {},
313 $numeric_scn => {},
72957953 314 $optrees_scn => {},
3951f999
KW
315 $optree_construction_scn => {},
316 $optree_manipulation_scn => {},
317 $pack_scn => {},
318 $pad_scn => {},
319 $password_scn => {},
320 $paths_scn => {},
3951f999
KW
321 $prototypes_scn => {},
322 $regexp_scn => {},
6ef63541
KW
323 $reports_scn => {
324 header => <<~"EOT",
325 These are used in the simple report generation feature of Perl.
326 See L<perlform>.
327 EOT
328 },
3951f999
KW
329 $signals_scn => {},
330 $site_scn => {
6927cd44
KW
331 header => <<~'EOT',
332 These variables give details as to where various libraries,
333 installation destinations, I<etc.>, go, as well as what various
334 installation options were selected
335 EOT
336 },
3951f999 337 $sockets_scn => {},
3951f999
KW
338 $stack_scn => {},
339 $string_scn => {
89222628
KW
340 header => <<~EOT,
341 See also C<L</$unicode_scn>>.
6927cd44
KW
342 EOT
343 },
3951f999
KW
344 $SV_flags_scn => {},
345 $SV_scn => {},
d8e799d8 346 $tainting_scn => {},
3951f999
KW
347 $time_scn => {},
348 $typedefs_scn => {},
89222628
KW
349 $unicode_scn => {
350 header => <<~EOT,
351 L<perlguts/Unicode Support> has an introduction to this API.
352
353 See also C<L</$classification_scn>>,
354 C<L</$casing_scn>>,
355 and C<L</$string_scn>>.
356 Various functions outside this section also work specially with
357 Unicode. Search for the string "utf8" in this document.
358 EOT
359 },
3951f999
KW
360 $utility_scn => {},
361 $versioning_scn => {},
362 $warning_scn => {},
3e294c2f 363 $XS_scn => {},
00d68f0d 364);
94bdecf9 365
9e019b59
KW
366# Somewhat loose match for an apidoc line so we can catch minor typos.
367# Parentheses are used to capture portions so that below we verify
368# that things are the actual correct syntax.
369my $apidoc_re = qr/ ^ (\s*) # $1
370 (=?) # $2
371 (\s*) # $3
372 for (\s*) # $4
373 apidoc (_item)? # $5
374 (\s*) # $6
375 (.*?) # $7
376 \s* \n /x;
183f6e59 377# Only certain flags, dealing with display, are acceptable for apidoc_item
3734d2f5 378my $display_flags = "fFnDopTx;";
9e019b59
KW
379
380sub check_api_doc_line ($$) {
381 my ($file, $in) = @_;
382
383 return unless $in =~ $apidoc_re;
384
385 my $is_item = defined $5;
386 my $is_in_proper_form = length $1 == 0
387 && length $2 > 0
388 && length $3 == 0
389 && length $4 > 0
6e93196a
KW
390 && length $7 > 0
391 && ( length $6 > 0
392 || ($is_item && substr($7, 0, 1) eq '|'));
9e019b59
KW
393 my $proto_in_file = $7;
394 my $proto = $proto_in_file;
395 $proto = "||$proto" if $proto !~ /\|/;
396 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
397
398 $name && $is_in_proper_form or die <<EOS;
399Bad apidoc at $file line $.:
400 $in
401Expected:
402 =for apidoc flags|returntype|name|arg|arg|...
403 =for apidoc flags|returntype|name
404 =for apidoc name
405(or 'apidoc_item')
406EOS
407
56aac0d0 408 die "Only [$display_flags] allowed in apidoc_item:\n$in"
783469eb
KW
409 if $is_item && $flags =~ /[^$display_flags]/;
410
9e019b59
KW
411 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
412}
94bdecf9 413
4be3b9ca
KW
414sub embed_override($) {
415 my ($element_name) = shift;
416
417 # If the entry is also in embed.fnc, it should be defined
418 # completely there, but not here
419 my $embed_docref = delete $funcflags{$element_name};
420
421 return unless $embed_docref and %$embed_docref;
422
423 my $flags = $embed_docref->{'flags'};
424 warn "embed.fnc entry '$element_name' missing 'd' flag"
425 unless $flags =~ /d/;
426
427 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
428}
429
89222628
KW
430# The section that is in effect at the beginning of the given file. If not
431# listed here, an apidoc_section line must precede any apidoc lines.
432# This allows the files listed here that generally are single-purpose, to not
433# have to worry about the autodoc section
434my %initial_file_section = (
435 'av.c' => $AV_scn,
436 'av.h' => $AV_scn,
437 'cv.h' => $CV_scn,
d05b47c2 438 'deb.c' => $debugging_scn,
189b9288 439 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
89222628
KW
440 'doio.c' => $io_scn,
441 'gv.c' => $GV_scn,
442 'gv.h' => $GV_scn,
443 'hv.h' => $HV_scn,
444 'locale.c' => $locale_scn,
445 'malloc.c' => $memory_scn,
446 'numeric.c' => $numeric_scn,
447 'opnames.h' => $optree_construction_scn,
448 'pad.h'=> $pad_scn,
449 'patchlevel.h' => $versioning_scn,
450 'perlio.h' => $io_scn,
451 'pod/perlapio.pod' => $io_scn,
452 'pod/perlcall.pod' => $callback_scn,
453 'pod/perlembed.pod' => $embedding_scn,
454 'pod/perlfilter.pod' => $filters_scn,
455 'pod/perliol.pod' => $io_scn,
456 'pod/perlmroapi.pod' => $MRO_scn,
457 'pod/perlreguts.pod' => $regexp_scn,
458 'pp_pack.c' => $pack_scn,
459 'pp_sort.c' => $SV_scn,
460 'regcomp.c' => $regexp_scn,
461 'regexp.h' => $regexp_scn,
d63f0f68
KW
462 'sv.h' => $SV_scn,
463 'sv.c' => $SV_scn,
072be79c 464 'sv_inline.h' => $SV_scn,
d8e799d8 465 'taint.c' => $tainting_scn,
89222628
KW
466 'unicode_constants.h' => $unicode_scn,
467 'utf8.c' => $unicode_scn,
468 'utf8.h' => $unicode_scn,
469 'vutil.c' => $versioning_scn,
470 );
471
94bdecf9
JH
472sub autodoc ($$) { # parse a file and extract documentation info
473 my($fh,$file) = @_;
63da8140 474 my($in, $line_num, $header, $section);
f554dfc5 475
89222628
KW
476 $section = $initial_file_section{$file}
477 if defined $initial_file_section{$file};
478
3cfa66c4
KW
479 my $file_is_C = $file =~ / \. [ch] $ /x;
480
f554dfc5 481 # Count lines easier
b1cb8a49 482 my $get_next_line = sub { $line_num++; return <$fh> };
f554dfc5 483
63da8140
KW
484 # Read the file
485 while ($in = $get_next_line->()) {
486 last unless defined $in;
3cfa66c4 487
63da8140
KW
488 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
489 # =head1 lines only have effect in C files
490 || ($file_is_C && $in =~ /^=head1/));
491
492 # Here, the line introduces a portion of the input that we care about.
493 # Either it is for an API element, or heading text which we expect
494 # will be used for elements later in the file
495
496 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
183f6e59 497 my (@args, @items);
63da8140
KW
498
499 # If the line starts a new section ...
500 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
183f6e59 501
b1cb8a49 502 $section = $1;
3f620621
KW
503 if ($section =~ / ^ \$ /x) {
504 $section .= '_scn' unless $section =~ / _scn $ /;
505 $section = eval "$section";
506 die "Unknown \$section variable '$section' in $file: $@" if $@;
507 }
6927cd44
KW
508 die "Unknown section name '$section' in $file near line $.\n"
509 unless defined $valid_sections{$section};
510
3cfa66c4 511 }
63da8140
KW
512 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
513 # plain apidoc line
183f6e59
KW
514 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
515 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
63da8140
KW
516 }
517 else { # Plain apidoc
518
519 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
520 = check_api_doc_line($file, $in);
4be3b9ca
KW
521 # Override this line with any info in embed.fnc
522 my ($embed_flags, $embed_ret_type, @embed_args)
523 = embed_override($element_name);
524 if ($embed_ret_type) {
63da8140 525 warn "embed.fnc entry overrides redundant information in"
4be3b9ca
KW
526 . " '$proto_in_file' in $file"
527 if $flags || $ret_type || @args;
528 $flags = $embed_flags;
529 $ret_type = $embed_ret_type;
530 @args = @embed_args;
531 }
9e728159
KW
532 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not
533 # a macro or typedef
63da8140
KW
534 $missing{$element_name} = $file;
535 }
3cfa66c4 536
56aac0d0 537 die "flag '$1' is not legal (for function $element_name (from $file))"
746dd616 538 if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy;#] ) /x;
151c3fe5 539
9e728159
KW
540 die "'u' flag must also have 'm' or 'y' flags' for $element_name"
541 if $flags =~ /u/ && $flags !~ /[my]/;
63da8140 542 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
31c1232f
YO
543 if $flags !~ /N/ &&
544 $element_name !~ / ^ (?:struct\s+)? [_[:alpha:]] \w* $ /x;
f554dfc5 545
746dd616
KW
546 if ($flags =~ /#/) {
547 die "Return type must be empty for '$element_name'"
548 if $ret_type;
549 $ret_type = '#ifdef';
550 }
551
63da8140
KW
552 if (exists $seen{$element_name} && $flags !~ /h/) {
553 die ("'$element_name' in $file was already documented in $seen{$element_name}");
554 }
555 else {
556 $seen{$element_name} = $file;
151c3fe5 557 }
94bdecf9 558 }
df6bd76f 559
63da8140
KW
560 # Here we have processed the initial line in the heading text or API
561 # element, and have saved the important information from it into the
562 # corresponding variables. Now accumulate the text that applies to it
563 # up to a terminating line, which is one of:
564 # 1) =cut
565 # 2) =head (in a C file only =head1)
566 # 3) an end comment line in a C file: m:^\s*\*/:
183f6e59 567 # 4) =for apidoc... (except apidoc_item lines)
63da8140
KW
568 $text = "";
569 my $head_ender_num = ($file_is_C) ? 1 : "";
570 while (defined($in = $get_next_line->())) {
571
572 last if $in =~ /^=cut/x;
573 last if $in =~ /^=head$head_ender_num/;
574
575 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
576
577 # End of comment line in C files is a fall-back terminator,
578 # but warn only if there actually is some accumulated text
579 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
580 last;
df6bd76f 581 }
df6bd76f 582
63da8140
KW
583 if ($in !~ / ^ =for [ ]+ apidoc /x) {
584 $text .= $in;
585 next;
586 }
587
183f6e59 588 # Here, the line is an apidoc line. All but apidoc_item terminate
63da8140 589 # the text being accumulated.
183f6e59
KW
590 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
591
592 my ($item_name, $item_flags, $item_ret_type, $is_item,
4be3b9ca 593 $item_proto, @item_args) = check_api_doc_line($file, $in);
183f6e59
KW
594 last unless $is_item;
595
596 # Here, is an apidoc_item_line; They can only come within apidoc
597 # paragraphs.
4be3b9ca
KW
598 die "Unexpected api_doc_item line '$item_proto'"
599 unless $element_name;
183f6e59
KW
600
601 # We accept blank lines between these, but nothing else;
56aac0d0
KW
602 die "apidoc_item lines must immediately follow apidoc lines for "
603 . " '$element_name' in $file"
183f6e59 604 if $text =~ /\S/;
4be3b9ca
KW
605 # Override this line with any info in embed.fnc
606 my ($embed_flags, $embed_ret_type, @embed_args)
607 = embed_override($item_name);
608 if ($embed_ret_type) {
609 warn "embed.fnc entry overrides redundant information in"
610 . " '$item_proto' in $file"
611 if $item_flags || $item_ret_type || @item_args;
612
613 $item_flags = $embed_flags;
614 $item_ret_type = $embed_ret_type;
615 @item_args = @embed_args;
616 }
183f6e59
KW
617
618 # Use the base entry flags if none for this item; otherwise add in
619 # any non-display base entry flags.
620 if ($item_flags) {
621 $item_flags .= $flags =~ s/[$display_flags]//rg;
622 }
623 else {
624 $item_flags = $flags;
625 }
626 $item_ret_type = $ret_type unless $item_ret_type;
627 @item_args = @args unless @item_args;
628 push @items, { name => $item_name,
629 ret_type => $item_ret_type,
630 flags => $item_flags,
631 args => [ @item_args ],
632 };
633
634 # This line shows that this element is documented.
635 delete $funcflags{$item_name};
a9a7c81e 636 }
3cfa66c4 637
63da8140
KW
638 # Here, are done accumulating the text for this item. Trim it
639 $text =~ s/ ^ \s* //x;
640 $text =~ s/ \s* $ //x;
641 $text .= "\n" if $text ne "";
6523e108 642
63da8140
KW
643 # And treat all-spaces as nothing at all
644 undef $text unless $text =~ /\S/;
df6bd76f 645
63da8140 646 if ($element_name) {
df6bd76f 647
63da8140 648 # Here, we have accumulated into $text, the pod for $element_name
23c7fd2d 649 my $where = $flags =~ /A/ ? 'api' : 'intern';
5ce57792 650
63da8140
KW
651 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
652 unless defined $section;
2aa73c64
KW
653 my $is_link_only = ($flags =~ /h/);
654 if (! $is_link_only && exists $docs{$where}{$section}{$element_name}) {
63da8140
KW
655 warn "$0: duplicate API entry for '$element_name' in"
656 . " $where/$section\n";
657 next;
20046047 658 }
5ce57792 659
63da8140 660 # Override the text with just a link if the flags call for that
63da8140
KW
661 if ($is_link_only) {
662 if ($file_is_C) {
2aa73c64
KW
663 die "Can't currently handle link with items to it:\n$in"
664 if @items;
665 $docs{$where}{$section}{X_tags}{$element_name} = $file;
63da8140
KW
666 redo; # Don't put anything if C source
667 }
668
669 # Here, is an 'h' flag in pod. We add a reference to the pod (and
670 # nothing else) to perlapi/intern. (It would be better to add a
671 # reference to the correct =item,=header, but something that makes
672 # it harder is that it that might be a duplicate, like '=item *';
673 # so that is a future enhancement XXX. Another complication is
674 # there might be more than one deserving candidates.)
675 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
676 $podname =~ s/\.pod//;
677 $text = "Described in L<$podname>.\n";
678
679 # Don't output a usage example for linked to documentation if
680 # it is trivial (has no arguments) and we aren't to add a
681 # semicolon
3734d2f5 682 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[U;]/;
63da8140
KW
683
684 # Keep track of all the pod files that we refer to.
685 push $described_elsewhere{$podname}->@*, $podname;
686 }
5ce57792 687
5bf02caf
KW
688 $docs{$where}{$section}{$element_name}{flags} = $flags;
689 $docs{$where}{$section}{$element_name}{pod} = $text;
690 $docs{$where}{$section}{$element_name}{file} = $file;
691 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
692 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
183f6e59 693 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
a9a7c81e 694 }
63da8140
KW
695 elsif ($text) {
696 $valid_sections{$section}{header} = "" unless
697 defined $valid_sections{$section}{header};
698 $valid_sections{$section}{header} .= "\n$text";
a9a7c81e 699 }
63da8140
KW
700
701 # We already have the first line of what's to come in $in
702 redo;
703
704 } # End of loop through input
94bdecf9
JH
705}
706
00d68f0d
KW
707my %configs;
708my @has_defs;
709my @has_r_defs; # Reentrant symbols
710my @include_defs;
63447426 711
00d68f0d
KW
712sub parse_config_h {
713 use re '/aa'; # Everthing is ASCII in this file
714
715 # Process config.h
d3871820
KW
716 die "Can't find $config_h" unless -e $config_h;
717 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
00d68f0d
KW
718 while (<$fh>) {
719
720 # Look for lines like /* FOO_BAR:
721 # By convention all config.h descriptions begin like that
722 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
723 my $name = $1;
724
725 # Here we are starting the description for $name in config.h. We
726 # accumulate the entire description for it into @description.
727 # Flowing text from one input line to another is appended into the
728 # same array element to make a single flowing line element, but
729 # verbatim lines are kept as separate elements in @description.
730 # This will facilitate later doing pattern matching without regard
731 # to line boundaries on non-verbatim text.
732
733 die "Multiple config.h entries for '$name'"
734 if defined $configs{$name}{description};
735
736 # Get first line of description
737 $_ = <$fh>;
738
739 # Each line in the description begins with blanks followed by '/*'
740 # and some spaces.
741 die "Unexpected config.h initial line for $name: '$_'"
742 unless s/ ^ ( \s* \* \s* ) //x;
743 my $initial_text = $1;
744
745 # Initialize the description with this first line (after having
746 # stripped the prefix text)
747 my @description = $_;
748
749 # The first line is used as a template for how much indentation
750 # each normal succeeding line has. Lines indented further
751 # will be considered as intended to be verbatim. But, empty lines
752 # likely won't have trailing blanks, so just strip the whole thing
753 # for them.
754 my $strip_initial_qr = qr! \s* \* \s* $
755 | \Q$initial_text\E
756 !x;
757 $configs{$name}{verbatim} = 0;
758
759 # Read in the remainder of the description
760 while (<$fh>) {
761 last if s| ^ \s* \* / ||x; # A '*/' ends it
762
763 die "Unexpected config.h description line for $name: '$_'"
764 unless s/$strip_initial_qr//;
765
766 # Fix up the few flawed lines in config.h wherein a new
767 # sentence begins with a tab (and maybe a space after that).
768 # Although none of them currently do, let it recognize
769 # something like
770 #
771 # "... text"). The next sentence ...
772 #
773 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
774
775 # If this line has extra indentation or looks to have columns,
776 # it should be treated as verbatim. Columns are indicated by
777 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
778 # not preceded by punctuation.
779 if ($_ !~ m/ ^ \s
780 | \S (?: \t
781 | \s{3}
782 | (*nlb:[[:punct:]]) \s{2}
783 )
784 /x)
785 {
786 # But here, is not a verbatim line. Add an empty line if
787 # this is the first non-verbatim after a run of verbatims
788 if ($description[-1] =~ /^\s/) {
789 push @description, "\n", $_;
790 }
791 else { # Otherwise, append this flowing line to the
792 # current flowing line
793 $description[-1] .= $_;
794 }
795 }
796 else {
797 $configs{$name}{verbatim} = 1;
798
799 # The first verbatim line in a run of them is separated by an
800 # empty line from the flowing lines above it
801 push @description, "\n" if $description[-1] =~ /^\S/;
802
803 $_ = Text::Tabs::expand($_);
804
805 # Only a single space so less likely to wrap
806 s/ ^ \s* / /x;
807
808 push @description, $_;
809 }
810 }
811
812 push $configs{$name}{description}->@*, @description
813
814 } # Not a description; see if it is a macro definition.
815 elsif (m! ^
816 (?: / \* )? # Optional commented-out
817 # indication
818 \# \s* define \s+ ( \w+ ) # $1 is the name
819 ( \s* ) # $2 indicates if args or not
820 ( .*? ) # $3 is any definition
821 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
822 $
823 !x)
824 {
825 my $name = $1;
826
827 # There can be multiple definitions for a name. We want to know
828 # if any of them has arguments, and if any has a body.
829 $configs{$name}{has_args} //= $2 eq "";
830 $configs{$name}{has_args} ||= $2 eq "";
831 $configs{$name}{has_defn} //= $3 ne "";
832 $configs{$name}{has_defn} ||= $3 ne "";
833 }
834 }
835
836 # We now have stored the description and information about every #define
837 # in the file. The description is in a form convenient to operate on to
838 # convert to pod. Do that now.
839 foreach my $name (keys %configs) {
840 next unless defined $configs{$name}{description};
841
842 # All adjacent non-verbatim lines of the description are appended
843 # together in a single element in the array. This allows the patterns
844 # to work across input line boundaries.
845
846 my $pod = "";
847 while (defined ($_ = shift $configs{$name}{description}->@*)) {
848 chomp;
849
850 if (/ ^ \S /x) { # Don't edit verbatim lines
851
852 # Enclose known file/path names not already so enclosed
853 # with <...>. (Some entries in config.h are already
854 # '<path/to/file>')
855 my $file_name_qr = qr! [ \w / ]+ \.
856 (?: c | h | xs | p [lm] | pmc | PL
857 | sh | SH | exe ) \b
858 !xx;
859 my $path_name_qr = qr! (?: / \w+ )+ !x;
860 for my $re ($file_name_qr, $path_name_qr) {
861 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
862 }
863
864 # Enclose <... file/path names with F<...> (but no double
865 # angle brackets)
866 for my $re ($file_name_qr, $path_name_qr) {
867 s! < ( $re ) > !F<$1>!gxx;
868 }
869
870 # Explain metaconfig units
871 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
872
873 # Convert "See foo" to "See C<L</foo>>" if foo is described in
874 # this file. Also create a link to the known file INSTALL.
875 # And, to be more general, handle "See also foo and bar", and
876 # "See also foo, bar, and baz"
877 while (m/ \b [Ss]ee \s+
878 (?: also \s+ )? ( \w+ )
879 (?: , \s+ ( \w+ ) )?
880 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
881 my @links = $1;
882 push @links, $2 if defined $2;
883 push @links, $3 if defined $3;
884 foreach my $link (@links) {
885 if ($link eq 'INSTALL') {
886 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
887 }
888 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
889 s| \b $link \b |C<L</$link>>|xg;
890 $configs{$link}{linked} = 1;
891 $configs{$name}{linked} = 1;
892 }
893 }
894 }
895
896 # Enclose what we think are symbols with C<...>.
897 no warnings 'experimental::vlb';
898 s/ (*nlb:<)
899 (
900 # Any word followed immediately with parens or
901 # brackets
902 \b \w+ (?: \( [^)]* \) # parameter list
903 | \[ [^]]* \] # or array reference
904 )
905 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
906 | \b (?: struct | union ) \s \w+
907
908 # Words that contain underscores (which are
909 # definitely not text) or three uppercase letters in
910 # a row. Length two ones, like IV, aren't enclosed,
911 # because they often don't look as nice.
912 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
913 )
914 (*nla:>)
915 /C<$1>/xg;
916
917 # These include foo when the name is HAS_foo. This is a
918 # heuristic which works in most cases.
919 if ($name =~ / ^ HAS_ (.*) /x) {
920 my $symbol = lc $1;
921
922 # Don't include path components, nor things already in
923 # <>, or with trailing '(', '['
924 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
925 }
926 }
927
928 $pod .= "$_\n";
929 }
930 delete $configs{$name}{description};
931
932 $configs{$name}{pod} = $pod;
933 }
934
935 # Now have converted the description to pod. We also now have enough
936 # information that we can do cross checking to find definitions without
937 # corresponding pod, and see if they are mentioned in some description;
938 # otherwise they aren't documented.
939 NAME:
940 foreach my $name (keys %configs) {
941
942 # A definition without pod
943 if (! defined $configs{$name}{pod}) {
944
945 # Leading/trailing underscore means internal to config.h, e.g.,
946 # _GNU_SOURCE
947 next if $name =~ / ^ _ /x;
948 next if $name =~ / _ $ /x;
949
950 # MiXeD case names are internal to config.h; the first 4
951 # characters are sufficient to determine this
952 next if $name =~ / ^ [[:upper:]] [[:lower:]]
953 [[:upper:]] [[:lower:]]
954 /x;
955
956 # Here, not internal to config.h. Look to see if this symbol is
957 # mentioned in the pod of some other. If so, assume it is
958 # documented.
959 foreach my $check_name (keys %configs) {
960 my $this_element = $configs{$check_name};
961 my $this_pod = $this_element->{pod};
962 if (defined $this_pod) {
963 next NAME if $this_pod =~ / \b $name \b /x;
964 }
965 }
966
967 warn "$name has no documentation\n";
968 $missing_macros{$name} = 'config.h';
969
970 next;
971 }
972
973 my $has_defn = $configs{$name}{has_defn};
974 my $has_args = $configs{$name}{has_args};
975
976 # Check if any section already has an entry for this element.
977 # If so, it better be a placeholder, in which case we replace it
978 # with this entry.
979 foreach my $section (keys $docs{'api'}->%*) {
980 if (exists $docs{'api'}{$section}{$name}) {
981 my $was = $docs{'api'}{$section}{$name}->{pod};
982 $was = "" unless $was;
983 chomp $was;
984 if ($was ne "" && $was !~ m/$link_text/) {
985 die "Multiple descriptions for $name\n"
986 . "$section contained '$was'";
987 }
988 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
989 $configs{$name}{section} = $section;
990 last;
991 }
992 }
993
994 my $handled = 0; # Haven't handled this yet
995
996 if (defined $configs{$name}{'section'}) {
997 # This has been taken care of elsewhere.
998 $handled = 1;
999 }
1000 else {
9e728159 1001 my $flags = "";
00d68f0d
KW
1002 if ($has_defn && ! $has_args) {
1003 $configs{$name}{args} = 1;
1004 }
1005
1006 # Symbols of the form I_FOO are for #include files. They have
1007 # special usage information
1008 if ($name =~ / ^ I_ ( .* ) /x) {
1009 my $file = lc $1 . '.h';
1010 $configs{$name}{usage} = <<~"EOT";
1011 #ifdef $name
1012 #include <$file>
1013 #endif
1014 EOT
1015 }
1016
1017 # Compute what section this variable should go into. This
1018 # heuristic was determined by manually inspecting the current
1019 # things in config.h, and should be adjusted as necessary as
1020 # deficiencies are found.
1021 #
1022 # This is the default section for macros with a definiton but
1023 # no arguments, meaning it is replaced unconditionally
1024 #
1025 my $sb = qr/ _ | \b /x; # segment boundary
1026 my $dash_or_spaces = qr/ - | \s+ /x;
1027 my $pod = $configs{$name}{pod};
1028 if ($name =~ / ^ USE_ /x) {
3951f999 1029 $configs{$name}{'section'} = $site_scn;
00d68f0d
KW
1030 }
1031 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
1032 {
3951f999 1033 $configs{$name}{'section'} = $time_scn;
00d68f0d 1034 }
c86b25ed
KW
1035 elsif ( $name =~ / ^ [[:alpha:]]+ f $ /x
1036 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1037 {
a87f9c51 1038 $configs{$name}{'section'} = $io_formats_scn;
c86b25ed 1039 }
00d68f0d
KW
1040 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1041 | $sb CASTFLAGS $sb
1042 | QUADMATH
1043 | $sb (?: IS )? NAN
1044 | $sb (?: IS )? FINITE
1045 /x)
1046 {
1047 $configs{$name}{'section'} =
3951f999 1048 $floating_scn;
00d68f0d
KW
1049 }
1050 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
3951f999 1051 $configs{$name}{'section'} = $filesystem_scn;
00d68f0d
KW
1052 }
1053 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1054 || $configs{$name}{pod} =~ m/ \b align /x)
1055 {
3951f999 1056 $configs{$name}{'section'} = $compiler_scn;
00d68f0d
KW
1057 }
1058 elsif ($name =~ / ^ [IU] [ \d V ]
1059 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1060 {
3951f999 1061 $configs{$name}{'section'} = $integer_scn;
00d68f0d
KW
1062 }
1063 elsif ($name =~ / $sb t $sb /x) {
3951f999 1064 $configs{$name}{'section'} = $typedefs_scn;
9e728159 1065 $flags .= 'y';
00d68f0d
KW
1066 }
1067 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1068 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1069 {
a87f9c51 1070 $configs{$name}{'section'} = $io_formats_scn;
00d68f0d
KW
1071 }
1072 elsif ($name =~ / BACKTRACE /x) {
d1b9805e 1073 $configs{$name}{'section'} = $debugging_scn;
00d68f0d
KW
1074 }
1075 elsif ($name =~ / ALLOC $sb /x) {
3951f999 1076 $configs{$name}{'section'} = $memory_scn;
00d68f0d
KW
1077 }
1078 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
1079 | $sb FILE $sb
1080 | $sb DIR $sb
1081 | $sb LSEEK
1082 | $sb INO $sb
1083 | $sb OPEN
1084 | $sb CLOSE
1085 | ^ DIR
1086 | ^ INO $sb
1087 | DIR $
1088 | FILENAMES
1089 /x
1090 || $configs{$name}{pod} =~ m! I/O | stdio
1091 | file \s+ descriptor
1092 | file \s* system
1093 | statfs
1094 !x)
1095 {
3951f999 1096 $configs{$name}{'section'} = $filesystem_scn;
00d68f0d
KW
1097 }
1098 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
3951f999 1099 $configs{$name}{'section'} = $signals_scn;
00d68f0d
KW
1100 }
1101 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
3951f999 1102 $configs{$name}{'section'} = $prototypes_scn;
00d68f0d
KW
1103 }
1104 elsif ( $name =~ / ^ LOC_ /x
1105 || $configs{$name}{pod} =~ /full path/i)
1106 {
3951f999 1107 $configs{$name}{'section'} = $paths_scn;
00d68f0d
KW
1108 }
1109 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
3e294c2f 1110 $configs{$name}{'section'} = $locale_scn;
00d68f0d
KW
1111 }
1112 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
3951f999 1113 $configs{$name}{'section'} = $compiler_scn;
00d68f0d
KW
1114 }
1115 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1116 {
3951f999 1117 $configs{$name}{'section'} = $password_scn;
00d68f0d
KW
1118 }
1119 elsif ($name =~ / SOCKET | $sb SOCK /x) {
3951f999 1120 $configs{$name}{'section'} = $sockets_scn;
00d68f0d
KW
1121 }
1122 elsif ( $name =~ / THREAD | MULTIPLICITY /x
1123 || $configs{$name}{pod} =~ m/ \b pthread /ix)
1124 {
3951f999 1125 $configs{$name}{'section'} = $concurrency_scn;
00d68f0d
KW
1126 }
1127 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
1128 | VENDOR | ^ USE
1129 /x)
1130 {
3951f999 1131 $configs{$name}{'section'} = $site_scn;
00d68f0d
KW
1132 }
1133 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
63447426
KW
1134 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1135 || $pod =~ / \b doubles \b /ix
1136 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
00d68f0d
KW
1137 {
1138 $configs{$name}{'section'} =
3951f999 1139 $floating_scn;
00d68f0d
KW
1140 }
1141 else {
1142 # Above are the specific sections. The rest go into a
1143 # grab-bag of general configuration values. However, we put
1144 # two classes of them into lists of their names, without their
1145 # descriptions, when we think that the description doesn't add
1146 # any real value. One list contains the #include variables:
1147 # the description is basically boiler plate for each of these.
1148 # The other list contains the very many things that are of the
1149 # form HAS_foo, and \bfoo\b is contained in its description,
1150 # and there is no verbatim text in the pod or links to/from it
1151 # (which would add value). That means that it is likely the
1152 # intent of the variable can be gleaned from just its name,
1153 # and unlikely the description adds signficant value, so just
1154 # listing them suffices. Giving their descriptions would
1155 # expand this pod significantly with little added value.
1156 if ( ! $has_defn
1157 && ! $configs{$name}{verbatim}
1158 && ! $configs{$name}{linked})
1159 {
1160 if ($name =~ / ^ I_ ( .* ) /x) {
1161 push @include_defs, $name;
1162 next;
1163 }
1164 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1165 my $canonical_name = $1;
1166 $canonical_name =~ s/_//g;
1167
1168 my $canonical_pod = $configs{$name}{pod};
1169 $canonical_pod =~ s/_//g;
1170
1171 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1172 if ($name =~ / $sb R $sb /x) {
1173 push @has_r_defs, $name;
1174 }
1175 else {
1176 push @has_defs, $name;
1177 }
1178 next;
1179 }
1180 }
1181 }
1182
3951f999 1183 $configs{$name}{'section'} = $genconfig_scn;
00d68f0d
KW
1184 }
1185
1186 my $section = $configs{$name}{'section'};
6927cd44
KW
1187 die "Internal error: '$section' not in \%valid_sections"
1188 unless grep { $_ eq $section } keys %valid_sections;
9e728159 1189 $flags .= 'AdmnT';
00d68f0d 1190 $flags .= 'U' unless defined $configs{$name}{usage};
9e728159 1191
63447426 1192 # All the information has been gathered; save it
00d68f0d
KW
1193 $docs{'api'}{$section}{$name}{flags} = $flags;
1194 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1195 $docs{'api'}{$section}{$name}{ret_type} = "";
1196 $docs{'api'}{$section}{$name}{file} = 'config.h';
1197 $docs{'api'}{$section}{$name}{usage}
1198 = $configs{$name}{usage} if defined $configs{$name}{usage};
1199 push $docs{'api'}{$section}{$name}{args}->@*, ();
1200 push $docs{'api'}{$section}{$name}{items}->@*, ();
1201 }
1202 }
1203}
1204
c7a2af98
KW
1205sub format_pod_indexes($) {
1206 my $entries_ref = shift;
1207
1208 # Output the X<> references to the names, packed since they don't get
1209 # displayed, but not too many per line so that when someone is editing the
1210 # file, it doesn't run on
1211
1212 my $text ="";
1213 my $line_length = 0;
1214 for my $name (sort dictionary_order $entries_ref->@*) {
1215 my $entry = "X<$name>";
1216 my $entry_length = length $entry;
1217
1218 # Don't loop forever if we have a verrry long name, and don't go too
1219 # far to the right.
1220 if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1221 $text .= "\n";
1222 $line_length = 0;
1223 }
1224
1225 $text .= $entry;
1226 $line_length += $entry_length;
1227 }
1228
1229 return $text;
1230}
1231
b63f8ed4 1232sub docout ($$$) { # output the docs for one function group
b1cb8a49 1233 my($fh, $element_name, $docref) = @_;
5bf02caf 1234
783469eb
KW
1235 # Trim trailing space
1236 $element_name =~ s/\s*$//;
1237
5bf02caf 1238 my $flags = $docref->{flags};
63da8140 1239 my $pod = $docref->{pod} // "";
5bf02caf 1240 my $file = $docref->{file};
783469eb 1241
183f6e59 1242 my @items = $docref->{items}->@*;
5bf02caf 1243
783469eb
KW
1244 # Make the main element the first of the items. This allows uniform
1245 # treatment below
1246 unshift @items, { name => $element_name,
1247 flags => $flags,
1248 ret_type => $docref->{ret_type},
1249 args => [ $docref->{args}->@* ],
1250 };
94bdecf9 1251
7a997878 1252 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
9639517b 1253
783469eb
KW
1254 print $fh "\n=over $description_indent\n";
1255 print $fh "\n=item C<$_->{name}>\n" for @items;
63da8140 1256
783469eb
KW
1257 # If we're printing only a link to an element, this isn't the major entry,
1258 # so no X<> here.
1259 if ($flags !~ /h/) {
1260 print $fh "X<$_->{name}>" for @items;
1261 print $fh "\n";
d4e99c76 1262 }
63da8140 1263
d1681de8
KW
1264 my @deprecated;
1265 my @experimental;
783469eb 1266 for my $item (@items) {
d1681de8
KW
1267 push @deprecated, "C<$item->{name}>" if $item->{flags} =~ /D/;
1268 push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
1269 }
54c193ae 1270
d1681de8
KW
1271 for my $which (\@deprecated, \@experimental) {
1272 if ($which->@*) {
1273 my $is;
1274 my $it;
1275 my $list;
1276
1277 if ($which->@* == 1) {
1278 $is = 'is';
1279 $it = 'it';
1280 $list = $which->[0];
1281 }
1282 elsif ($which->@* == @items) {
1283 $is = 'are';
1284 $it = 'them';
1285 $list = (@items == 2)
1286 ? "both forms"
1287 : "all these forms";
1288 }
1289 else {
1290 $is = 'are';
1291 $it = 'them';
1292 my $final = pop $which->@*;
1293 $list = "the " . join ", ", $which->@*;
1294 $list .= "," if $which->@* > 1;
1295 $list .= " and $final forms";
1296 }
54c193ae 1297
d1681de8
KW
1298 if ($which == \@deprecated) {
1299 print $fh <<~"EOT";
1300
1301 C<B<DEPRECATED!>> It is planned to remove $list
1302 from a future release of Perl. Do not use $it for
1303 new code; remove $it from existing code.
1304 EOT
1305 }
1306 else {
1307 print $fh <<~"EOT";
1308
1309 NOTE: $list $is B<experimental> and may change or be
1310 removed without notice.
1311 EOT
1312 }
783469eb 1313 }
54c193ae 1314 }
94bdecf9 1315
783469eb
KW
1316 chomp $pod; # Make sure prints pod with a single trailing \n
1317 print $fh "\n", $pod, "\n";
3cfa66c4 1318
783469eb
KW
1319 for my $item (@items) {
1320 my $item_flags = $item->{flags};
1321 my $item_name = $item->{name};
3cfa66c4 1322
783469eb
KW
1323 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1324 if $item_flags =~ /O/;
1325 # Is Perl_, but no #define foo # Perl_foo
e2d832c0 1326 if ( ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
783469eb 1327
e2d832c0 1328 # Can't handle threaded varargs
05e20b67
KW
1329 || ( $item_flags =~ /f/
1330 && $item_flags !~ /T/
1331 && $item_name !~ /strftime/))
783469eb
KW
1332 {
1333 $item->{name} = "Perl_$item_name";
1334 print $fh <<~"EOT";
1335
1336 NOTE: C<$item_name> must be explicitly called as
1337 C<$item->{name}>
1338 EOT
1339 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1340 print $fh ".\n";
1341 }
1342 }
94bdecf9 1343
9e728159
KW
1344 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1345 # to never warrant a usage line
3734d2f5
KW
1346 warn("U and ; flags are incompatible")
1347 if $flags =~ /U/ && $flags =~ /;/;
20046047 1348 # nothing
05ca4832 1349 } else {
783469eb
KW
1350
1351 print $fh "\n=over $usage_indent\n";
1352
00d68f0d
KW
1353 if (defined $docref->{usage}) { # An override of the usage section
1354 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1355 }
783469eb
KW
1356 else {
1357
8d1c4423
KW
1358 # Add the thread context formal parameter on expanded-out names
1359 for my $item (@items) {
1360 unshift $item->{args}->@*, (($item->{args}->@*)
1361 ? "pTHX_"
1362 : "pTHX")
1363 if $item->{flags} !~ /T/
1364 && $item->{name} =~ /^Perl_/;
1365 }
1366
783469eb 1367 # Look through all the items in this entry. If all have the same
4be56a7d
KW
1368 # return type and arguments (including thread context), only the
1369 # main entry is displayed.
783469eb
KW
1370 # Also, find the longest return type and longest name so that if
1371 # multiple ones are shown, they can be vertically aligned nicely
783469eb 1372 my $need_individual_usage = 0;
53c9fbf2 1373 my $longest_name_length = length $items[0]->{name};
783469eb 1374 my $base_ret_type = $items[0]->{ret_type};
53c9fbf2 1375 my $longest_ret = length $base_ret_type;
783469eb 1376 my @base_args = $items[0]->{args}->@*;
4be56a7d 1377 my $base_thread_context = $items[0]->{flags} =~ /T/;
53c9fbf2 1378 for (my $i = 1; $i < @items; $i++) {
53c9fbf2 1379 my $item = $items[$i];
56c95a7a
PBB
1380 my $args_are_equal = $item->{args}->@* == @base_args
1381 && !grep $item->{args}[$_] ne $base_args[$_], keys @base_args;
783469eb
KW
1382 $need_individual_usage = 1
1383 if $item->{ret_type} ne $base_ret_type
56c95a7a 1384 || ! $args_are_equal
4be56a7d
KW
1385 || ( $item->{flags} =~ /T/
1386 != $base_thread_context);
783469eb
KW
1387 my $ret_length = length $item->{ret_type};
1388 $longest_ret = $ret_length if $ret_length > $longest_ret;
1389 my $name_length = length $item->{name};
1390 $longest_name_length = $name_length
1391 if $name_length > $longest_name_length;
1ded1f42 1392 }
783469eb
KW
1393
1394 # If we're only showing one entry, only its length matters.
1395 $longest_name_length = length($items[0]->{name})
1396 unless $need_individual_usage;
1397 print $fh "\n";
1398
1399 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1400 # for more
1401 my $ret_name_sep_length = 2; # spaces between return type and name
1402 my $name_indent = $indent + $longest_ret;
1403 $name_indent += $ret_name_sep_length if $longest_ret;
1404
ca711307
KW
1405 my $this_max_width =
1406 $max_width - $description_indent - $usage_indent;
1407
783469eb
KW
1408 for my $item (@items) {
1409 my $ret_type = $item->{ret_type};
1410 my @args = $item->{args}->@*;
1411 my $name = $item->{name};
1412 my $item_flags = $item->{flags};
1413
783469eb
KW
1414 # The return type
1415 print $fh (" " x $indent), $ret_type;
1416
1417 print $fh " " x ( $ret_name_sep_length
1418 + $longest_ret - length $ret_type);
1419 print $fh $name;
1420
1421 if ($item_flags =~ /n/) { # no args
1422 warn("$file: $element_name: n flag without m")
1423 unless $item_flags =~ /m/;
1424 warn("$file: $name: n flag but apparently has args")
1425 if @args;
1426 }
1427 else {
1428 # +1 for the '('
1429 my $arg_indent = $name_indent + $longest_name_length + 1;
1430
1431 # Align the argument lists of the items
1432 print $fh " " x ($longest_name_length - length($name));
1433 print $fh "(";
1434
1435 # Display as many of the arguments on the same line as
1436 # will fit.
1437 my $total_length = $arg_indent;
1438 my $first_line = 1;
1439 for (my $i = 0; $i < @args; $i++) {
1440 my $arg = $args[$i];
1441 my $arg_length = length($arg);
1442
1443 # All but the first arg are preceded by a blank
1444 my $use_blank = $i > 0;
1445
1446 # +1 here and below because either the argument has a
1447 # trailing comma or trailing ')'
1448 $total_length += $arg_length + $use_blank + 1;
1449
1450 # We want none of the arguments to be positioned so
1451 # they extend too far to the right. Ideally, they
1452 # should all start in the same column as the arguments
1453 # on the first line of the function display do. But, if
1454 # necessary, outdent them so that they all start in
1455 # another column, with the longest ending at the right
1456 # margin, like so:
1457 # void function_name(pTHX_ short1,
1458 # short2,
1459 # very_long_argument,
1460 # short3)
ca711307 1461 if ($total_length > $this_max_width) {
783469eb
KW
1462
1463 # If this is the first continuation line,
1464 # calculate the longest argument; this will be the
1465 # one we may have to outdent for.
1466 if ($first_line) {
1467 $first_line = 0;
1468
1469 # We will need at least as much as the current
1470 # argument
1471 my $longest_arg_length = $arg_length
1472 + $use_blank + 1;
1473
1474 # Look through the rest of the args to see if
1475 # any are longer than this one.
1476 for (my $j = $i + 1; $j < @args; $j++) {
1477
1478 # Include the trailing ',' or ')' in the
1479 # length. No need to concern ourselves
1480 # with a leading blank, as the argument
1481 # would be positioned first on the next
1482 # line
1483 my $peek_arg_length = length ($args[$j])
1484 + 1;
1485 $longest_arg_length = $peek_arg_length
1486 if $peek_arg_length > $longest_arg_length;
1487 }
1488
1489 # Calculate the new indent if necessary.
ca711307
KW
1490 $arg_indent =
1491 $this_max_width - $longest_arg_length
783469eb 1492 if $arg_indent + $longest_arg_length
ca711307 1493 > $this_max_width;
783469eb
KW
1494 }
1495
1496 print $fh "\n", (" " x $arg_indent);
1497 $total_length = $arg_indent + $arg_length + 1;
1498 $use_blank = 0;
1499 }
1500
1501 # Display this argument
1502 print $fh " " if $use_blank;
1503 print $fh $arg;
1504 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1505
1506 } # End of loop through args
1507
1508 print $fh ")";
1ded1f42 1509 }
783469eb 1510
3734d2f5 1511 print $fh ";" if $item_flags =~ /;/; # semicolon: "dTHR;"
783469eb
KW
1512 print $fh "\n";
1513
1514 # Only the first entry is normally displayed
1515 last unless $need_individual_usage;
1ded1f42 1516 }
1ded1f42 1517 }
783469eb
KW
1518
1519 print $fh "\n=back\n";
94bdecf9 1520 }
783469eb
KW
1521
1522 print $fh "\n=back\n";
63da8140 1523 print $fh "\n=for hackers\nFound in file $file\n";
94bdecf9
JH
1524}
1525
02c5183e 1526sub construct_missings_section {
cda0e7fb 1527 my ($missings_hdr, $missings_ref) = @_;
02c5183e
KW
1528 my $text = "";
1529
cda0e7fb 1530 $text .= "$missings_hdr\n" . format_pod_indexes($missings_ref);
02c5183e 1531
cda0e7fb
KW
1532 if ($missings_ref->@* == 0) {
1533 return $text . "\nThere are currently no items of this type\n";
1534 }
e4f5ddf4
KW
1535
1536 # Sort the elements.
1537 my @missings = sort dictionary_order $missings_ref->@*;
1538
e4f5ddf4 1539
c7a2af98 1540 $text .= "\n";
e4f5ddf4
KW
1541
1542 use integer;
1543
1544 # Look through all the elements in the list and see how many columns we
1545 # could place them in the output what will fit in the available width.
1546 my $min_spacer = 2; # Need this much space between columns
1547 my $columns;
1548 my $rows;
1549 my @col_widths;
1550
1551 COLUMN:
1552 # We start with more columns, and work down until we find a number that
1553 # can accommodate all the data. This algorithm doesn't require the
1554 # resulting columns to all have the same width. This can allow for
1555 # as tight of packing as the data will possibly allow.
d788e675 1556 for ($columns = 7; $columns >= 1; $columns--) {
e4f5ddf4
KW
1557
1558 # For this many columns, we will need this many rows (final row might
1559 # not be completely filled)
1560 $rows = (@missings + $columns - 1) / $columns;
1561
d788e675
KW
1562 # We only need to execute this final iteration to calculate the number
1563 # of rows, as we can't get fewer than a single column.
1564 last if $columns == 1;
1565
9d663ab6 1566 my $row_width = 1; # For 1 space indent
e4f5ddf4
KW
1567 my $i = 0; # Which missing element
1568
1569 # For each column ...
1570 for my $col (0 .. $columns - 1) {
1571
1572 # Calculate how wide the column needs to be, which is based on the
1573 # widest element in it
1574 $col_widths[$col] = 0;
1575
1576 # Look through all the rows to find the widest element
1577 for my $row (0 .. $rows - 1) {
1578
1579 # Skip if this row doesn't have an entry for this column
1580 last if $i >= @missings;
1581
1582 # This entry occupies this many bytes.
1583 my $this_width = length $missings[$i];
1584
1585 # All but the final column need a spacer between it and the
1586 # next column over.
1587 $this_width += $min_spacer if $col < $columns - 1;
1588
1589
1590 # This column will need to have enough width to accommodate
1591 # this element
1592 if ($this_width > $col_widths[$col]) {
1593
1594 # We can't have this many columns if the total width
1595 # exceeds the available; bail now and try fewer columns
1596 next COLUMN if $row_width + $this_width > $max_width;
1597
1598 $col_widths[$col] = $this_width;
1599 }
1600
1601 $i++; # The next row will contain the next item
1602 }
1603
1604 $row_width += $col_widths[$col];
1605 next COLUMN if $row_width > $max_width;
1606 }
1607
1608 # If we get this far, this many columns works
1609 last;
1610 }
1611
1612 # Here, have calculated the number of rows ($rows) and columns ($columns)
1613 # required to list the elements. @col_widths contains the width of each
1614 # column.
1615
fa5d437b 1616 $text .= "\n";
e4f5ddf4
KW
1617
1618 # Assemble the output
1619 for my $row (0 .. $rows - 1) {
1620 for my $col (0 .. $columns - 1) {
1621 $text .= " " if $col == 0; # Indent one to mark as verbatim
1622
1623 my $index = $row + $rows * $col; # Convert 2 dimensions to 1
1624
1625 # Skip if this row doesn't have an entry for this column
1626 next if $index >= @missings;
1627
1628 my $element = $missings[$index];
1629 $text .= $element;
1630
1631 # Add alignment spaces for all but final column
1632 $text .= " " x ($col_widths[$col] - length $element)
1633 if $col < $columns - 1;
1634 }
02c5183e 1635
e4f5ddf4 1636 $text .= "\n"; # End of row
02c5183e
KW
1637 }
1638
02c5183e
KW
1639 return $text;
1640}
1641
1642sub dictionary_order {
431ea43d
KW
1643 # Do a case-insensitive dictionary sort, falling back in stages to using
1644 # everything for determinancy. The initial comparison ignores
1645 # all non-word characters and non-trailing underscores and digits, with
1646 # trailing ones collating to after any other characters. This collation
1647 # order continues in case tie breakers are needed; sequences of digits
1648 # that do get looked at always compare numerically. The first tie
1649 # breaker takes all digits and underscores into account. The next tie
1650 # breaker uses a caseless character-by-character comparison of everything
1651 # (including non-word characters). Finally is a cased comparison.
1652 #
1653 # This gives intuitive results, but obviously could be tweaked.
1654
1655 no warnings 'non_unicode';
1656
1657 local $a = $a;
1658 local $b = $b;
1659
1660 # Convert all digit sequences to same length with leading zeros, so for
1661 # example, 8 will compare less than 16 (using a fill length value that
1662 # should be longer than any sequence in the input).
1663 $a =~ s/(\d+)/sprintf "%06d", $1/ge;
1664 $b =~ s/(\d+)/sprintf "%06d", $1/ge;
1665
1666 # Translate any underscores and digits so they compare after all Unicode
1667 # characters
1668 $a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1669 $b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1670
1671 use feature 'state';
1672 # Modify \w, \W to reflect the changes.
1673 state $ud = '\x{110000}-\x{11000A}'; # xlated underscore, digits
1674 state $w = "\\w$ud"; # new \w string
1675 state $mod_w = qr/[$w]/;
1676 state $mod_W = qr/[^$w]/;
1677
1678 # Only \w for initial comparison
1679 my $a_only_word = uc($a =~ s/$mod_W//gr);
1680 my $b_only_word = uc($b =~ s/$mod_W//gr);
1681
1682 # And not initial nor interior underscores nor digits (by squeezing them
1683 # out)
1684 my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1685 my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1686
1687 # If the stripped versions differ, use that as the comparison.
1688 my $cmp = $a_stripped cmp $b_stripped;
1689 return $cmp if $cmp;
1690
1691 # For the first tie breaker, repeat, but consider initial and interior
1692 # underscores and digits, again having those compare after all Unicode
1693 # characters
1694 $cmp = $a_only_word cmp $b_only_word;
1695 return $cmp if $cmp;
1696
1697 # Next tie breaker is just a caseless comparison
1698 $cmp = uc($a) cmp uc($b);
1699 return $cmp if $cmp;
1700
1701 # Finally a straight comparison
1702 return $a cmp $b;
f83c6033
KW
1703}
1704
7b73ff98 1705sub output {
cda0e7fb 1706 my ($podname, $header, $dochash, $footer, @missings_refs) = @_;
6a4c4cd4
DM
1707 #
1708 # strip leading '|' from each line which had been used to hide
1709 # pod from pod checkers.
cda0e7fb 1710 s/^\|//gm for $header, $footer, @missings_refs;
6a4c4cd4 1711
7882b24a 1712 my $fh = open_new("pod/$podname.pod", undef,
20046047 1713 {by => "$0 extracting documentation",
f1f44974 1714 from => 'the C source files'}, 1);
e0492643 1715
63da8140 1716 print $fh $header, "\n";
e0492643 1717
02c5183e 1718 for my $section_name (sort dictionary_order keys %valid_sections) {
7a997878 1719 my $section_info = $dochash->{$section_name};
36ea2d2f 1720
5781dcac
KW
1721 # We allow empty sections in perlintern.
1722 if (! $section_info && $podname eq 'perlapi') {
36ea2d2f 1723 warn "Empty section '$section_name'; skipped";
36ea2d2f
KW
1724 next;
1725 }
1726
63da8140
KW
1727 print $fh "\n=head1 $section_name\n";
1728
2aa73c64 1729 if ($section_info->{X_tags}) {
52a83b1c 1730 print $fh "X<$_>" for sort keys $section_info->{X_tags}->%*;
2aa73c64
KW
1731 print $fh "\n";
1732 delete $section_info->{X_tags};
1733 }
1734
5781dcac
KW
1735 if ($podname eq 'perlapi') {
1736 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1737 if defined $valid_sections{$section_name}{header};
151c3fe5 1738
5781dcac
KW
1739 # Output any heading-level documentation and delete so won't get in
1740 # the way later
1741 if (exists $section_info->{""}) {
1742 print $fh "\n", $section_info->{""}, "\n";
1743 delete $section_info->{""};
1744 }
151c3fe5 1745 }
151c3fe5 1746
74ab7b79 1747 if ($section_info && keys $section_info->%*) {
02c5183e 1748 for my $function_name (sort dictionary_order keys %$section_info) {
5781dcac
KW
1749 docout($fh, $function_name, $section_info->{$function_name});
1750 }
1751 }
1752 else {
2aa73c64
KW
1753 my $pod_type = ($podname eq 'api') ? "public" : "internal";
1754 print $fh "\nThere are currently no $pod_type API items in ",
1755 $section_name, "\n";
20046047 1756 }
00b0fb83
KW
1757
1758 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
5781dcac
KW
1759 if $podname eq 'perlapi'
1760 && defined $valid_sections{$section_name}{footer};
7b73ff98
NC
1761 }
1762
cda0e7fb
KW
1763
1764 my $first_time = 1;
1765 while (1) {
1766 my $missings_hdr = shift @missings_refs or last;
1767 my $missings_ref = shift @missings_refs or die "Foo";
1768
1769 if ($first_time) {
1770 $first_time = 0;
1771 print $fh <<~EOT;
1772
1773 =head1 $undocumented_scn
1774
1775 EOT
1776 }
1777
1778 print $fh construct_missings_section($missings_hdr, $missings_ref);
1779 }
63da8140
KW
1780
1781 print $fh "\n$footer\n=cut\n";
5a0155e6 1782
7882b24a 1783 read_only_bottom_close_and_rename($fh);
cd093254
MM
1784}
1785
e8e591c9 1786foreach (@{(setup_embed())[0]}) {
69e2d40a
YO
1787 my $embed= $_->{embed}
1788 or next;
1789 my ($flags, $ret_type, $func, $args) = @{$embed}{qw(flags return_type name args)};
1790 my @munged_args= @$args;
1791 s/\b(?:NN|NULLOK)\b\s+//g for @munged_args;
bc350081 1792
5ce57792 1793 $funcflags{$func} = {
20046047 1794 flags => $flags,
b1cb8a49 1795 ret_type => $ret_type,
69e2d40a 1796 args => \@munged_args,
20046047 1797 };
5ce57792
NC
1798}
1799
5ce57792
NC
1800# glob() picks up docs from extra .c or .h files that may be in unclean
1801# development trees.
741c0772
NC
1802open my $fh, '<', 'MANIFEST'
1803 or die "Can't open MANIFEST: $!";
1804while (my $line = <$fh>) {
b87d9527 1805 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 1806
e10306bf
KW
1807 # Don't pick up pods from these.
1808 next if $file =~ m! ^ ( cpan | dist | ext ) / !x
1809 && ! defined $extra_input_pods{$file};
7cb0b6c4 1810
1ae6ead9 1811 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
1812 autodoc(\*F,$file);
1813 close F or die "Error closing $file: $!\n";
1814}
741c0772 1815close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792 1816
00d68f0d
KW
1817parse_config_h();
1818
5ce57792
NC
1819for (sort keys %funcflags) {
1820 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 1821 next if $funcflags{$_}{flags} =~ /h/;
573cf2be 1822 warn "no docs for $_\n";
bc350081 1823}
94bdecf9 1824
5ce57792 1825foreach (sort keys %missing) {
5ce57792 1826 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
1827}
1828
ff5af78d
KW
1829# List of funcs in the public API that aren't also marked as core-only,
1830# experimental nor deprecated.
cda0e7fb
KW
1831
1832my @undocumented_api = grep { $funcflags{$_}{flags} =~ /A/
1833 && ! $docs{api}{$_}
1834 } keys %funcflags;
1835my @undocumented_intern = grep { $funcflags{$_}{flags} !~ /[AS]/
1836 && ! $docs{intern}{$_}
1837 } keys %funcflags;
1838my @undocumented_deprecated_api = grep { $funcflags{$_}{flags} =~ /D/ }
1839 @undocumented_api;
1840my @undocumented_deprecated_intern = grep { $funcflags{$_}{flags} =~ /D/ }
1841 @undocumented_intern;
1842my @undocumented_experimental_api = grep { $funcflags{$_}{flags} =~ /x/ }
1843 @undocumented_api;
1844my @undocumented_experimental_intern = grep { $funcflags{$_}{flags} =~ /x/ }
1845 @undocumented_intern;
1846my @missing_api = grep { $funcflags{$_}{flags} !~ /[xD]/ } @undocumented_api;
00d68f0d 1847push @missing_api, keys %missing_macros;
a026e430 1848
cda0e7fb
KW
1849my @missing_intern = grep { $funcflags{$_}{flags} !~ /[xD]/ }
1850 @undocumented_intern;
1851
e10306bf 1852my @other_places = ( qw(perlclib ), keys %described_elsewhere );
ea7c1d7a
KW
1853my $places_other_than_intern = join ", ",
1854 map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1855my $places_other_than_api = join ", ",
1856 map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
a026e430 1857
00d68f0d 1858# The S< > makes things less densely packed, hence more readable
02c5183e
KW
1859my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1860my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
3951f999
KW
1861$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1862$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
00d68f0d 1863
02c5183e 1864my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
3951f999 1865$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
00d68f0d 1866
55d5a9fe
KW
1867my $section_list = join "\n\n", map { "=item L</$_>" }
1868 sort(dictionary_order keys %valid_sections),
1869 $undocumented_scn; # Keep last
7260d4ba 1870
cda0e7fb
KW
1871# Leading '|' is to hide these lines from pod checkers. khw is unsure if this
1872# is still needed.
1873my $api_hdr = <<"_EOB_";
6a4c4cd4
DM
1874|=encoding UTF-8
1875|
1876|=head1 NAME
1877|
1878|perlapi - autogenerated documentation for the perl public API
1879|
1880|=head1 DESCRIPTION
1881|X<Perl API> X<API> X<api>
1882|
b87d9527
KW
1883|This file contains most of the documentation of the perl public API, as
1884|generated by F<embed.pl>. Specifically, it is a listing of functions,
a026e430
KW
1885|macros, flags, and variables that may be used by extension writers. Besides
1886|L<perlintern> and F<config.h>, some items are listed here as being actually
1887|documented in another pod.
b87d9527 1888|
55d5a9fe 1889|L<At the end|/$undocumented_scn> is a list of functions which have yet
b87d9527
KW
1890|to be documented. Patches welcome! The interfaces of these are subject to
1891|change without notice.
1892|
6927cd44
KW
1893|Some of the functions documented here are consolidated so that a single entry
1894|serves for multiple functions which all do basically the same thing, but have
1895|some slight differences. For example, one form might process magic, while
1896|another doesn't. The name of each variation is listed at the top of the
1897|single entry. But if all have the same signature (arguments and return type)
1898|except for their names, only the usage for the base form is shown. If any
1899|one of the forms has a different signature (such as returning C<const> or
1900|not) every function's signature is explicitly displayed.
7260d4ba 1901|
a026e430
KW
1902|Anything not listed here or in the other mentioned pods is not part of the
1903|public API, and should not be used by extension writers at all. For these
1904|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1905|writing extensions.
6a4c4cd4
DM
1906|
1907|In Perl, unlike C, a string of characters may generally contain embedded
1908|C<NUL> characters. Sometimes in the documentation a Perl string is referred
1909|to as a "buffer" to distinguish it from a C string, but sometimes they are
1910|both just referred to as strings.
1911|
1912|Note that all Perl API global variables must be referenced with the C<PL_>
1913|prefix. Again, those not listed here are not to be used by extension writers,
c5321c96 1914|and may be changed or removed without notice; same with macros.
6a4c4cd4
DM
1915|Some macros are provided for compatibility with the older,
1916|unadorned names, but this support may be disabled in a future release.
1917|
1918|Perl was originally written to handle US-ASCII only (that is characters
1919|whose ordinal numbers are in the range 0 - 127).
1920|And documentation and comments may still use the term ASCII, when
1921|sometimes in fact the entire range from 0 - 255 is meant.
1922|
1923|The non-ASCII characters below 256 can have various meanings, depending on
1924|various things. (See, most notably, L<perllocale>.) But usually the whole
1925|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1926|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1927|"Latin1" as referring just to the characters in the range 128 through 255, or
2de859ce 1928|sometimes from 160 through 255.
6a4c4cd4
DM
1929|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1930|
1931|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1932|L<perlebcdic>). Most of the documentation (and even comments in the code)
1933|ignore the EBCDIC possibility.
1934|For almost all purposes the differences are transparent.
1935|As an example, under EBCDIC,
1936|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1937|whenever this documentation refers to C<utf8>
1938|(and variants of that name, including in function names),
1939|it also (essentially transparently) means C<UTF-EBCDIC>.
1940|But the ordinals of characters differ between ASCII, EBCDIC, and
1941|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1942|number of bytes than in UTF-8.
1943|
6927cd44
KW
1944|The organization of this document is tentative and subject to change.
1945|Suggestions and patches welcome
1946|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1947|
1948|The sections in this document currently are
1949|
014f1aa3 1950|=over $standard_indent
6927cd44
KW
1951
1952|$section_list
1953|
1954|=back
1955|
6a4c4cd4 1956|The listing below is alphabetical, case insensitive.
94bdecf9 1957_EOB_
cda0e7fb
KW
1958
1959my $api_footer = <<"_EOE_";
6a4c4cd4
DM
1960|=head1 AUTHORS
1961|
1962|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 1963|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
1964|
1965|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1966|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1967|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1968|Stephen McCamant, and Gurusamy Sarathy.
1969|
7b1f0a98 1970|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
1971|
1972|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1973|
1974|=head1 SEE ALSO
1975|
ea7c1d7a 1976|F<config.h>, $places_other_than_api
94bdecf9
JH
1977_EOE_
1978
cda0e7fb
KW
1979my $api_missings_hdr = <<'_EOT_';
1980|The following functions have been flagged as part of the public
1981|API, but are currently undocumented. Use them at your own risk,
1982|as the interfaces are subject to change. Functions that are not
1983|listed in this document are not intended for public use, and
1984|should NOT be used under any circumstances.
1985|
1986|If you feel you need to use one of these functions, first send
1987|email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1988|It may be that there is a good reason for the function not being
1989|documented, and it should be removed from this list; or it may
1990|just be that no one has gotten around to documenting it. In the
1991|latter case, you will be asked to submit a patch to document the
1992|function. Once your patch is accepted, it will indicate that the
1993|interface is stable (unless it is explicitly marked otherwise) and
1994|usable by you.
1995_EOT_
1996
1997my $api_experimental_hdr = <<"_EOT_";
1998|
1999|Next are the API-flagged elements that are considered experimental. Using one
2000|of these is even more risky than plain undocumented ones. They are listed
2001|here because they should be listed somewhere (so their existence doesn't get
2002|lost) and this is the best place for them.
2003_EOT_
2004
2005my $api_deprecated_hdr = <<"_EOT_";
2006|
2007|Finally are deprecated undocumented API elements.
2008|Do not use any for new code; remove all occurrences of all of these from
2009|existing code.
2010_EOT_
2011
2012output('perlapi', $api_hdr, $docs{api}, $api_footer,
2013 $api_missings_hdr, \@missing_api,
2014 $api_experimental_hdr, \@undocumented_experimental_api,
2015 $api_deprecated_hdr, \@undocumented_deprecated_api);
5a0155e6 2016
cda0e7fb 2017my $intern_hdr = <<"_EOB_";
6a4c4cd4
DM
2018|=head1 NAME
2019|
2020|perlintern - autogenerated documentation of purely B<internal>
20046047 2021|Perl functions
6a4c4cd4
DM
2022|
2023|=head1 DESCRIPTION
2024|X<internal Perl functions> X<interpreter functions>
2025|
2026|This file is the autogenerated documentation of functions in the
2027|Perl interpreter that are documented using Perl's internal documentation
2028|format but are not marked as part of the Perl API. In other words,
2029|B<they are not for use in extensions>!
5781dcac
KW
2030
2031|It has the same sections as L<perlapi>, though some may be empty.
6a4c4cd4 2032|
7b1f0a98 2033_EOB_
cda0e7fb
KW
2034
2035my $intern_footer = <<"_EOE_";
6a4c4cd4
DM
2036|
2037|=head1 AUTHORS
2038|
2039|The autodocumentation system was originally added to the Perl core by
2040|Benjamin Stuhl. Documentation is by whoever was kind enough to
2041|document their functions.
2042|
2043|=head1 SEE ALSO
2044|
ea7c1d7a 2045|F<config.h>, $places_other_than_intern
7b1f0a98 2046_EOE_
cda0e7fb
KW
2047
2048my $intern_missings_hdr = <<"_EOT_";
2049|
2050|This section lists the elements that are otherwise undocumented. If you use
2051|any of them, please consider creating and submitting documentation for it.
2052|
2053|Experimental and deprecated undocumented elements are listed separately at the
2054|end.
2055|
2056_EOT_
2057
2058my $intern_experimental_hdr = <<"_EOT_";
2059|
2060|Next are the experimental undocumented elements
2061|
2062_EOT_
2063
2064my $intern_deprecated_hdr = <<"_EOT_";
2065|
2066|Finally are the deprecated undocumented elements.
2067|Do not use any for new code; remove all occurrences of all of these from
2068|existing code.
2069|
2070_EOT_
2071
2072output('perlintern', $intern_hdr, $docs{intern}, $intern_footer,
2073 $intern_missings_hdr, \@missing_intern,
2074 $intern_experimental_hdr, \@undocumented_experimental_intern,
2075 $intern_deprecated_hdr, \@undocumented_deprecated_intern
2076 );