This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Promote v5.36 usage and feature bundles doc
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
00d68f0d
KW
2
3use Text::Tabs;
a9a7c81e 4#
6294c161
DM
5# Unconditionally regenerate:
6#
7# pod/perlintern.pod
8# pod/perlapi.pod
9#
10# from information stored in
11#
12# embed.fnc
9b983db7 13# plus all the core .c, .h, and .pod files listed in MANIFEST
e10306bf
KW
14# plus %extra_input_pods
15
16my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
17
6294c161 18# Has an optional arg, which is the directory to chdir to before reading
9b983db7 19# MANIFEST and the files
6294c161 20#
52a9d53b 21# This script is invoked as part of 'make all'
151c3fe5 22#
9b983db7
KW
23# The generated pod consists of sections of related elements, functions,
24# macros, and variables. The keys of %valid_sections give the current legal
25# ones. Just add a new key to add a section.
26#
27# Throughout the files read by this script are lines like
28#
29# =for apidoc_section Section Name
3f620621 30# =for apidoc_section $section_name_variable
9b983db7
KW
31#
32# "Section Name" (after having been stripped of leading space) must be one of
3f620621
KW
33# the legal section names, or an error is thrown. $section_name_variable must
34# be one of the legal section name variables defined below; these expand to
35# legal section names. This form is used so that minor wording changes in
1e45e087 36# these titles can be confined to this file. All the names of the variables
3f620621
KW
37# end in '_scn'; this suffix is optional in the apidoc_section lines.
38#
39# All API elements defined between this line and the next 'apidoc_section'
40# line will go into the section "Section Name" (or $section_name_variable),
41# sorted by dictionary order within it. perlintern and perlapi are parallel
42# documents, each potentially with a section "Section Name". Each element is
43# marked as to which document it goes into. If there are none for a
44# particular section in perlapi, that section is omitted.
9b983db7
KW
45#
46# Also, in .[ch] files, there may be
47#
48# =head1 Section Name
49#
50# lines in comments. These are also used by this program to switch to section
51# "Section Name". The difference is that if there are any lines after the
52# =head1, inside the same comment, and before any =for apidoc-ish lines, they
53# are used as a heading for section "Section Name" (in both perlintern and
54# perlapi). This includes any =head[2-5]. If more than one '=head1 Section
55# Name' line has content, they appear in the generated pod in an undefined
3f620621 56# order. Note that you can't use a $section_name_variable in =head1 lines
9b983db7
KW
57#
58# The next =head1, =for apidoc_section, or file end terminates what goes into
59# the current section
60#
61# The %valid_sections hash below also can have header content, which will
62# appear before any =head1 content. The hash can also have footer content
63# content, which will appear at the end of the section, after all the
64# elements.
65#
66# The lines that define the actual functions, etc are documented in embed.fnc,
67# because they have flags which must be kept in sync with that file.
1fcde0e9 68
56a0c332 69use strict;
04adaf08 70use warnings;
a64c954a 71
fa5d437b
KW
72my $nroff_min_indent = 4; # for non-heading lines
73# 80 column terminal - 2 for pager adding 2 columns;
74my $max_width = 80 - 2 - $nroff_min_indent;
75my $standard_indent = 4; # Any additional indentations
e4f5ddf4 76
7882b24a
NC
77if (@ARGV) {
78 my $workdir = shift;
79 chdir $workdir
80 or die "Couldn't chdir to '$workdir': $!";
81}
3d7c117d
MB
82require './regen/regen_lib.pl';
83require './regen/embed_lib.pl';
7882b24a 84
a026e430 85my %described_elsewhere;
b87d9527 86
94bdecf9 87#
346f75ff 88# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
89# This is used to generate prototype headers under various configurations,
90# export symbols lists for different platforms, and macros to provide an
91# implicit interpreter context argument.
92#
93
6a235718 94my %docs;
df6bd76f 95my %seen;
5ce57792 96my %funcflags;
5ce57792 97my %missing;
00d68f0d
KW
98my %missing_macros;
99
100my $link_text = "Described in";
101
783469eb
KW
102my $description_indent = 4;
103my $usage_indent = 3; # + initial blank yields 4 total
104
3e294c2f 105my $AV_scn = 'AV Handling';
3951f999
KW
106my $callback_scn = 'Callback Functions';
107my $casting_scn = 'Casting';
108my $casing_scn = 'Character case changing';
109my $classification_scn = 'Character classification';
110my $names_scn = 'Character names';
111my $scope_scn = 'Compile-time scope hooks';
112my $compiler_scn = 'Compiler and Preprocessor information';
113my $directives_scn = 'Compiler directives';
114my $concurrency_scn = 'Concurrency';
240d081e 115my $COP_scn = 'COPs and Hint Hashes';
3951f999
KW
116my $CV_scn = 'CV Handling';
117my $custom_scn = 'Custom Operators';
d1b9805e
KW
118my $debugging_scn = 'Debugging';
119my $display_scn = 'Display functions';
9b1c4c45 120my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
3951f999
KW
121my $errno_scn = 'Errno';
122my $exceptions_scn = 'Exception Handling (simple) Macros';
123my $filesystem_scn = 'Filesystem configuration values';
b6f27e58 124my $filters_scn = 'Source Filters';
baa5caa0 125my $floating_scn = 'Floating point';
3951f999
KW
126my $genconfig_scn = 'General Configuration';
127my $globals_scn = 'Global Variables';
f3d866f2 128my $GV_scn = 'GV Handling and Stashes';
3951f999
KW
129my $hook_scn = 'Hook manipulation';
130my $HV_scn = 'HV Handling';
131my $io_scn = 'Input/Output';
a87f9c51 132my $io_formats_scn = 'I/O Formats';
baa5caa0 133my $integer_scn = 'Integer';
3951f999 134my $lexer_scn = 'Lexer interface';
3e294c2f 135my $locale_scn = 'Locales';
3951f999
KW
136my $magic_scn = 'Magic';
137my $memory_scn = 'Memory Management';
3e294c2f 138my $MRO_scn = 'MRO';
3951f999
KW
139my $multicall_scn = 'Multicall Functions';
140my $numeric_scn = 'Numeric Functions';
72957953
KW
141
142# Now combined, as unclear which functions go where, but separate names kept
143# to avoid 1) other code changes; 2) in case it seems better to split again
144my $optrees_scn = 'Optrees';
145my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
146my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
3951f999
KW
147my $pack_scn = 'Pack and Unpack';
148my $pad_scn = 'Pad Data Structures';
149my $password_scn = 'Password and Group access';
6ef63541 150my $reports_scn = 'Reports and Formats';
3951f999 151my $paths_scn = 'Paths to system commands';
3951f999
KW
152my $prototypes_scn = 'Prototype information';
153my $regexp_scn = 'REGEXP Functions';
154my $signals_scn = 'Signals';
155my $site_scn = 'Site configuration';
156my $sockets_scn = 'Sockets configuration values';
3951f999
KW
157my $stack_scn = 'Stack Manipulation Macros';
158my $string_scn = 'String Handling';
159my $SV_flags_scn = 'SV Flags';
160my $SV_scn = 'SV Handling';
d8e799d8 161my $tainting_scn = 'Tainting';
3951f999
KW
162my $time_scn = 'Time';
163my $typedefs_scn = 'Typedef names';
164my $unicode_scn = 'Unicode Support';
165my $utility_scn = 'Utility Functions';
166my $versioning_scn = 'Versioning';
167my $warning_scn = 'Warning and Dieing';
3e294c2f 168my $XS_scn = 'XS';
3951f999 169
55d5a9fe
KW
170# Kept separate at end
171my $undocumented_scn = 'Undocumented elements';
172
00d68f0d 173my %valid_sections = (
3e294c2f 174 $AV_scn => {},
3951f999
KW
175 $callback_scn => {},
176 $casting_scn => {},
177 $casing_scn => {},
178 $classification_scn => {},
179 $scope_scn => {},
180 $compiler_scn => {},
181 $directives_scn => {},
182 $concurrency_scn => {},
183 $COP_scn => {},
89222628
KW
184 $CV_scn => {
185 header => <<~'EOT',
186 This section documents functions to manipulate CVs which are
187 code-values, meaning subroutines. For more information, see
188 L<perlguts>.
189 EOT
190 },
191
3951f999 192 $custom_scn => {},
d1b9805e
KW
193 $debugging_scn => {},
194 $display_scn => {},
3951f999
KW
195 $embedding_scn => {},
196 $errno_scn => {},
197 $exceptions_scn => {},
198 $filesystem_scn => {
6927cd44
KW
199 header => <<~'EOT',
200 Also see L</List of capability HAS_foo symbols>.
201 EOT
202 },
b6f27e58 203 $filters_scn => {},
3951f999 204 $floating_scn => {
00d68f0d 205 header => <<~'EOT',
6927cd44
KW
206 Also L</List of capability HAS_foo symbols> lists capabilities
207 that arent in this section. For example C<HAS_ASINH>, for the
208 hyperbolic sine function.
209 EOT
210 },
3951f999 211 $genconfig_scn => {
00d68f0d
KW
212 header => <<~'EOT',
213 This section contains configuration information not otherwise
214 found in the more specialized sections of this document. At the
215 end is a list of C<#defines> whose name should be enough to tell
216 you what they do, and a list of #defines which tell you if you
217 need to C<#include> files to get the corresponding functionality.
218 EOT
219
014f1aa3 220 footer => <<~EOT,
00d68f0d
KW
221
222 =head2 List of capability C<HAS_I<foo>> symbols
223
6927cd44
KW
224 This is a list of those symbols that dont appear elsewhere in ths
225 document that indicate if the current platform has a certain
226 capability. Their names all begin with C<HAS_>. Only those
227 symbols whose capability is directly derived from the name are
228 listed here. All others have their meaning expanded out elsewhere
229 in this document. This (relatively) compact list is because we
230 think that the expansion would add little or no value and take up
231 a lot of space (because there are so many). If you think certain
232 ones should be expanded, send email to
014f1aa3 233 L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
00d68f0d
KW
234
235 Each symbol here will be C<#define>d if and only if the platform
236 has the capability. If you need more detail, see the
6927cd44
KW
237 corresponding entry in F<config.h>. For convenience, the list is
238 split so that the ones that indicate there is a reentrant version
239 of a capability are listed separately
00d68f0d
KW
240
241 __HAS_LIST__
242
6927cd44
KW
243 And, the reentrant capabilities:
244
245 __HAS_R_LIST__
246
00d68f0d
KW
247 Example usage:
248
014f1aa3 249 =over $standard_indent
00d68f0d
KW
250
251 #ifdef HAS_STRNLEN
252 use strnlen()
253 #else
254 use an alternative implementation
255 #endif
256
257 =back
258
259 =head2 List of C<#include> needed symbols
260
261 This list contains symbols that indicate if certain C<#include>
262 files are present on the platform. If your code accesses the
263 functionality that one of these is for, you will need to
264 C<#include> it if the symbol on this list is C<#define>d. For
265 more detail, see the corresponding entry in F<config.h>.
266
267 __INCLUDE_LIST__
268
269 Example usage:
270
014f1aa3 271 =over $standard_indent
00d68f0d
KW
272
273 #ifdef I_WCHAR
274 #include <wchar.h>
275 #endif
276
277 =back
278 EOT
279 },
3951f999
KW
280 $globals_scn => {},
281 $GV_scn => {},
282 $hook_scn => {},
283 $HV_scn => {},
284 $io_scn => {},
a87f9c51
KW
285 $io_formats_scn => {
286 header => <<~'EOT',
287 These are used for formatting the corresponding type For example,
288 instead of saying
289
290 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
291
292 use
293
294 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
295
296 This keeps you from having to know if, say an IV, needs to be
297 printed as C<%d>, C<%ld>, or something else.
298 EOT
299 },
3951f999
KW
300 $integer_scn => {},
301 $lexer_scn => {},
3e294c2f 302 $locale_scn => {},
3951f999
KW
303 $magic_scn => {},
304 $memory_scn => {},
3e294c2f 305 $MRO_scn => {},
3951f999
KW
306 $multicall_scn => {},
307 $numeric_scn => {},
72957953 308 $optrees_scn => {},
3951f999
KW
309 $optree_construction_scn => {},
310 $optree_manipulation_scn => {},
311 $pack_scn => {},
312 $pad_scn => {},
313 $password_scn => {},
314 $paths_scn => {},
3951f999
KW
315 $prototypes_scn => {},
316 $regexp_scn => {},
6ef63541
KW
317 $reports_scn => {
318 header => <<~"EOT",
319 These are used in the simple report generation feature of Perl.
320 See L<perlform>.
321 EOT
322 },
3951f999
KW
323 $signals_scn => {},
324 $site_scn => {
6927cd44
KW
325 header => <<~'EOT',
326 These variables give details as to where various libraries,
327 installation destinations, I<etc.>, go, as well as what various
328 installation options were selected
329 EOT
330 },
3951f999 331 $sockets_scn => {},
3951f999
KW
332 $stack_scn => {},
333 $string_scn => {
89222628
KW
334 header => <<~EOT,
335 See also C<L</$unicode_scn>>.
6927cd44
KW
336 EOT
337 },
3951f999
KW
338 $SV_flags_scn => {},
339 $SV_scn => {},
d8e799d8 340 $tainting_scn => {},
3951f999
KW
341 $time_scn => {},
342 $typedefs_scn => {},
89222628
KW
343 $unicode_scn => {
344 header => <<~EOT,
345 L<perlguts/Unicode Support> has an introduction to this API.
346
347 See also C<L</$classification_scn>>,
348 C<L</$casing_scn>>,
349 and C<L</$string_scn>>.
350 Various functions outside this section also work specially with
351 Unicode. Search for the string "utf8" in this document.
352 EOT
353 },
3951f999
KW
354 $utility_scn => {},
355 $versioning_scn => {},
356 $warning_scn => {},
3e294c2f 357 $XS_scn => {},
00d68f0d 358);
94bdecf9 359
9e019b59
KW
360# Somewhat loose match for an apidoc line so we can catch minor typos.
361# Parentheses are used to capture portions so that below we verify
362# that things are the actual correct syntax.
363my $apidoc_re = qr/ ^ (\s*) # $1
364 (=?) # $2
365 (\s*) # $3
366 for (\s*) # $4
367 apidoc (_item)? # $5
368 (\s*) # $6
369 (.*?) # $7
370 \s* \n /x;
183f6e59 371# Only certain flags, dealing with display, are acceptable for apidoc_item
3734d2f5 372my $display_flags = "fFnDopTx;";
9e019b59
KW
373
374sub check_api_doc_line ($$) {
375 my ($file, $in) = @_;
376
377 return unless $in =~ $apidoc_re;
378
379 my $is_item = defined $5;
380 my $is_in_proper_form = length $1 == 0
381 && length $2 > 0
382 && length $3 == 0
383 && length $4 > 0
6e93196a
KW
384 && length $7 > 0
385 && ( length $6 > 0
386 || ($is_item && substr($7, 0, 1) eq '|'));
9e019b59
KW
387 my $proto_in_file = $7;
388 my $proto = $proto_in_file;
389 $proto = "||$proto" if $proto !~ /\|/;
390 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
391
392 $name && $is_in_proper_form or die <<EOS;
393Bad apidoc at $file line $.:
394 $in
395Expected:
396 =for apidoc flags|returntype|name|arg|arg|...
397 =for apidoc flags|returntype|name
398 =for apidoc name
399(or 'apidoc_item')
400EOS
401
56aac0d0 402 die "Only [$display_flags] allowed in apidoc_item:\n$in"
783469eb
KW
403 if $is_item && $flags =~ /[^$display_flags]/;
404
9e019b59
KW
405 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
406}
94bdecf9 407
4be3b9ca
KW
408sub embed_override($) {
409 my ($element_name) = shift;
410
411 # If the entry is also in embed.fnc, it should be defined
412 # completely there, but not here
413 my $embed_docref = delete $funcflags{$element_name};
414
415 return unless $embed_docref and %$embed_docref;
416
417 my $flags = $embed_docref->{'flags'};
418 warn "embed.fnc entry '$element_name' missing 'd' flag"
419 unless $flags =~ /d/;
420
421 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
422}
423
89222628
KW
424# The section that is in effect at the beginning of the given file. If not
425# listed here, an apidoc_section line must precede any apidoc lines.
426# This allows the files listed here that generally are single-purpose, to not
427# have to worry about the autodoc section
428my %initial_file_section = (
429 'av.c' => $AV_scn,
430 'av.h' => $AV_scn,
431 'cv.h' => $CV_scn,
d05b47c2 432 'deb.c' => $debugging_scn,
189b9288 433 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
89222628
KW
434 'doio.c' => $io_scn,
435 'gv.c' => $GV_scn,
436 'gv.h' => $GV_scn,
437 'hv.h' => $HV_scn,
438 'locale.c' => $locale_scn,
439 'malloc.c' => $memory_scn,
440 'numeric.c' => $numeric_scn,
441 'opnames.h' => $optree_construction_scn,
442 'pad.h'=> $pad_scn,
443 'patchlevel.h' => $versioning_scn,
444 'perlio.h' => $io_scn,
445 'pod/perlapio.pod' => $io_scn,
446 'pod/perlcall.pod' => $callback_scn,
447 'pod/perlembed.pod' => $embedding_scn,
448 'pod/perlfilter.pod' => $filters_scn,
449 'pod/perliol.pod' => $io_scn,
450 'pod/perlmroapi.pod' => $MRO_scn,
451 'pod/perlreguts.pod' => $regexp_scn,
452 'pp_pack.c' => $pack_scn,
453 'pp_sort.c' => $SV_scn,
454 'regcomp.c' => $regexp_scn,
455 'regexp.h' => $regexp_scn,
d63f0f68
KW
456 'sv.h' => $SV_scn,
457 'sv.c' => $SV_scn,
072be79c 458 'sv_inline.h' => $SV_scn,
d8e799d8 459 'taint.c' => $tainting_scn,
89222628
KW
460 'unicode_constants.h' => $unicode_scn,
461 'utf8.c' => $unicode_scn,
462 'utf8.h' => $unicode_scn,
463 'vutil.c' => $versioning_scn,
464 );
465
94bdecf9
JH
466sub autodoc ($$) { # parse a file and extract documentation info
467 my($fh,$file) = @_;
63da8140 468 my($in, $line_num, $header, $section);
f554dfc5 469
89222628
KW
470 $section = $initial_file_section{$file}
471 if defined $initial_file_section{$file};
472
3cfa66c4
KW
473 my $file_is_C = $file =~ / \. [ch] $ /x;
474
f554dfc5 475 # Count lines easier
b1cb8a49 476 my $get_next_line = sub { $line_num++; return <$fh> };
f554dfc5 477
63da8140
KW
478 # Read the file
479 while ($in = $get_next_line->()) {
480 last unless defined $in;
3cfa66c4 481
63da8140
KW
482 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
483 # =head1 lines only have effect in C files
484 || ($file_is_C && $in =~ /^=head1/));
485
486 # Here, the line introduces a portion of the input that we care about.
487 # Either it is for an API element, or heading text which we expect
488 # will be used for elements later in the file
489
490 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
183f6e59 491 my (@args, @items);
63da8140
KW
492
493 # If the line starts a new section ...
494 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
183f6e59 495
b1cb8a49 496 $section = $1;
3f620621
KW
497 if ($section =~ / ^ \$ /x) {
498 $section .= '_scn' unless $section =~ / _scn $ /;
499 $section = eval "$section";
500 die "Unknown \$section variable '$section' in $file: $@" if $@;
501 }
6927cd44
KW
502 die "Unknown section name '$section' in $file near line $.\n"
503 unless defined $valid_sections{$section};
504
3cfa66c4 505 }
63da8140
KW
506 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
507 # plain apidoc line
183f6e59
KW
508 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
509 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
63da8140
KW
510 }
511 else { # Plain apidoc
512
513 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
514 = check_api_doc_line($file, $in);
4be3b9ca
KW
515 # Override this line with any info in embed.fnc
516 my ($embed_flags, $embed_ret_type, @embed_args)
517 = embed_override($element_name);
518 if ($embed_ret_type) {
63da8140 519 warn "embed.fnc entry overrides redundant information in"
4be3b9ca
KW
520 . " '$proto_in_file' in $file"
521 if $flags || $ret_type || @args;
522 $flags = $embed_flags;
523 $ret_type = $embed_ret_type;
524 @args = @embed_args;
525 }
9e728159
KW
526 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not
527 # a macro or typedef
63da8140
KW
528 $missing{$element_name} = $file;
529 }
3cfa66c4 530
56aac0d0 531 die "flag '$1' is not legal (for function $element_name (from $file))"
746dd616 532 if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy;#] ) /x;
151c3fe5 533
9e728159
KW
534 die "'u' flag must also have 'm' or 'y' flags' for $element_name"
535 if $flags =~ /u/ && $flags !~ /[my]/;
63da8140
KW
536 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
537 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
f554dfc5 538
746dd616
KW
539 if ($flags =~ /#/) {
540 die "Return type must be empty for '$element_name'"
541 if $ret_type;
542 $ret_type = '#ifdef';
543 }
544
63da8140
KW
545 if (exists $seen{$element_name} && $flags !~ /h/) {
546 die ("'$element_name' in $file was already documented in $seen{$element_name}");
547 }
548 else {
549 $seen{$element_name} = $file;
151c3fe5 550 }
94bdecf9 551 }
df6bd76f 552
63da8140
KW
553 # Here we have processed the initial line in the heading text or API
554 # element, and have saved the important information from it into the
555 # corresponding variables. Now accumulate the text that applies to it
556 # up to a terminating line, which is one of:
557 # 1) =cut
558 # 2) =head (in a C file only =head1)
559 # 3) an end comment line in a C file: m:^\s*\*/:
183f6e59 560 # 4) =for apidoc... (except apidoc_item lines)
63da8140
KW
561 $text = "";
562 my $head_ender_num = ($file_is_C) ? 1 : "";
563 while (defined($in = $get_next_line->())) {
564
565 last if $in =~ /^=cut/x;
566 last if $in =~ /^=head$head_ender_num/;
567
568 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
569
570 # End of comment line in C files is a fall-back terminator,
571 # but warn only if there actually is some accumulated text
572 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
573 last;
df6bd76f 574 }
df6bd76f 575
63da8140
KW
576 if ($in !~ / ^ =for [ ]+ apidoc /x) {
577 $text .= $in;
578 next;
579 }
580
183f6e59 581 # Here, the line is an apidoc line. All but apidoc_item terminate
63da8140 582 # the text being accumulated.
183f6e59
KW
583 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
584
585 my ($item_name, $item_flags, $item_ret_type, $is_item,
4be3b9ca 586 $item_proto, @item_args) = check_api_doc_line($file, $in);
183f6e59
KW
587 last unless $is_item;
588
589 # Here, is an apidoc_item_line; They can only come within apidoc
590 # paragraphs.
4be3b9ca
KW
591 die "Unexpected api_doc_item line '$item_proto'"
592 unless $element_name;
183f6e59
KW
593
594 # We accept blank lines between these, but nothing else;
56aac0d0
KW
595 die "apidoc_item lines must immediately follow apidoc lines for "
596 . " '$element_name' in $file"
183f6e59 597 if $text =~ /\S/;
4be3b9ca
KW
598 # Override this line with any info in embed.fnc
599 my ($embed_flags, $embed_ret_type, @embed_args)
600 = embed_override($item_name);
601 if ($embed_ret_type) {
602 warn "embed.fnc entry overrides redundant information in"
603 . " '$item_proto' in $file"
604 if $item_flags || $item_ret_type || @item_args;
605
606 $item_flags = $embed_flags;
607 $item_ret_type = $embed_ret_type;
608 @item_args = @embed_args;
609 }
183f6e59
KW
610
611 # Use the base entry flags if none for this item; otherwise add in
612 # any non-display base entry flags.
613 if ($item_flags) {
614 $item_flags .= $flags =~ s/[$display_flags]//rg;
615 }
616 else {
617 $item_flags = $flags;
618 }
619 $item_ret_type = $ret_type unless $item_ret_type;
620 @item_args = @args unless @item_args;
621 push @items, { name => $item_name,
622 ret_type => $item_ret_type,
623 flags => $item_flags,
624 args => [ @item_args ],
625 };
626
627 # This line shows that this element is documented.
628 delete $funcflags{$item_name};
a9a7c81e 629 }
3cfa66c4 630
63da8140
KW
631 # Here, are done accumulating the text for this item. Trim it
632 $text =~ s/ ^ \s* //x;
633 $text =~ s/ \s* $ //x;
634 $text .= "\n" if $text ne "";
6523e108 635
63da8140
KW
636 # And treat all-spaces as nothing at all
637 undef $text unless $text =~ /\S/;
df6bd76f 638
63da8140 639 if ($element_name) {
df6bd76f 640
63da8140 641 # Here, we have accumulated into $text, the pod for $element_name
23c7fd2d 642 my $where = $flags =~ /A/ ? 'api' : 'intern';
5ce57792 643
63da8140
KW
644 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
645 unless defined $section;
2aa73c64
KW
646 my $is_link_only = ($flags =~ /h/);
647 if (! $is_link_only && exists $docs{$where}{$section}{$element_name}) {
63da8140
KW
648 warn "$0: duplicate API entry for '$element_name' in"
649 . " $where/$section\n";
650 next;
20046047 651 }
5ce57792 652
63da8140 653 # Override the text with just a link if the flags call for that
63da8140
KW
654 if ($is_link_only) {
655 if ($file_is_C) {
2aa73c64
KW
656 die "Can't currently handle link with items to it:\n$in"
657 if @items;
658 $docs{$where}{$section}{X_tags}{$element_name} = $file;
63da8140
KW
659 redo; # Don't put anything if C source
660 }
661
662 # Here, is an 'h' flag in pod. We add a reference to the pod (and
663 # nothing else) to perlapi/intern. (It would be better to add a
664 # reference to the correct =item,=header, but something that makes
665 # it harder is that it that might be a duplicate, like '=item *';
666 # so that is a future enhancement XXX. Another complication is
667 # there might be more than one deserving candidates.)
668 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
669 $podname =~ s/\.pod//;
670 $text = "Described in L<$podname>.\n";
671
672 # Don't output a usage example for linked to documentation if
673 # it is trivial (has no arguments) and we aren't to add a
674 # semicolon
3734d2f5 675 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[U;]/;
63da8140
KW
676
677 # Keep track of all the pod files that we refer to.
678 push $described_elsewhere{$podname}->@*, $podname;
679 }
5ce57792 680
5bf02caf
KW
681 $docs{$where}{$section}{$element_name}{flags} = $flags;
682 $docs{$where}{$section}{$element_name}{pod} = $text;
683 $docs{$where}{$section}{$element_name}{file} = $file;
684 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
685 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
183f6e59 686 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
a9a7c81e 687 }
63da8140
KW
688 elsif ($text) {
689 $valid_sections{$section}{header} = "" unless
690 defined $valid_sections{$section}{header};
691 $valid_sections{$section}{header} .= "\n$text";
a9a7c81e 692 }
63da8140
KW
693
694 # We already have the first line of what's to come in $in
695 redo;
696
697 } # End of loop through input
94bdecf9
JH
698}
699
00d68f0d
KW
700my %configs;
701my @has_defs;
702my @has_r_defs; # Reentrant symbols
703my @include_defs;
63447426 704
00d68f0d
KW
705sub parse_config_h {
706 use re '/aa'; # Everthing is ASCII in this file
707
708 # Process config.h
d3871820
KW
709 my $config_h = 'config.h';
710 $config_h = 'win32/config.h' unless -e $config_h;
711 die "Can't find $config_h" unless -e $config_h;
712 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
00d68f0d
KW
713 while (<$fh>) {
714
715 # Look for lines like /* FOO_BAR:
716 # By convention all config.h descriptions begin like that
717 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
718 my $name = $1;
719
720 # Here we are starting the description for $name in config.h. We
721 # accumulate the entire description for it into @description.
722 # Flowing text from one input line to another is appended into the
723 # same array element to make a single flowing line element, but
724 # verbatim lines are kept as separate elements in @description.
725 # This will facilitate later doing pattern matching without regard
726 # to line boundaries on non-verbatim text.
727
728 die "Multiple config.h entries for '$name'"
729 if defined $configs{$name}{description};
730
731 # Get first line of description
732 $_ = <$fh>;
733
734 # Each line in the description begins with blanks followed by '/*'
735 # and some spaces.
736 die "Unexpected config.h initial line for $name: '$_'"
737 unless s/ ^ ( \s* \* \s* ) //x;
738 my $initial_text = $1;
739
740 # Initialize the description with this first line (after having
741 # stripped the prefix text)
742 my @description = $_;
743
744 # The first line is used as a template for how much indentation
745 # each normal succeeding line has. Lines indented further
746 # will be considered as intended to be verbatim. But, empty lines
747 # likely won't have trailing blanks, so just strip the whole thing
748 # for them.
749 my $strip_initial_qr = qr! \s* \* \s* $
750 | \Q$initial_text\E
751 !x;
752 $configs{$name}{verbatim} = 0;
753
754 # Read in the remainder of the description
755 while (<$fh>) {
756 last if s| ^ \s* \* / ||x; # A '*/' ends it
757
758 die "Unexpected config.h description line for $name: '$_'"
759 unless s/$strip_initial_qr//;
760
761 # Fix up the few flawed lines in config.h wherein a new
762 # sentence begins with a tab (and maybe a space after that).
763 # Although none of them currently do, let it recognize
764 # something like
765 #
766 # "... text"). The next sentence ...
767 #
768 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
769
770 # If this line has extra indentation or looks to have columns,
771 # it should be treated as verbatim. Columns are indicated by
772 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
773 # not preceded by punctuation.
774 if ($_ !~ m/ ^ \s
775 | \S (?: \t
776 | \s{3}
777 | (*nlb:[[:punct:]]) \s{2}
778 )
779 /x)
780 {
781 # But here, is not a verbatim line. Add an empty line if
782 # this is the first non-verbatim after a run of verbatims
783 if ($description[-1] =~ /^\s/) {
784 push @description, "\n", $_;
785 }
786 else { # Otherwise, append this flowing line to the
787 # current flowing line
788 $description[-1] .= $_;
789 }
790 }
791 else {
792 $configs{$name}{verbatim} = 1;
793
794 # The first verbatim line in a run of them is separated by an
795 # empty line from the flowing lines above it
796 push @description, "\n" if $description[-1] =~ /^\S/;
797
798 $_ = Text::Tabs::expand($_);
799
800 # Only a single space so less likely to wrap
801 s/ ^ \s* / /x;
802
803 push @description, $_;
804 }
805 }
806
807 push $configs{$name}{description}->@*, @description
808
809 } # Not a description; see if it is a macro definition.
810 elsif (m! ^
811 (?: / \* )? # Optional commented-out
812 # indication
813 \# \s* define \s+ ( \w+ ) # $1 is the name
814 ( \s* ) # $2 indicates if args or not
815 ( .*? ) # $3 is any definition
816 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
817 $
818 !x)
819 {
820 my $name = $1;
821
822 # There can be multiple definitions for a name. We want to know
823 # if any of them has arguments, and if any has a body.
824 $configs{$name}{has_args} //= $2 eq "";
825 $configs{$name}{has_args} ||= $2 eq "";
826 $configs{$name}{has_defn} //= $3 ne "";
827 $configs{$name}{has_defn} ||= $3 ne "";
828 }
829 }
830
831 # We now have stored the description and information about every #define
832 # in the file. The description is in a form convenient to operate on to
833 # convert to pod. Do that now.
834 foreach my $name (keys %configs) {
835 next unless defined $configs{$name}{description};
836
837 # All adjacent non-verbatim lines of the description are appended
838 # together in a single element in the array. This allows the patterns
839 # to work across input line boundaries.
840
841 my $pod = "";
842 while (defined ($_ = shift $configs{$name}{description}->@*)) {
843 chomp;
844
845 if (/ ^ \S /x) { # Don't edit verbatim lines
846
847 # Enclose known file/path names not already so enclosed
848 # with <...>. (Some entries in config.h are already
849 # '<path/to/file>')
850 my $file_name_qr = qr! [ \w / ]+ \.
851 (?: c | h | xs | p [lm] | pmc | PL
852 | sh | SH | exe ) \b
853 !xx;
854 my $path_name_qr = qr! (?: / \w+ )+ !x;
855 for my $re ($file_name_qr, $path_name_qr) {
856 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
857 }
858
859 # Enclose <... file/path names with F<...> (but no double
860 # angle brackets)
861 for my $re ($file_name_qr, $path_name_qr) {
862 s! < ( $re ) > !F<$1>!gxx;
863 }
864
865 # Explain metaconfig units
866 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
867
868 # Convert "See foo" to "See C<L</foo>>" if foo is described in
869 # this file. Also create a link to the known file INSTALL.
870 # And, to be more general, handle "See also foo and bar", and
871 # "See also foo, bar, and baz"
872 while (m/ \b [Ss]ee \s+
873 (?: also \s+ )? ( \w+ )
874 (?: , \s+ ( \w+ ) )?
875 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
876 my @links = $1;
877 push @links, $2 if defined $2;
878 push @links, $3 if defined $3;
879 foreach my $link (@links) {
880 if ($link eq 'INSTALL') {
881 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
882 }
883 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
884 s| \b $link \b |C<L</$link>>|xg;
885 $configs{$link}{linked} = 1;
886 $configs{$name}{linked} = 1;
887 }
888 }
889 }
890
891 # Enclose what we think are symbols with C<...>.
892 no warnings 'experimental::vlb';
893 s/ (*nlb:<)
894 (
895 # Any word followed immediately with parens or
896 # brackets
897 \b \w+ (?: \( [^)]* \) # parameter list
898 | \[ [^]]* \] # or array reference
899 )
900 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
901 | \b (?: struct | union ) \s \w+
902
903 # Words that contain underscores (which are
904 # definitely not text) or three uppercase letters in
905 # a row. Length two ones, like IV, aren't enclosed,
906 # because they often don't look as nice.
907 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
908 )
909 (*nla:>)
910 /C<$1>/xg;
911
912 # These include foo when the name is HAS_foo. This is a
913 # heuristic which works in most cases.
914 if ($name =~ / ^ HAS_ (.*) /x) {
915 my $symbol = lc $1;
916
917 # Don't include path components, nor things already in
918 # <>, or with trailing '(', '['
919 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
920 }
921 }
922
923 $pod .= "$_\n";
924 }
925 delete $configs{$name}{description};
926
927 $configs{$name}{pod} = $pod;
928 }
929
930 # Now have converted the description to pod. We also now have enough
931 # information that we can do cross checking to find definitions without
932 # corresponding pod, and see if they are mentioned in some description;
933 # otherwise they aren't documented.
934 NAME:
935 foreach my $name (keys %configs) {
936
937 # A definition without pod
938 if (! defined $configs{$name}{pod}) {
939
940 # Leading/trailing underscore means internal to config.h, e.g.,
941 # _GNU_SOURCE
942 next if $name =~ / ^ _ /x;
943 next if $name =~ / _ $ /x;
944
945 # MiXeD case names are internal to config.h; the first 4
946 # characters are sufficient to determine this
947 next if $name =~ / ^ [[:upper:]] [[:lower:]]
948 [[:upper:]] [[:lower:]]
949 /x;
950
951 # Here, not internal to config.h. Look to see if this symbol is
952 # mentioned in the pod of some other. If so, assume it is
953 # documented.
954 foreach my $check_name (keys %configs) {
955 my $this_element = $configs{$check_name};
956 my $this_pod = $this_element->{pod};
957 if (defined $this_pod) {
958 next NAME if $this_pod =~ / \b $name \b /x;
959 }
960 }
961
962 warn "$name has no documentation\n";
963 $missing_macros{$name} = 'config.h';
964
965 next;
966 }
967
968 my $has_defn = $configs{$name}{has_defn};
969 my $has_args = $configs{$name}{has_args};
970
971 # Check if any section already has an entry for this element.
972 # If so, it better be a placeholder, in which case we replace it
973 # with this entry.
974 foreach my $section (keys $docs{'api'}->%*) {
975 if (exists $docs{'api'}{$section}{$name}) {
976 my $was = $docs{'api'}{$section}{$name}->{pod};
977 $was = "" unless $was;
978 chomp $was;
979 if ($was ne "" && $was !~ m/$link_text/) {
980 die "Multiple descriptions for $name\n"
981 . "$section contained '$was'";
982 }
983 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
984 $configs{$name}{section} = $section;
985 last;
986 }
987 }
988
989 my $handled = 0; # Haven't handled this yet
990
991 if (defined $configs{$name}{'section'}) {
992 # This has been taken care of elsewhere.
993 $handled = 1;
994 }
995 else {
9e728159 996 my $flags = "";
00d68f0d
KW
997 if ($has_defn && ! $has_args) {
998 $configs{$name}{args} = 1;
999 }
1000
1001 # Symbols of the form I_FOO are for #include files. They have
1002 # special usage information
1003 if ($name =~ / ^ I_ ( .* ) /x) {
1004 my $file = lc $1 . '.h';
1005 $configs{$name}{usage} = <<~"EOT";
1006 #ifdef $name
1007 #include <$file>
1008 #endif
1009 EOT
1010 }
1011
1012 # Compute what section this variable should go into. This
1013 # heuristic was determined by manually inspecting the current
1014 # things in config.h, and should be adjusted as necessary as
1015 # deficiencies are found.
1016 #
1017 # This is the default section for macros with a definiton but
1018 # no arguments, meaning it is replaced unconditionally
1019 #
1020 my $sb = qr/ _ | \b /x; # segment boundary
1021 my $dash_or_spaces = qr/ - | \s+ /x;
1022 my $pod = $configs{$name}{pod};
1023 if ($name =~ / ^ USE_ /x) {
3951f999 1024 $configs{$name}{'section'} = $site_scn;
00d68f0d
KW
1025 }
1026 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
1027 {
3951f999 1028 $configs{$name}{'section'} = $time_scn;
00d68f0d 1029 }
c86b25ed
KW
1030 elsif ( $name =~ / ^ [[:alpha:]]+ f $ /x
1031 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1032 {
a87f9c51 1033 $configs{$name}{'section'} = $io_formats_scn;
c86b25ed 1034 }
00d68f0d
KW
1035 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1036 | $sb CASTFLAGS $sb
1037 | QUADMATH
1038 | $sb (?: IS )? NAN
1039 | $sb (?: IS )? FINITE
1040 /x)
1041 {
1042 $configs{$name}{'section'} =
3951f999 1043 $floating_scn;
00d68f0d
KW
1044 }
1045 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
3951f999 1046 $configs{$name}{'section'} = $filesystem_scn;
00d68f0d
KW
1047 }
1048 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1049 || $configs{$name}{pod} =~ m/ \b align /x)
1050 {
3951f999 1051 $configs{$name}{'section'} = $compiler_scn;
00d68f0d
KW
1052 }
1053 elsif ($name =~ / ^ [IU] [ \d V ]
1054 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1055 {
3951f999 1056 $configs{$name}{'section'} = $integer_scn;
00d68f0d
KW
1057 }
1058 elsif ($name =~ / $sb t $sb /x) {
3951f999 1059 $configs{$name}{'section'} = $typedefs_scn;
9e728159 1060 $flags .= 'y';
00d68f0d
KW
1061 }
1062 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1063 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1064 {
a87f9c51 1065 $configs{$name}{'section'} = $io_formats_scn;
00d68f0d
KW
1066 }
1067 elsif ($name =~ / BACKTRACE /x) {
d1b9805e 1068 $configs{$name}{'section'} = $debugging_scn;
00d68f0d
KW
1069 }
1070 elsif ($name =~ / ALLOC $sb /x) {
3951f999 1071 $configs{$name}{'section'} = $memory_scn;
00d68f0d
KW
1072 }
1073 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
1074 | $sb FILE $sb
1075 | $sb DIR $sb
1076 | $sb LSEEK
1077 | $sb INO $sb
1078 | $sb OPEN
1079 | $sb CLOSE
1080 | ^ DIR
1081 | ^ INO $sb
1082 | DIR $
1083 | FILENAMES
1084 /x
1085 || $configs{$name}{pod} =~ m! I/O | stdio
1086 | file \s+ descriptor
1087 | file \s* system
1088 | statfs
1089 !x)
1090 {
3951f999 1091 $configs{$name}{'section'} = $filesystem_scn;
00d68f0d
KW
1092 }
1093 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
3951f999 1094 $configs{$name}{'section'} = $signals_scn;
00d68f0d
KW
1095 }
1096 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
3951f999 1097 $configs{$name}{'section'} = $prototypes_scn;
00d68f0d
KW
1098 }
1099 elsif ( $name =~ / ^ LOC_ /x
1100 || $configs{$name}{pod} =~ /full path/i)
1101 {
3951f999 1102 $configs{$name}{'section'} = $paths_scn;
00d68f0d
KW
1103 }
1104 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
3e294c2f 1105 $configs{$name}{'section'} = $locale_scn;
00d68f0d
KW
1106 }
1107 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
3951f999 1108 $configs{$name}{'section'} = $compiler_scn;
00d68f0d
KW
1109 }
1110 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1111 {
3951f999 1112 $configs{$name}{'section'} = $password_scn;
00d68f0d
KW
1113 }
1114 elsif ($name =~ / SOCKET | $sb SOCK /x) {
3951f999 1115 $configs{$name}{'section'} = $sockets_scn;
00d68f0d
KW
1116 }
1117 elsif ( $name =~ / THREAD | MULTIPLICITY /x
1118 || $configs{$name}{pod} =~ m/ \b pthread /ix)
1119 {
3951f999 1120 $configs{$name}{'section'} = $concurrency_scn;
00d68f0d
KW
1121 }
1122 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
1123 | VENDOR | ^ USE
1124 /x)
1125 {
3951f999 1126 $configs{$name}{'section'} = $site_scn;
00d68f0d
KW
1127 }
1128 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
63447426
KW
1129 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1130 || $pod =~ / \b doubles \b /ix
1131 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
00d68f0d
KW
1132 {
1133 $configs{$name}{'section'} =
3951f999 1134 $floating_scn;
00d68f0d
KW
1135 }
1136 else {
1137 # Above are the specific sections. The rest go into a
1138 # grab-bag of general configuration values. However, we put
1139 # two classes of them into lists of their names, without their
1140 # descriptions, when we think that the description doesn't add
1141 # any real value. One list contains the #include variables:
1142 # the description is basically boiler plate for each of these.
1143 # The other list contains the very many things that are of the
1144 # form HAS_foo, and \bfoo\b is contained in its description,
1145 # and there is no verbatim text in the pod or links to/from it
1146 # (which would add value). That means that it is likely the
1147 # intent of the variable can be gleaned from just its name,
1148 # and unlikely the description adds signficant value, so just
1149 # listing them suffices. Giving their descriptions would
1150 # expand this pod significantly with little added value.
1151 if ( ! $has_defn
1152 && ! $configs{$name}{verbatim}
1153 && ! $configs{$name}{linked})
1154 {
1155 if ($name =~ / ^ I_ ( .* ) /x) {
1156 push @include_defs, $name;
1157 next;
1158 }
1159 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1160 my $canonical_name = $1;
1161 $canonical_name =~ s/_//g;
1162
1163 my $canonical_pod = $configs{$name}{pod};
1164 $canonical_pod =~ s/_//g;
1165
1166 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1167 if ($name =~ / $sb R $sb /x) {
1168 push @has_r_defs, $name;
1169 }
1170 else {
1171 push @has_defs, $name;
1172 }
1173 next;
1174 }
1175 }
1176 }
1177
3951f999 1178 $configs{$name}{'section'} = $genconfig_scn;
00d68f0d
KW
1179 }
1180
1181 my $section = $configs{$name}{'section'};
6927cd44
KW
1182 die "Internal error: '$section' not in \%valid_sections"
1183 unless grep { $_ eq $section } keys %valid_sections;
9e728159 1184 $flags .= 'AdmnT';
00d68f0d 1185 $flags .= 'U' unless defined $configs{$name}{usage};
9e728159 1186
63447426 1187 # All the information has been gathered; save it
00d68f0d
KW
1188 $docs{'api'}{$section}{$name}{flags} = $flags;
1189 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1190 $docs{'api'}{$section}{$name}{ret_type} = "";
1191 $docs{'api'}{$section}{$name}{file} = 'config.h';
1192 $docs{'api'}{$section}{$name}{usage}
1193 = $configs{$name}{usage} if defined $configs{$name}{usage};
1194 push $docs{'api'}{$section}{$name}{args}->@*, ();
1195 push $docs{'api'}{$section}{$name}{items}->@*, ();
1196 }
1197 }
1198}
1199
c7a2af98
KW
1200sub format_pod_indexes($) {
1201 my $entries_ref = shift;
1202
1203 # Output the X<> references to the names, packed since they don't get
1204 # displayed, but not too many per line so that when someone is editing the
1205 # file, it doesn't run on
1206
1207 my $text ="";
1208 my $line_length = 0;
1209 for my $name (sort dictionary_order $entries_ref->@*) {
1210 my $entry = "X<$name>";
1211 my $entry_length = length $entry;
1212
1213 # Don't loop forever if we have a verrry long name, and don't go too
1214 # far to the right.
1215 if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1216 $text .= "\n";
1217 $line_length = 0;
1218 }
1219
1220 $text .= $entry;
1221 $line_length += $entry_length;
1222 }
1223
1224 return $text;
1225}
1226
b63f8ed4 1227sub docout ($$$) { # output the docs for one function group
b1cb8a49 1228 my($fh, $element_name, $docref) = @_;
5bf02caf 1229
783469eb
KW
1230 # Trim trailing space
1231 $element_name =~ s/\s*$//;
1232
5bf02caf 1233 my $flags = $docref->{flags};
63da8140 1234 my $pod = $docref->{pod} // "";
5bf02caf 1235 my $file = $docref->{file};
783469eb 1236
183f6e59 1237 my @items = $docref->{items}->@*;
5bf02caf 1238
783469eb
KW
1239 # Make the main element the first of the items. This allows uniform
1240 # treatment below
1241 unshift @items, { name => $element_name,
1242 flags => $flags,
1243 ret_type => $docref->{ret_type},
1244 args => [ $docref->{args}->@* ],
1245 };
94bdecf9 1246
7a997878 1247 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
9639517b 1248
783469eb
KW
1249 print $fh "\n=over $description_indent\n";
1250 print $fh "\n=item C<$_->{name}>\n" for @items;
63da8140 1251
783469eb
KW
1252 # If we're printing only a link to an element, this isn't the major entry,
1253 # so no X<> here.
1254 if ($flags !~ /h/) {
1255 print $fh "X<$_->{name}>" for @items;
1256 print $fh "\n";
d4e99c76 1257 }
63da8140 1258
d1681de8
KW
1259 my @deprecated;
1260 my @experimental;
783469eb 1261 for my $item (@items) {
d1681de8
KW
1262 push @deprecated, "C<$item->{name}>" if $item->{flags} =~ /D/;
1263 push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
1264 }
54c193ae 1265
d1681de8
KW
1266 for my $which (\@deprecated, \@experimental) {
1267 if ($which->@*) {
1268 my $is;
1269 my $it;
1270 my $list;
1271
1272 if ($which->@* == 1) {
1273 $is = 'is';
1274 $it = 'it';
1275 $list = $which->[0];
1276 }
1277 elsif ($which->@* == @items) {
1278 $is = 'are';
1279 $it = 'them';
1280 $list = (@items == 2)
1281 ? "both forms"
1282 : "all these forms";
1283 }
1284 else {
1285 $is = 'are';
1286 $it = 'them';
1287 my $final = pop $which->@*;
1288 $list = "the " . join ", ", $which->@*;
1289 $list .= "," if $which->@* > 1;
1290 $list .= " and $final forms";
1291 }
54c193ae 1292
d1681de8
KW
1293 if ($which == \@deprecated) {
1294 print $fh <<~"EOT";
1295
1296 C<B<DEPRECATED!>> It is planned to remove $list
1297 from a future release of Perl. Do not use $it for
1298 new code; remove $it from existing code.
1299 EOT
1300 }
1301 else {
1302 print $fh <<~"EOT";
1303
1304 NOTE: $list $is B<experimental> and may change or be
1305 removed without notice.
1306 EOT
1307 }
783469eb 1308 }
54c193ae 1309 }
94bdecf9 1310
783469eb
KW
1311 chomp $pod; # Make sure prints pod with a single trailing \n
1312 print $fh "\n", $pod, "\n";
3cfa66c4 1313
783469eb
KW
1314 for my $item (@items) {
1315 my $item_flags = $item->{flags};
1316 my $item_name = $item->{name};
3cfa66c4 1317
783469eb
KW
1318 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1319 if $item_flags =~ /O/;
1320 # Is Perl_, but no #define foo # Perl_foo
e2d832c0 1321 if ( ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
783469eb 1322
e2d832c0
KW
1323 # Can't handle threaded varargs
1324 || ($item_flags =~ /f/ && $item_flags !~ /T/))
783469eb
KW
1325 {
1326 $item->{name} = "Perl_$item_name";
1327 print $fh <<~"EOT";
1328
1329 NOTE: C<$item_name> must be explicitly called as
1330 C<$item->{name}>
1331 EOT
1332 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1333 print $fh ".\n";
1334 }
1335 }
94bdecf9 1336
9e728159
KW
1337 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1338 # to never warrant a usage line
3734d2f5
KW
1339 warn("U and ; flags are incompatible")
1340 if $flags =~ /U/ && $flags =~ /;/;
20046047 1341 # nothing
05ca4832 1342 } else {
783469eb
KW
1343
1344 print $fh "\n=over $usage_indent\n";
1345
00d68f0d
KW
1346 if (defined $docref->{usage}) { # An override of the usage section
1347 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1348 }
783469eb
KW
1349 else {
1350
8d1c4423
KW
1351 # Add the thread context formal parameter on expanded-out names
1352 for my $item (@items) {
1353 unshift $item->{args}->@*, (($item->{args}->@*)
1354 ? "pTHX_"
1355 : "pTHX")
1356 if $item->{flags} !~ /T/
1357 && $item->{name} =~ /^Perl_/;
1358 }
1359
783469eb 1360 # Look through all the items in this entry. If all have the same
4be56a7d
KW
1361 # return type and arguments (including thread context), only the
1362 # main entry is displayed.
783469eb
KW
1363 # Also, find the longest return type and longest name so that if
1364 # multiple ones are shown, they can be vertically aligned nicely
783469eb 1365 my $need_individual_usage = 0;
53c9fbf2 1366 my $longest_name_length = length $items[0]->{name};
783469eb 1367 my $base_ret_type = $items[0]->{ret_type};
53c9fbf2 1368 my $longest_ret = length $base_ret_type;
783469eb 1369 my @base_args = $items[0]->{args}->@*;
4be56a7d 1370 my $base_thread_context = $items[0]->{flags} =~ /T/;
53c9fbf2 1371 for (my $i = 1; $i < @items; $i++) {
783469eb 1372 no warnings 'experimental::smartmatch';
53c9fbf2 1373 my $item = $items[$i];
783469eb
KW
1374 $need_individual_usage = 1
1375 if $item->{ret_type} ne $base_ret_type
4be56a7d
KW
1376 || ! ($item->{args}->@* ~~ @base_args)
1377 || ( $item->{flags} =~ /T/
1378 != $base_thread_context);
783469eb
KW
1379 my $ret_length = length $item->{ret_type};
1380 $longest_ret = $ret_length if $ret_length > $longest_ret;
1381 my $name_length = length $item->{name};
1382 $longest_name_length = $name_length
1383 if $name_length > $longest_name_length;
1ded1f42 1384 }
783469eb
KW
1385
1386 # If we're only showing one entry, only its length matters.
1387 $longest_name_length = length($items[0]->{name})
1388 unless $need_individual_usage;
1389 print $fh "\n";
1390
1391 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1392 # for more
1393 my $ret_name_sep_length = 2; # spaces between return type and name
1394 my $name_indent = $indent + $longest_ret;
1395 $name_indent += $ret_name_sep_length if $longest_ret;
1396
ca711307
KW
1397 my $this_max_width =
1398 $max_width - $description_indent - $usage_indent;
1399
783469eb
KW
1400 for my $item (@items) {
1401 my $ret_type = $item->{ret_type};
1402 my @args = $item->{args}->@*;
1403 my $name = $item->{name};
1404 my $item_flags = $item->{flags};
1405
783469eb
KW
1406 # The return type
1407 print $fh (" " x $indent), $ret_type;
1408
1409 print $fh " " x ( $ret_name_sep_length
1410 + $longest_ret - length $ret_type);
1411 print $fh $name;
1412
1413 if ($item_flags =~ /n/) { # no args
1414 warn("$file: $element_name: n flag without m")
1415 unless $item_flags =~ /m/;
1416 warn("$file: $name: n flag but apparently has args")
1417 if @args;
1418 }
1419 else {
1420 # +1 for the '('
1421 my $arg_indent = $name_indent + $longest_name_length + 1;
1422
1423 # Align the argument lists of the items
1424 print $fh " " x ($longest_name_length - length($name));
1425 print $fh "(";
1426
1427 # Display as many of the arguments on the same line as
1428 # will fit.
1429 my $total_length = $arg_indent;
1430 my $first_line = 1;
1431 for (my $i = 0; $i < @args; $i++) {
1432 my $arg = $args[$i];
1433 my $arg_length = length($arg);
1434
1435 # All but the first arg are preceded by a blank
1436 my $use_blank = $i > 0;
1437
1438 # +1 here and below because either the argument has a
1439 # trailing comma or trailing ')'
1440 $total_length += $arg_length + $use_blank + 1;
1441
1442 # We want none of the arguments to be positioned so
1443 # they extend too far to the right. Ideally, they
1444 # should all start in the same column as the arguments
1445 # on the first line of the function display do. But, if
1446 # necessary, outdent them so that they all start in
1447 # another column, with the longest ending at the right
1448 # margin, like so:
1449 # void function_name(pTHX_ short1,
1450 # short2,
1451 # very_long_argument,
1452 # short3)
ca711307 1453 if ($total_length > $this_max_width) {
783469eb
KW
1454
1455 # If this is the first continuation line,
1456 # calculate the longest argument; this will be the
1457 # one we may have to outdent for.
1458 if ($first_line) {
1459 $first_line = 0;
1460
1461 # We will need at least as much as the current
1462 # argument
1463 my $longest_arg_length = $arg_length
1464 + $use_blank + 1;
1465
1466 # Look through the rest of the args to see if
1467 # any are longer than this one.
1468 for (my $j = $i + 1; $j < @args; $j++) {
1469
1470 # Include the trailing ',' or ')' in the
1471 # length. No need to concern ourselves
1472 # with a leading blank, as the argument
1473 # would be positioned first on the next
1474 # line
1475 my $peek_arg_length = length ($args[$j])
1476 + 1;
1477 $longest_arg_length = $peek_arg_length
1478 if $peek_arg_length > $longest_arg_length;
1479 }
1480
1481 # Calculate the new indent if necessary.
ca711307
KW
1482 $arg_indent =
1483 $this_max_width - $longest_arg_length
783469eb 1484 if $arg_indent + $longest_arg_length
ca711307 1485 > $this_max_width;
783469eb
KW
1486 }
1487
1488 print $fh "\n", (" " x $arg_indent);
1489 $total_length = $arg_indent + $arg_length + 1;
1490 $use_blank = 0;
1491 }
1492
1493 # Display this argument
1494 print $fh " " if $use_blank;
1495 print $fh $arg;
1496 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1497
1498 } # End of loop through args
1499
1500 print $fh ")";
1ded1f42 1501 }
783469eb 1502
3734d2f5 1503 print $fh ";" if $item_flags =~ /;/; # semicolon: "dTHR;"
783469eb
KW
1504 print $fh "\n";
1505
1506 # Only the first entry is normally displayed
1507 last unless $need_individual_usage;
1ded1f42 1508 }
1ded1f42 1509 }
783469eb
KW
1510
1511 print $fh "\n=back\n";
94bdecf9 1512 }
783469eb
KW
1513
1514 print $fh "\n=back\n";
63da8140 1515 print $fh "\n=for hackers\nFound in file $file\n";
94bdecf9
JH
1516}
1517
02c5183e 1518sub construct_missings_section {
cda0e7fb 1519 my ($missings_hdr, $missings_ref) = @_;
02c5183e
KW
1520 my $text = "";
1521
cda0e7fb 1522 $text .= "$missings_hdr\n" . format_pod_indexes($missings_ref);
02c5183e 1523
cda0e7fb
KW
1524 if ($missings_ref->@* == 0) {
1525 return $text . "\nThere are currently no items of this type\n";
1526 }
e4f5ddf4
KW
1527
1528 # Sort the elements.
1529 my @missings = sort dictionary_order $missings_ref->@*;
1530
e4f5ddf4 1531
c7a2af98 1532 $text .= "\n";
e4f5ddf4
KW
1533
1534 use integer;
1535
1536 # Look through all the elements in the list and see how many columns we
1537 # could place them in the output what will fit in the available width.
1538 my $min_spacer = 2; # Need this much space between columns
1539 my $columns;
1540 my $rows;
1541 my @col_widths;
1542
1543 COLUMN:
1544 # We start with more columns, and work down until we find a number that
1545 # can accommodate all the data. This algorithm doesn't require the
1546 # resulting columns to all have the same width. This can allow for
1547 # as tight of packing as the data will possibly allow.
d788e675 1548 for ($columns = 7; $columns >= 1; $columns--) {
e4f5ddf4
KW
1549
1550 # For this many columns, we will need this many rows (final row might
1551 # not be completely filled)
1552 $rows = (@missings + $columns - 1) / $columns;
1553
d788e675
KW
1554 # We only need to execute this final iteration to calculate the number
1555 # of rows, as we can't get fewer than a single column.
1556 last if $columns == 1;
1557
9d663ab6 1558 my $row_width = 1; # For 1 space indent
e4f5ddf4
KW
1559 my $i = 0; # Which missing element
1560
1561 # For each column ...
1562 for my $col (0 .. $columns - 1) {
1563
1564 # Calculate how wide the column needs to be, which is based on the
1565 # widest element in it
1566 $col_widths[$col] = 0;
1567
1568 # Look through all the rows to find the widest element
1569 for my $row (0 .. $rows - 1) {
1570
1571 # Skip if this row doesn't have an entry for this column
1572 last if $i >= @missings;
1573
1574 # This entry occupies this many bytes.
1575 my $this_width = length $missings[$i];
1576
1577 # All but the final column need a spacer between it and the
1578 # next column over.
1579 $this_width += $min_spacer if $col < $columns - 1;
1580
1581
1582 # This column will need to have enough width to accommodate
1583 # this element
1584 if ($this_width > $col_widths[$col]) {
1585
1586 # We can't have this many columns if the total width
1587 # exceeds the available; bail now and try fewer columns
1588 next COLUMN if $row_width + $this_width > $max_width;
1589
1590 $col_widths[$col] = $this_width;
1591 }
1592
1593 $i++; # The next row will contain the next item
1594 }
1595
1596 $row_width += $col_widths[$col];
1597 next COLUMN if $row_width > $max_width;
1598 }
1599
1600 # If we get this far, this many columns works
1601 last;
1602 }
1603
1604 # Here, have calculated the number of rows ($rows) and columns ($columns)
1605 # required to list the elements. @col_widths contains the width of each
1606 # column.
1607
fa5d437b 1608 $text .= "\n";
e4f5ddf4
KW
1609
1610 # Assemble the output
1611 for my $row (0 .. $rows - 1) {
1612 for my $col (0 .. $columns - 1) {
1613 $text .= " " if $col == 0; # Indent one to mark as verbatim
1614
1615 my $index = $row + $rows * $col; # Convert 2 dimensions to 1
1616
1617 # Skip if this row doesn't have an entry for this column
1618 next if $index >= @missings;
1619
1620 my $element = $missings[$index];
1621 $text .= $element;
1622
1623 # Add alignment spaces for all but final column
1624 $text .= " " x ($col_widths[$col] - length $element)
1625 if $col < $columns - 1;
1626 }
02c5183e 1627
e4f5ddf4 1628 $text .= "\n"; # End of row
02c5183e
KW
1629 }
1630
02c5183e
KW
1631 return $text;
1632}
1633
1634sub dictionary_order {
431ea43d
KW
1635 # Do a case-insensitive dictionary sort, falling back in stages to using
1636 # everything for determinancy. The initial comparison ignores
1637 # all non-word characters and non-trailing underscores and digits, with
1638 # trailing ones collating to after any other characters. This collation
1639 # order continues in case tie breakers are needed; sequences of digits
1640 # that do get looked at always compare numerically. The first tie
1641 # breaker takes all digits and underscores into account. The next tie
1642 # breaker uses a caseless character-by-character comparison of everything
1643 # (including non-word characters). Finally is a cased comparison.
1644 #
1645 # This gives intuitive results, but obviously could be tweaked.
1646
1647 no warnings 'non_unicode';
1648
1649 local $a = $a;
1650 local $b = $b;
1651
1652 # Convert all digit sequences to same length with leading zeros, so for
1653 # example, 8 will compare less than 16 (using a fill length value that
1654 # should be longer than any sequence in the input).
1655 $a =~ s/(\d+)/sprintf "%06d", $1/ge;
1656 $b =~ s/(\d+)/sprintf "%06d", $1/ge;
1657
1658 # Translate any underscores and digits so they compare after all Unicode
1659 # characters
1660 $a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1661 $b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1662
1663 use feature 'state';
1664 # Modify \w, \W to reflect the changes.
1665 state $ud = '\x{110000}-\x{11000A}'; # xlated underscore, digits
1666 state $w = "\\w$ud"; # new \w string
1667 state $mod_w = qr/[$w]/;
1668 state $mod_W = qr/[^$w]/;
1669
1670 # Only \w for initial comparison
1671 my $a_only_word = uc($a =~ s/$mod_W//gr);
1672 my $b_only_word = uc($b =~ s/$mod_W//gr);
1673
1674 # And not initial nor interior underscores nor digits (by squeezing them
1675 # out)
1676 my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1677 my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1678
1679 # If the stripped versions differ, use that as the comparison.
1680 my $cmp = $a_stripped cmp $b_stripped;
1681 return $cmp if $cmp;
1682
1683 # For the first tie breaker, repeat, but consider initial and interior
1684 # underscores and digits, again having those compare after all Unicode
1685 # characters
1686 $cmp = $a_only_word cmp $b_only_word;
1687 return $cmp if $cmp;
1688
1689 # Next tie breaker is just a caseless comparison
1690 $cmp = uc($a) cmp uc($b);
1691 return $cmp if $cmp;
1692
1693 # Finally a straight comparison
1694 return $a cmp $b;
f83c6033
KW
1695}
1696
7b73ff98 1697sub output {
cda0e7fb 1698 my ($podname, $header, $dochash, $footer, @missings_refs) = @_;
6a4c4cd4
DM
1699 #
1700 # strip leading '|' from each line which had been used to hide
1701 # pod from pod checkers.
cda0e7fb 1702 s/^\|//gm for $header, $footer, @missings_refs;
6a4c4cd4 1703
7882b24a 1704 my $fh = open_new("pod/$podname.pod", undef,
20046047 1705 {by => "$0 extracting documentation",
f1f44974 1706 from => 'the C source files'}, 1);
e0492643 1707
63da8140 1708 print $fh $header, "\n";
e0492643 1709
02c5183e 1710 for my $section_name (sort dictionary_order keys %valid_sections) {
7a997878 1711 my $section_info = $dochash->{$section_name};
36ea2d2f 1712
5781dcac
KW
1713 # We allow empty sections in perlintern.
1714 if (! $section_info && $podname eq 'perlapi') {
36ea2d2f 1715 warn "Empty section '$section_name'; skipped";
36ea2d2f
KW
1716 next;
1717 }
1718
63da8140
KW
1719 print $fh "\n=head1 $section_name\n";
1720
2aa73c64 1721 if ($section_info->{X_tags}) {
52a83b1c 1722 print $fh "X<$_>" for sort keys $section_info->{X_tags}->%*;
2aa73c64
KW
1723 print $fh "\n";
1724 delete $section_info->{X_tags};
1725 }
1726
5781dcac
KW
1727 if ($podname eq 'perlapi') {
1728 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1729 if defined $valid_sections{$section_name}{header};
151c3fe5 1730
5781dcac
KW
1731 # Output any heading-level documentation and delete so won't get in
1732 # the way later
1733 if (exists $section_info->{""}) {
1734 print $fh "\n", $section_info->{""}, "\n";
1735 delete $section_info->{""};
1736 }
151c3fe5 1737 }
151c3fe5 1738
74ab7b79 1739 if ($section_info && keys $section_info->%*) {
02c5183e 1740 for my $function_name (sort dictionary_order keys %$section_info) {
5781dcac
KW
1741 docout($fh, $function_name, $section_info->{$function_name});
1742 }
1743 }
1744 else {
2aa73c64
KW
1745 my $pod_type = ($podname eq 'api') ? "public" : "internal";
1746 print $fh "\nThere are currently no $pod_type API items in ",
1747 $section_name, "\n";
20046047 1748 }
00b0fb83
KW
1749
1750 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
5781dcac
KW
1751 if $podname eq 'perlapi'
1752 && defined $valid_sections{$section_name}{footer};
7b73ff98
NC
1753 }
1754
cda0e7fb
KW
1755
1756 my $first_time = 1;
1757 while (1) {
1758 my $missings_hdr = shift @missings_refs or last;
1759 my $missings_ref = shift @missings_refs or die "Foo";
1760
1761 if ($first_time) {
1762 $first_time = 0;
1763 print $fh <<~EOT;
1764
1765 =head1 $undocumented_scn
1766
1767 EOT
1768 }
1769
1770 print $fh construct_missings_section($missings_hdr, $missings_ref);
1771 }
63da8140
KW
1772
1773 print $fh "\n$footer\n=cut\n";
5a0155e6 1774
7882b24a 1775 read_only_bottom_close_and_rename($fh);
cd093254
MM
1776}
1777
e8e591c9
NC
1778foreach (@{(setup_embed())[0]}) {
1779 next if @$_ < 2;
b1cb8a49 1780 my ($flags, $ret_type, $func, @args) = @$_;
e8e591c9 1781 s/\b(?:NN|NULLOK)\b\s+//g for @args;
bc350081 1782
5ce57792 1783 $funcflags{$func} = {
20046047 1784 flags => $flags,
b1cb8a49 1785 ret_type => $ret_type,
20046047
KE
1786 args => \@args,
1787 };
5ce57792
NC
1788}
1789
5ce57792
NC
1790# glob() picks up docs from extra .c or .h files that may be in unclean
1791# development trees.
741c0772
NC
1792open my $fh, '<', 'MANIFEST'
1793 or die "Can't open MANIFEST: $!";
1794while (my $line = <$fh>) {
b87d9527 1795 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 1796
e10306bf
KW
1797 # Don't pick up pods from these.
1798 next if $file =~ m! ^ ( cpan | dist | ext ) / !x
1799 && ! defined $extra_input_pods{$file};
7cb0b6c4 1800
1ae6ead9 1801 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
1802 autodoc(\*F,$file);
1803 close F or die "Error closing $file: $!\n";
1804}
741c0772 1805close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792 1806
00d68f0d
KW
1807parse_config_h();
1808
5ce57792
NC
1809for (sort keys %funcflags) {
1810 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 1811 next if $funcflags{$_}{flags} =~ /h/;
573cf2be 1812 warn "no docs for $_\n";
bc350081 1813}
94bdecf9 1814
5ce57792 1815foreach (sort keys %missing) {
5ce57792 1816 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
1817}
1818
ff5af78d
KW
1819# List of funcs in the public API that aren't also marked as core-only,
1820# experimental nor deprecated.
cda0e7fb
KW
1821
1822my @undocumented_api = grep { $funcflags{$_}{flags} =~ /A/
1823 && ! $docs{api}{$_}
1824 } keys %funcflags;
1825my @undocumented_intern = grep { $funcflags{$_}{flags} !~ /[AS]/
1826 && ! $docs{intern}{$_}
1827 } keys %funcflags;
1828my @undocumented_deprecated_api = grep { $funcflags{$_}{flags} =~ /D/ }
1829 @undocumented_api;
1830my @undocumented_deprecated_intern = grep { $funcflags{$_}{flags} =~ /D/ }
1831 @undocumented_intern;
1832my @undocumented_experimental_api = grep { $funcflags{$_}{flags} =~ /x/ }
1833 @undocumented_api;
1834my @undocumented_experimental_intern = grep { $funcflags{$_}{flags} =~ /x/ }
1835 @undocumented_intern;
1836my @missing_api = grep { $funcflags{$_}{flags} !~ /[xD]/ } @undocumented_api;
00d68f0d 1837push @missing_api, keys %missing_macros;
a026e430 1838
cda0e7fb
KW
1839my @missing_intern = grep { $funcflags{$_}{flags} !~ /[xD]/ }
1840 @undocumented_intern;
1841
e10306bf 1842my @other_places = ( qw(perlclib ), keys %described_elsewhere );
ea7c1d7a
KW
1843my $places_other_than_intern = join ", ",
1844 map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1845my $places_other_than_api = join ", ",
1846 map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
a026e430 1847
00d68f0d 1848# The S< > makes things less densely packed, hence more readable
02c5183e
KW
1849my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1850my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
3951f999
KW
1851$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1852$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
00d68f0d 1853
02c5183e 1854my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
3951f999 1855$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
00d68f0d 1856
55d5a9fe
KW
1857my $section_list = join "\n\n", map { "=item L</$_>" }
1858 sort(dictionary_order keys %valid_sections),
1859 $undocumented_scn; # Keep last
7260d4ba 1860
cda0e7fb
KW
1861# Leading '|' is to hide these lines from pod checkers. khw is unsure if this
1862# is still needed.
1863my $api_hdr = <<"_EOB_";
6a4c4cd4
DM
1864|=encoding UTF-8
1865|
1866|=head1 NAME
1867|
1868|perlapi - autogenerated documentation for the perl public API
1869|
1870|=head1 DESCRIPTION
1871|X<Perl API> X<API> X<api>
1872|
b87d9527
KW
1873|This file contains most of the documentation of the perl public API, as
1874|generated by F<embed.pl>. Specifically, it is a listing of functions,
a026e430
KW
1875|macros, flags, and variables that may be used by extension writers. Besides
1876|L<perlintern> and F<config.h>, some items are listed here as being actually
1877|documented in another pod.
b87d9527 1878|
55d5a9fe 1879|L<At the end|/$undocumented_scn> is a list of functions which have yet
b87d9527
KW
1880|to be documented. Patches welcome! The interfaces of these are subject to
1881|change without notice.
1882|
6927cd44
KW
1883|Some of the functions documented here are consolidated so that a single entry
1884|serves for multiple functions which all do basically the same thing, but have
1885|some slight differences. For example, one form might process magic, while
1886|another doesn't. The name of each variation is listed at the top of the
1887|single entry. But if all have the same signature (arguments and return type)
1888|except for their names, only the usage for the base form is shown. If any
1889|one of the forms has a different signature (such as returning C<const> or
1890|not) every function's signature is explicitly displayed.
7260d4ba 1891|
a026e430
KW
1892|Anything not listed here or in the other mentioned pods is not part of the
1893|public API, and should not be used by extension writers at all. For these
1894|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1895|writing extensions.
6a4c4cd4
DM
1896|
1897|In Perl, unlike C, a string of characters may generally contain embedded
1898|C<NUL> characters. Sometimes in the documentation a Perl string is referred
1899|to as a "buffer" to distinguish it from a C string, but sometimes they are
1900|both just referred to as strings.
1901|
1902|Note that all Perl API global variables must be referenced with the C<PL_>
1903|prefix. Again, those not listed here are not to be used by extension writers,
c5321c96 1904|and may be changed or removed without notice; same with macros.
6a4c4cd4
DM
1905|Some macros are provided for compatibility with the older,
1906|unadorned names, but this support may be disabled in a future release.
1907|
1908|Perl was originally written to handle US-ASCII only (that is characters
1909|whose ordinal numbers are in the range 0 - 127).
1910|And documentation and comments may still use the term ASCII, when
1911|sometimes in fact the entire range from 0 - 255 is meant.
1912|
1913|The non-ASCII characters below 256 can have various meanings, depending on
1914|various things. (See, most notably, L<perllocale>.) But usually the whole
1915|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1916|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1917|"Latin1" as referring just to the characters in the range 128 through 255, or
2de859ce 1918|sometimes from 160 through 255.
6a4c4cd4
DM
1919|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1920|
1921|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1922|L<perlebcdic>). Most of the documentation (and even comments in the code)
1923|ignore the EBCDIC possibility.
1924|For almost all purposes the differences are transparent.
1925|As an example, under EBCDIC,
1926|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1927|whenever this documentation refers to C<utf8>
1928|(and variants of that name, including in function names),
1929|it also (essentially transparently) means C<UTF-EBCDIC>.
1930|But the ordinals of characters differ between ASCII, EBCDIC, and
1931|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1932|number of bytes than in UTF-8.
1933|
6927cd44
KW
1934|The organization of this document is tentative and subject to change.
1935|Suggestions and patches welcome
1936|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1937|
1938|The sections in this document currently are
1939|
014f1aa3 1940|=over $standard_indent
6927cd44
KW
1941
1942|$section_list
1943|
1944|=back
1945|
6a4c4cd4 1946|The listing below is alphabetical, case insensitive.
94bdecf9 1947_EOB_
cda0e7fb
KW
1948
1949my $api_footer = <<"_EOE_";
6a4c4cd4
DM
1950|=head1 AUTHORS
1951|
1952|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 1953|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
1954|
1955|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1956|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1957|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1958|Stephen McCamant, and Gurusamy Sarathy.
1959|
7b1f0a98 1960|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
1961|
1962|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1963|
1964|=head1 SEE ALSO
1965|
ea7c1d7a 1966|F<config.h>, $places_other_than_api
94bdecf9
JH
1967_EOE_
1968
cda0e7fb
KW
1969my $api_missings_hdr = <<'_EOT_';
1970|The following functions have been flagged as part of the public
1971|API, but are currently undocumented. Use them at your own risk,
1972|as the interfaces are subject to change. Functions that are not
1973|listed in this document are not intended for public use, and
1974|should NOT be used under any circumstances.
1975|
1976|If you feel you need to use one of these functions, first send
1977|email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1978|It may be that there is a good reason for the function not being
1979|documented, and it should be removed from this list; or it may
1980|just be that no one has gotten around to documenting it. In the
1981|latter case, you will be asked to submit a patch to document the
1982|function. Once your patch is accepted, it will indicate that the
1983|interface is stable (unless it is explicitly marked otherwise) and
1984|usable by you.
1985_EOT_
1986
1987my $api_experimental_hdr = <<"_EOT_";
1988|
1989|Next are the API-flagged elements that are considered experimental. Using one
1990|of these is even more risky than plain undocumented ones. They are listed
1991|here because they should be listed somewhere (so their existence doesn't get
1992|lost) and this is the best place for them.
1993_EOT_
1994
1995my $api_deprecated_hdr = <<"_EOT_";
1996|
1997|Finally are deprecated undocumented API elements.
1998|Do not use any for new code; remove all occurrences of all of these from
1999|existing code.
2000_EOT_
2001
2002output('perlapi', $api_hdr, $docs{api}, $api_footer,
2003 $api_missings_hdr, \@missing_api,
2004 $api_experimental_hdr, \@undocumented_experimental_api,
2005 $api_deprecated_hdr, \@undocumented_deprecated_api);
5a0155e6 2006
cda0e7fb 2007my $intern_hdr = <<"_EOB_";
6a4c4cd4
DM
2008|=head1 NAME
2009|
2010|perlintern - autogenerated documentation of purely B<internal>
20046047 2011|Perl functions
6a4c4cd4
DM
2012|
2013|=head1 DESCRIPTION
2014|X<internal Perl functions> X<interpreter functions>
2015|
2016|This file is the autogenerated documentation of functions in the
2017|Perl interpreter that are documented using Perl's internal documentation
2018|format but are not marked as part of the Perl API. In other words,
2019|B<they are not for use in extensions>!
5781dcac
KW
2020
2021|It has the same sections as L<perlapi>, though some may be empty.
6a4c4cd4 2022|
7b1f0a98 2023_EOB_
cda0e7fb
KW
2024
2025my $intern_footer = <<"_EOE_";
6a4c4cd4
DM
2026|
2027|=head1 AUTHORS
2028|
2029|The autodocumentation system was originally added to the Perl core by
2030|Benjamin Stuhl. Documentation is by whoever was kind enough to
2031|document their functions.
2032|
2033|=head1 SEE ALSO
2034|
ea7c1d7a 2035|F<config.h>, $places_other_than_intern
7b1f0a98 2036_EOE_
cda0e7fb
KW
2037
2038my $intern_missings_hdr = <<"_EOT_";
2039|
2040|This section lists the elements that are otherwise undocumented. If you use
2041|any of them, please consider creating and submitting documentation for it.
2042|
2043|Experimental and deprecated undocumented elements are listed separately at the
2044|end.
2045|
2046_EOT_
2047
2048my $intern_experimental_hdr = <<"_EOT_";
2049|
2050|Next are the experimental undocumented elements
2051|
2052_EOT_
2053
2054my $intern_deprecated_hdr = <<"_EOT_";
2055|
2056|Finally are the deprecated undocumented elements.
2057|Do not use any for new code; remove all occurrences of all of these from
2058|existing code.
2059|
2060_EOT_
2061
2062output('perlintern', $intern_hdr, $docs{intern}, $intern_footer,
2063 $intern_missings_hdr, \@missing_intern,
2064 $intern_experimental_hdr, \@undocumented_experimental_intern,
2065 $intern_deprecated_hdr, \@undocumented_deprecated_intern
2066 );