This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Takai Kousuke is now a Perl author
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
00d68f0d
KW
2
3use Text::Tabs;
a9a7c81e 4#
6294c161
DM
5# Unconditionally regenerate:
6#
7# pod/perlintern.pod
8# pod/perlapi.pod
9#
10# from information stored in
11#
12# embed.fnc
9b983db7 13# plus all the core .c, .h, and .pod files listed in MANIFEST
6294c161
DM
14#
15# Has an optional arg, which is the directory to chdir to before reading
9b983db7 16# MANIFEST and the files
6294c161 17#
52a9d53b 18# This script is invoked as part of 'make all'
151c3fe5 19#
9b983db7
KW
20# The generated pod consists of sections of related elements, functions,
21# macros, and variables. The keys of %valid_sections give the current legal
22# ones. Just add a new key to add a section.
23#
24# Throughout the files read by this script are lines like
25#
26# =for apidoc_section Section Name
27#
28# "Section Name" (after having been stripped of leading space) must be one of
29# the legal section names, or an error is thrown. All API elements defined
30# between this line and the next 'apidoc_section' line will go into the
31# section "Section Name", sorted by dictionary order within it. perlintern
32# and perlapi are parallel documents, each potentially with a section "Section
33# Name". Each element is marked as to which document it goes into. If there
34# are none for a particular section in perlapi, that section is
35# omitted.
36#
37# Also, in .[ch] files, there may be
38#
39# =head1 Section Name
40#
41# lines in comments. These are also used by this program to switch to section
42# "Section Name". The difference is that if there are any lines after the
43# =head1, inside the same comment, and before any =for apidoc-ish lines, they
44# are used as a heading for section "Section Name" (in both perlintern and
45# perlapi). This includes any =head[2-5]. If more than one '=head1 Section
46# Name' line has content, they appear in the generated pod in an undefined
47# order.
48#
49# The next =head1, =for apidoc_section, or file end terminates what goes into
50# the current section
51#
52# The %valid_sections hash below also can have header content, which will
53# appear before any =head1 content. The hash can also have footer content
54# content, which will appear at the end of the section, after all the
55# elements.
56#
57# The lines that define the actual functions, etc are documented in embed.fnc,
58# because they have flags which must be kept in sync with that file.
1fcde0e9 59
56a0c332 60use strict;
04adaf08 61use warnings;
a64c954a 62
7882b24a
NC
63if (@ARGV) {
64 my $workdir = shift;
65 chdir $workdir
66 or die "Couldn't chdir to '$workdir': $!";
67}
3d7c117d
MB
68require './regen/regen_lib.pl';
69require './regen/embed_lib.pl';
7882b24a 70
a026e430 71my %described_elsewhere;
b87d9527 72
94bdecf9 73#
346f75ff 74# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
75# This is used to generate prototype headers under various configurations,
76# export symbols lists for different platforms, and macros to provide an
77# implicit interpreter context argument.
78#
79
6a235718 80my %docs;
df6bd76f 81my %seen;
5ce57792 82my %funcflags;
5ce57792 83my %missing;
00d68f0d
KW
84my %missing_macros;
85
86my $link_text = "Described in";
87
783469eb
KW
88my $description_indent = 4;
89my $usage_indent = 3; # + initial blank yields 4 total
90
00d68f0d 91my %valid_sections = (
6927cd44
KW
92 'AV Handling' => {},
93 'Callback Functions' => {},
94 'Casting' => {},
95 'Character case changing' => {},
96 'Character classification' => {},
97 'Compile-time scope hooks' => {},
98 'Compiler and Preprocessor information' => {},
99 'Compiler directives' => {},
100 'Concurrency' => {},
101 'COP Hint Hashes' => {},
102 'CV Handling' => {},
103 'Custom Operators' => {},
104 'Display and Dump functions' => {},
105 'Embedding and Interpreter Cloning' => {},
106 'Errno' => {},
107 'Exception Handling (simple) Macros' => {},
108 'Filesystem configuration values' => {
109 header => <<~'EOT',
110 Also see L</List of capability HAS_foo symbols>.
111 EOT
112 },
00d68f0d
KW
113 'Floating point configuration values' => {
114 header => <<~'EOT',
6927cd44
KW
115 Also L</List of capability HAS_foo symbols> lists capabilities
116 that arent in this section. For example C<HAS_ASINH>, for the
117 hyperbolic sine function.
118 EOT
119 },
120 'Formats' => {
121 header => <<~'EOT',
122 These are used for formatting the corresponding type For example,
123 instead of saying
124
125 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
126
127 use
128
129 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
130
131 This keeps you from having to know if, say an IV, needs to be
132 printed as C<%d>, C<%ld>, or something else.
00d68f0d
KW
133 EOT
134 },
135 'General Configuration' => {
136 header => <<~'EOT',
137 This section contains configuration information not otherwise
138 found in the more specialized sections of this document. At the
139 end is a list of C<#defines> whose name should be enough to tell
140 you what they do, and a list of #defines which tell you if you
141 need to C<#include> files to get the corresponding functionality.
142 EOT
143
144 footer => <<~'EOT',
145
146 =head2 List of capability C<HAS_I<foo>> symbols
147
6927cd44
KW
148 This is a list of those symbols that dont appear elsewhere in ths
149 document that indicate if the current platform has a certain
150 capability. Their names all begin with C<HAS_>. Only those
151 symbols whose capability is directly derived from the name are
152 listed here. All others have their meaning expanded out elsewhere
153 in this document. This (relatively) compact list is because we
154 think that the expansion would add little or no value and take up
155 a lot of space (because there are so many). If you think certain
156 ones should be expanded, send email to
157 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
00d68f0d
KW
158
159 Each symbol here will be C<#define>d if and only if the platform
160 has the capability. If you need more detail, see the
6927cd44
KW
161 corresponding entry in F<config.h>. For convenience, the list is
162 split so that the ones that indicate there is a reentrant version
163 of a capability are listed separately
00d68f0d
KW
164
165 __HAS_LIST__
166
6927cd44
KW
167 And, the reentrant capabilities:
168
169 __HAS_R_LIST__
170
00d68f0d
KW
171 Example usage:
172
173 =over
174
175 #ifdef HAS_STRNLEN
176 use strnlen()
177 #else
178 use an alternative implementation
179 #endif
180
181 =back
182
183 =head2 List of C<#include> needed symbols
184
185 This list contains symbols that indicate if certain C<#include>
186 files are present on the platform. If your code accesses the
187 functionality that one of these is for, you will need to
188 C<#include> it if the symbol on this list is C<#define>d. For
189 more detail, see the corresponding entry in F<config.h>.
190
191 __INCLUDE_LIST__
192
193 Example usage:
194
195 =over
196
197 #ifdef I_WCHAR
198 #include <wchar.h>
199 #endif
200
201 =back
202 EOT
203 },
6927cd44
KW
204 'Global Variables' => {},
205 'GV Handling' => {},
206 'Hook manipulation' => {},
207 'HV Handling' => {},
208 'Input/Output' => {},
209 'Integer configuration values' => {},
210 'Lexer interface' => {},
211 'Locales' => {},
212 'Magic' => {},
213 'Memory Management' => {},
214 'MRO' => {},
215 'Multicall Functions' => {},
216 'Numeric Functions' => {},
217 'Optree construction' => {},
218 'Optree Manipulation Functions' => {},
219 'Pack and Unpack' => {},
220 'Pad Data Structures' => {},
221 'Password and Group access' => {},
222 'Paths to system commands' => {},
223 'Per-Interpreter Variables' => {},
224 'Prototype information' => {},
225 'REGEXP Functions' => {},
226 'Signals' => {},
227 'Site configuration' => {
228 header => <<~'EOT',
229 These variables give details as to where various libraries,
230 installation destinations, I<etc.>, go, as well as what various
231 installation options were selected
232 EOT
233 },
234 'Sockets configuration values' => {},
235 'Source Filters' => {},
236 'Stack Manipulation Macros' => {},
237 'String Handling' => {
238 header => <<~'EOT',
239 See also C<L</Unicode Support>>.
240 EOT
241 },
242 'SV Flags' => {},
243 'SV Handling' => {},
244 'Time' => {},
245 'Typedef names' => {},
246 'Unicode Support' => {},
247 'Utility Functions' => {},
248 'Versioning' => {},
249 'Warning and Dieing' => {},
250 'XS' => {},
00d68f0d 251);
94bdecf9 252
9e019b59
KW
253# Somewhat loose match for an apidoc line so we can catch minor typos.
254# Parentheses are used to capture portions so that below we verify
255# that things are the actual correct syntax.
256my $apidoc_re = qr/ ^ (\s*) # $1
257 (=?) # $2
258 (\s*) # $3
259 for (\s*) # $4
260 apidoc (_item)? # $5
261 (\s*) # $6
262 (.*?) # $7
263 \s* \n /x;
183f6e59
KW
264# Only certain flags, dealing with display, are acceptable for apidoc_item
265my $display_flags = "fFnDopsT";
9e019b59
KW
266
267sub check_api_doc_line ($$) {
268 my ($file, $in) = @_;
269
270 return unless $in =~ $apidoc_re;
271
272 my $is_item = defined $5;
273 my $is_in_proper_form = length $1 == 0
274 && length $2 > 0
275 && length $3 == 0
276 && length $4 > 0
277 && length $6 > 0
278 && length $7 > 0;
279 my $proto_in_file = $7;
280 my $proto = $proto_in_file;
281 $proto = "||$proto" if $proto !~ /\|/;
282 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
283
284 $name && $is_in_proper_form or die <<EOS;
285Bad apidoc at $file line $.:
286 $in
287Expected:
288 =for apidoc flags|returntype|name|arg|arg|...
289 =for apidoc flags|returntype|name
290 =for apidoc name
291(or 'apidoc_item')
292EOS
293
783469eb
KW
294 die "Only [$display_flags] allowed in apidoc_item"
295 if $is_item && $flags =~ /[^$display_flags]/;
296
9e019b59
KW
297 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
298}
94bdecf9 299
4be3b9ca
KW
300sub embed_override($) {
301 my ($element_name) = shift;
302
303 # If the entry is also in embed.fnc, it should be defined
304 # completely there, but not here
305 my $embed_docref = delete $funcflags{$element_name};
306
307 return unless $embed_docref and %$embed_docref;
308
309 my $flags = $embed_docref->{'flags'};
310 warn "embed.fnc entry '$element_name' missing 'd' flag"
311 unless $flags =~ /d/;
312
313 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
314}
315
94bdecf9
JH
316sub autodoc ($$) { # parse a file and extract documentation info
317 my($fh,$file) = @_;
63da8140 318 my($in, $line_num, $header, $section);
f554dfc5 319
3cfa66c4
KW
320 my $file_is_C = $file =~ / \. [ch] $ /x;
321
f554dfc5 322 # Count lines easier
b1cb8a49 323 my $get_next_line = sub { $line_num++; return <$fh> };
f554dfc5 324
63da8140
KW
325 # Read the file
326 while ($in = $get_next_line->()) {
327 last unless defined $in;
3cfa66c4 328
63da8140
KW
329 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
330 # =head1 lines only have effect in C files
331 || ($file_is_C && $in =~ /^=head1/));
332
333 # Here, the line introduces a portion of the input that we care about.
334 # Either it is for an API element, or heading text which we expect
335 # will be used for elements later in the file
336
337 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
183f6e59 338 my (@args, @items);
63da8140
KW
339
340 # If the line starts a new section ...
341 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
183f6e59 342
b1cb8a49 343 $section = $1;
6927cd44
KW
344 die "Unknown section name '$section' in $file near line $.\n"
345 unless defined $valid_sections{$section};
346
3cfa66c4 347 }
63da8140
KW
348 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
349 # plain apidoc line
183f6e59
KW
350 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
351 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
63da8140
KW
352 }
353 else { # Plain apidoc
354
355 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
356 = check_api_doc_line($file, $in);
4be3b9ca
KW
357 # Override this line with any info in embed.fnc
358 my ($embed_flags, $embed_ret_type, @embed_args)
359 = embed_override($element_name);
360 if ($embed_ret_type) {
63da8140 361 warn "embed.fnc entry overrides redundant information in"
4be3b9ca
KW
362 . " '$proto_in_file' in $file"
363 if $flags || $ret_type || @args;
364 $flags = $embed_flags;
365 $ret_type = $embed_ret_type;
366 @args = @embed_args;
367 }
368 elsif ($flags !~ /m/) { # Not in embed.fnc, is missing if not a macro
63da8140
KW
369 $missing{$element_name} = $file;
370 }
3cfa66c4 371
63da8140
KW
372 die "flag $1 is not legal (for function $element_name (from $file))"
373 if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXx] ) /x;
151c3fe5 374
63da8140
KW
375 die "'u' flag must also have 'm' flag' for $element_name"
376 if $flags =~ /u/ && $flags !~ /m/;
377 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
378 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
f554dfc5 379
63da8140
KW
380 if (exists $seen{$element_name} && $flags !~ /h/) {
381 die ("'$element_name' in $file was already documented in $seen{$element_name}");
382 }
383 else {
384 $seen{$element_name} = $file;
151c3fe5 385 }
94bdecf9 386 }
df6bd76f 387
63da8140
KW
388 # Here we have processed the initial line in the heading text or API
389 # element, and have saved the important information from it into the
390 # corresponding variables. Now accumulate the text that applies to it
391 # up to a terminating line, which is one of:
392 # 1) =cut
393 # 2) =head (in a C file only =head1)
394 # 3) an end comment line in a C file: m:^\s*\*/:
183f6e59 395 # 4) =for apidoc... (except apidoc_item lines)
63da8140
KW
396 $text = "";
397 my $head_ender_num = ($file_is_C) ? 1 : "";
398 while (defined($in = $get_next_line->())) {
399
400 last if $in =~ /^=cut/x;
401 last if $in =~ /^=head$head_ender_num/;
402
403 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
404
405 # End of comment line in C files is a fall-back terminator,
406 # but warn only if there actually is some accumulated text
407 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
408 last;
df6bd76f 409 }
df6bd76f 410
63da8140
KW
411 if ($in !~ / ^ =for [ ]+ apidoc /x) {
412 $text .= $in;
413 next;
414 }
415
183f6e59 416 # Here, the line is an apidoc line. All but apidoc_item terminate
63da8140 417 # the text being accumulated.
183f6e59
KW
418 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
419
420 my ($item_name, $item_flags, $item_ret_type, $is_item,
4be3b9ca 421 $item_proto, @item_args) = check_api_doc_line($file, $in);
183f6e59
KW
422 last unless $is_item;
423
424 # Here, is an apidoc_item_line; They can only come within apidoc
425 # paragraphs.
4be3b9ca
KW
426 die "Unexpected api_doc_item line '$item_proto'"
427 unless $element_name;
183f6e59
KW
428
429 # We accept blank lines between these, but nothing else;
430 die "apidoc_item lines must immediately follow apidoc lines"
431 if $text =~ /\S/;
4be3b9ca
KW
432 # Override this line with any info in embed.fnc
433 my ($embed_flags, $embed_ret_type, @embed_args)
434 = embed_override($item_name);
435 if ($embed_ret_type) {
436 warn "embed.fnc entry overrides redundant information in"
437 . " '$item_proto' in $file"
438 if $item_flags || $item_ret_type || @item_args;
439
440 $item_flags = $embed_flags;
441 $item_ret_type = $embed_ret_type;
442 @item_args = @embed_args;
443 }
183f6e59
KW
444
445 # Use the base entry flags if none for this item; otherwise add in
446 # any non-display base entry flags.
447 if ($item_flags) {
448 $item_flags .= $flags =~ s/[$display_flags]//rg;
449 }
450 else {
451 $item_flags = $flags;
452 }
453 $item_ret_type = $ret_type unless $item_ret_type;
454 @item_args = @args unless @item_args;
455 push @items, { name => $item_name,
456 ret_type => $item_ret_type,
457 flags => $item_flags,
458 args => [ @item_args ],
459 };
460
461 # This line shows that this element is documented.
462 delete $funcflags{$item_name};
a9a7c81e 463 }
3cfa66c4 464
63da8140
KW
465 # Here, are done accumulating the text for this item. Trim it
466 $text =~ s/ ^ \s* //x;
467 $text =~ s/ \s* $ //x;
468 $text .= "\n" if $text ne "";
6523e108 469
63da8140
KW
470 # And treat all-spaces as nothing at all
471 undef $text unless $text =~ /\S/;
df6bd76f 472
63da8140 473 if ($element_name) {
df6bd76f 474
63da8140
KW
475 # Here, we have accumulated into $text, the pod for $element_name
476 my $where = $flags =~ /A/ ? 'api' : 'guts';
5ce57792 477
63da8140
KW
478 $section = "Functions in file $file" unless defined $section;
479 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
480 unless defined $section;
481 if (exists $docs{$where}{$section}{$element_name}) {
482 warn "$0: duplicate API entry for '$element_name' in"
483 . " $where/$section\n";
484 next;
20046047 485 }
5ce57792 486
63da8140
KW
487 # Override the text with just a link if the flags call for that
488 my $is_link_only = ($flags =~ /h/);
489 if ($is_link_only) {
490 if ($file_is_C) {
183f6e59 491 die "Can't currently handle link with items to it" if @items;
63da8140
KW
492 redo; # Don't put anything if C source
493 }
494
495 # Here, is an 'h' flag in pod. We add a reference to the pod (and
496 # nothing else) to perlapi/intern. (It would be better to add a
497 # reference to the correct =item,=header, but something that makes
498 # it harder is that it that might be a duplicate, like '=item *';
499 # so that is a future enhancement XXX. Another complication is
500 # there might be more than one deserving candidates.)
501 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
502 $podname =~ s/\.pod//;
503 $text = "Described in L<$podname>.\n";
504
505 # Don't output a usage example for linked to documentation if
506 # it is trivial (has no arguments) and we aren't to add a
507 # semicolon
508 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
509
510 # Keep track of all the pod files that we refer to.
511 push $described_elsewhere{$podname}->@*, $podname;
512 }
5ce57792 513
5bf02caf
KW
514 $docs{$where}{$section}{$element_name}{flags} = $flags;
515 $docs{$where}{$section}{$element_name}{pod} = $text;
516 $docs{$where}{$section}{$element_name}{file} = $file;
517 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
518 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
183f6e59 519 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
a9a7c81e 520 }
63da8140
KW
521 elsif ($text) {
522 $valid_sections{$section}{header} = "" unless
523 defined $valid_sections{$section}{header};
524 $valid_sections{$section}{header} .= "\n$text";
a9a7c81e 525 }
63da8140
KW
526
527 # We already have the first line of what's to come in $in
528 redo;
529
530 } # End of loop through input
94bdecf9
JH
531}
532
00d68f0d
KW
533my %configs;
534my @has_defs;
535my @has_r_defs; # Reentrant symbols
536my @include_defs;
537sub parse_config_h {
538 use re '/aa'; # Everthing is ASCII in this file
539
540 # Process config.h
d3871820
KW
541 my $config_h = 'config.h';
542 $config_h = 'win32/config.h' unless -e $config_h;
543 die "Can't find $config_h" unless -e $config_h;
544 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
00d68f0d
KW
545 while (<$fh>) {
546
547 # Look for lines like /* FOO_BAR:
548 # By convention all config.h descriptions begin like that
549 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
550 my $name = $1;
551
552 # Here we are starting the description for $name in config.h. We
553 # accumulate the entire description for it into @description.
554 # Flowing text from one input line to another is appended into the
555 # same array element to make a single flowing line element, but
556 # verbatim lines are kept as separate elements in @description.
557 # This will facilitate later doing pattern matching without regard
558 # to line boundaries on non-verbatim text.
559
560 die "Multiple config.h entries for '$name'"
561 if defined $configs{$name}{description};
562
563 # Get first line of description
564 $_ = <$fh>;
565
566 # Each line in the description begins with blanks followed by '/*'
567 # and some spaces.
568 die "Unexpected config.h initial line for $name: '$_'"
569 unless s/ ^ ( \s* \* \s* ) //x;
570 my $initial_text = $1;
571
572 # Initialize the description with this first line (after having
573 # stripped the prefix text)
574 my @description = $_;
575
576 # The first line is used as a template for how much indentation
577 # each normal succeeding line has. Lines indented further
578 # will be considered as intended to be verbatim. But, empty lines
579 # likely won't have trailing blanks, so just strip the whole thing
580 # for them.
581 my $strip_initial_qr = qr! \s* \* \s* $
582 | \Q$initial_text\E
583 !x;
584 $configs{$name}{verbatim} = 0;
585
586 # Read in the remainder of the description
587 while (<$fh>) {
588 last if s| ^ \s* \* / ||x; # A '*/' ends it
589
590 die "Unexpected config.h description line for $name: '$_'"
591 unless s/$strip_initial_qr//;
592
593 # Fix up the few flawed lines in config.h wherein a new
594 # sentence begins with a tab (and maybe a space after that).
595 # Although none of them currently do, let it recognize
596 # something like
597 #
598 # "... text"). The next sentence ...
599 #
600 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
601
602 # If this line has extra indentation or looks to have columns,
603 # it should be treated as verbatim. Columns are indicated by
604 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
605 # not preceded by punctuation.
606 if ($_ !~ m/ ^ \s
607 | \S (?: \t
608 | \s{3}
609 | (*nlb:[[:punct:]]) \s{2}
610 )
611 /x)
612 {
613 # But here, is not a verbatim line. Add an empty line if
614 # this is the first non-verbatim after a run of verbatims
615 if ($description[-1] =~ /^\s/) {
616 push @description, "\n", $_;
617 }
618 else { # Otherwise, append this flowing line to the
619 # current flowing line
620 $description[-1] .= $_;
621 }
622 }
623 else {
624 $configs{$name}{verbatim} = 1;
625
626 # The first verbatim line in a run of them is separated by an
627 # empty line from the flowing lines above it
628 push @description, "\n" if $description[-1] =~ /^\S/;
629
630 $_ = Text::Tabs::expand($_);
631
632 # Only a single space so less likely to wrap
633 s/ ^ \s* / /x;
634
635 push @description, $_;
636 }
637 }
638
639 push $configs{$name}{description}->@*, @description
640
641 } # Not a description; see if it is a macro definition.
642 elsif (m! ^
643 (?: / \* )? # Optional commented-out
644 # indication
645 \# \s* define \s+ ( \w+ ) # $1 is the name
646 ( \s* ) # $2 indicates if args or not
647 ( .*? ) # $3 is any definition
648 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
649 $
650 !x)
651 {
652 my $name = $1;
653
654 # There can be multiple definitions for a name. We want to know
655 # if any of them has arguments, and if any has a body.
656 $configs{$name}{has_args} //= $2 eq "";
657 $configs{$name}{has_args} ||= $2 eq "";
658 $configs{$name}{has_defn} //= $3 ne "";
659 $configs{$name}{has_defn} ||= $3 ne "";
660 }
661 }
662
663 # We now have stored the description and information about every #define
664 # in the file. The description is in a form convenient to operate on to
665 # convert to pod. Do that now.
666 foreach my $name (keys %configs) {
667 next unless defined $configs{$name}{description};
668
669 # All adjacent non-verbatim lines of the description are appended
670 # together in a single element in the array. This allows the patterns
671 # to work across input line boundaries.
672
673 my $pod = "";
674 while (defined ($_ = shift $configs{$name}{description}->@*)) {
675 chomp;
676
677 if (/ ^ \S /x) { # Don't edit verbatim lines
678
679 # Enclose known file/path names not already so enclosed
680 # with <...>. (Some entries in config.h are already
681 # '<path/to/file>')
682 my $file_name_qr = qr! [ \w / ]+ \.
683 (?: c | h | xs | p [lm] | pmc | PL
684 | sh | SH | exe ) \b
685 !xx;
686 my $path_name_qr = qr! (?: / \w+ )+ !x;
687 for my $re ($file_name_qr, $path_name_qr) {
688 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
689 }
690
691 # Enclose <... file/path names with F<...> (but no double
692 # angle brackets)
693 for my $re ($file_name_qr, $path_name_qr) {
694 s! < ( $re ) > !F<$1>!gxx;
695 }
696
697 # Explain metaconfig units
698 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
699
700 # Convert "See foo" to "See C<L</foo>>" if foo is described in
701 # this file. Also create a link to the known file INSTALL.
702 # And, to be more general, handle "See also foo and bar", and
703 # "See also foo, bar, and baz"
704 while (m/ \b [Ss]ee \s+
705 (?: also \s+ )? ( \w+ )
706 (?: , \s+ ( \w+ ) )?
707 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
708 my @links = $1;
709 push @links, $2 if defined $2;
710 push @links, $3 if defined $3;
711 foreach my $link (@links) {
712 if ($link eq 'INSTALL') {
713 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
714 }
715 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
716 s| \b $link \b |C<L</$link>>|xg;
717 $configs{$link}{linked} = 1;
718 $configs{$name}{linked} = 1;
719 }
720 }
721 }
722
723 # Enclose what we think are symbols with C<...>.
724 no warnings 'experimental::vlb';
725 s/ (*nlb:<)
726 (
727 # Any word followed immediately with parens or
728 # brackets
729 \b \w+ (?: \( [^)]* \) # parameter list
730 | \[ [^]]* \] # or array reference
731 )
732 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
733 | \b (?: struct | union ) \s \w+
734
735 # Words that contain underscores (which are
736 # definitely not text) or three uppercase letters in
737 # a row. Length two ones, like IV, aren't enclosed,
738 # because they often don't look as nice.
739 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
740 )
741 (*nla:>)
742 /C<$1>/xg;
743
744 # These include foo when the name is HAS_foo. This is a
745 # heuristic which works in most cases.
746 if ($name =~ / ^ HAS_ (.*) /x) {
747 my $symbol = lc $1;
748
749 # Don't include path components, nor things already in
750 # <>, or with trailing '(', '['
751 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
752 }
753 }
754
755 $pod .= "$_\n";
756 }
757 delete $configs{$name}{description};
758
759 $configs{$name}{pod} = $pod;
760 }
761
762 # Now have converted the description to pod. We also now have enough
763 # information that we can do cross checking to find definitions without
764 # corresponding pod, and see if they are mentioned in some description;
765 # otherwise they aren't documented.
766 NAME:
767 foreach my $name (keys %configs) {
768
769 # A definition without pod
770 if (! defined $configs{$name}{pod}) {
771
772 # Leading/trailing underscore means internal to config.h, e.g.,
773 # _GNU_SOURCE
774 next if $name =~ / ^ _ /x;
775 next if $name =~ / _ $ /x;
776
777 # MiXeD case names are internal to config.h; the first 4
778 # characters are sufficient to determine this
779 next if $name =~ / ^ [[:upper:]] [[:lower:]]
780 [[:upper:]] [[:lower:]]
781 /x;
782
783 # Here, not internal to config.h. Look to see if this symbol is
784 # mentioned in the pod of some other. If so, assume it is
785 # documented.
786 foreach my $check_name (keys %configs) {
787 my $this_element = $configs{$check_name};
788 my $this_pod = $this_element->{pod};
789 if (defined $this_pod) {
790 next NAME if $this_pod =~ / \b $name \b /x;
791 }
792 }
793
794 warn "$name has no documentation\n";
795 $missing_macros{$name} = 'config.h';
796
797 next;
798 }
799
800 my $has_defn = $configs{$name}{has_defn};
801 my $has_args = $configs{$name}{has_args};
802
803 # Check if any section already has an entry for this element.
804 # If so, it better be a placeholder, in which case we replace it
805 # with this entry.
806 foreach my $section (keys $docs{'api'}->%*) {
807 if (exists $docs{'api'}{$section}{$name}) {
808 my $was = $docs{'api'}{$section}{$name}->{pod};
809 $was = "" unless $was;
810 chomp $was;
811 if ($was ne "" && $was !~ m/$link_text/) {
812 die "Multiple descriptions for $name\n"
813 . "$section contained '$was'";
814 }
815 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
816 $configs{$name}{section} = $section;
817 last;
818 }
819 }
820
821 my $handled = 0; # Haven't handled this yet
822
823 if (defined $configs{$name}{'section'}) {
824 # This has been taken care of elsewhere.
825 $handled = 1;
826 }
827 else {
828 if ($has_defn && ! $has_args) {
829 $configs{$name}{args} = 1;
830 }
831
832 # Symbols of the form I_FOO are for #include files. They have
833 # special usage information
834 if ($name =~ / ^ I_ ( .* ) /x) {
835 my $file = lc $1 . '.h';
836 $configs{$name}{usage} = <<~"EOT";
837 #ifdef $name
838 #include <$file>
839 #endif
840 EOT
841 }
842
843 # Compute what section this variable should go into. This
844 # heuristic was determined by manually inspecting the current
845 # things in config.h, and should be adjusted as necessary as
846 # deficiencies are found.
847 #
848 # This is the default section for macros with a definiton but
849 # no arguments, meaning it is replaced unconditionally
850 #
851 my $sb = qr/ _ | \b /x; # segment boundary
852 my $dash_or_spaces = qr/ - | \s+ /x;
853 my $pod = $configs{$name}{pod};
854 if ($name =~ / ^ USE_ /x) {
855 $configs{$name}{'section'} = 'Site configuration';
856 }
857 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
858 {
859 $configs{$name}{'section'} = 'Time';
860 }
861 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
862 | $sb CASTFLAGS $sb
863 | QUADMATH
864 | $sb (?: IS )? NAN
865 | $sb (?: IS )? FINITE
866 /x)
867 {
868 $configs{$name}{'section'} =
869 'Floating point configuration values';
870 }
871 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
872 $configs{$name}{'section'} = 'Filesystem configuration values';
873 }
874 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
875 || $configs{$name}{pod} =~ m/ \b align /x)
876 {
877 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
878 }
879 elsif ($name =~ / ^ [IU] [ \d V ]
880 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
881 {
882 $configs{$name}{'section'} = 'Integer configuration values';
883 }
884 elsif ($name =~ / $sb t $sb /x) {
885 $configs{$name}{'section'} = 'Typedef names';
886 }
887 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
888 && $configs{$name}{pod} =~ m/ \b format \b /ix)
889 {
890 $configs{$name}{'section'} = 'Formats';
891 }
892 elsif ($name =~ / BACKTRACE /x) {
893 $configs{$name}{'section'} = 'Display and Dump functions';
894 }
895 elsif ($name =~ / ALLOC $sb /x) {
896 $configs{$name}{'section'} = 'Memory Management';
897 }
898 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
899 | $sb FILE $sb
900 | $sb DIR $sb
901 | $sb LSEEK
902 | $sb INO $sb
903 | $sb OPEN
904 | $sb CLOSE
905 | ^ DIR
906 | ^ INO $sb
907 | DIR $
908 | FILENAMES
909 /x
910 || $configs{$name}{pod} =~ m! I/O | stdio
911 | file \s+ descriptor
912 | file \s* system
913 | statfs
914 !x)
915 {
916 $configs{$name}{'section'} = 'Filesystem configuration values';
917 }
918 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
919 $configs{$name}{'section'} = 'Signals';
920 }
921 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
922 $configs{$name}{'section'} = 'Prototype information';
923 }
924 elsif ( $name =~ / ^ LOC_ /x
925 || $configs{$name}{pod} =~ /full path/i)
926 {
927 $configs{$name}{'section'} = 'Paths to system commands';
928 }
929 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
930 $configs{$name}{'section'} = 'Locales';
931 }
932 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
933 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
934 }
935 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
936 {
937 $configs{$name}{'section'} = 'Password and Group access';
938 }
939 elsif ($name =~ / SOCKET | $sb SOCK /x) {
940 $configs{$name}{'section'} = 'Sockets configuration values';
941 }
942 elsif ( $name =~ / THREAD | MULTIPLICITY /x
943 || $configs{$name}{pod} =~ m/ \b pthread /ix)
944 {
945 $configs{$name}{'section'} = 'Concurrency';
946 }
947 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
948 | VENDOR | ^ USE
949 /x)
950 {
951 $configs{$name}{'section'} = 'Site configuration';
952 }
953 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
954 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
955 || $pod =~ / \b doubles \b /ix
956 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
957 {
958 $configs{$name}{'section'} =
959 'Floating point configuration values';
960 }
961 else {
962 # Above are the specific sections. The rest go into a
963 # grab-bag of general configuration values. However, we put
964 # two classes of them into lists of their names, without their
965 # descriptions, when we think that the description doesn't add
966 # any real value. One list contains the #include variables:
967 # the description is basically boiler plate for each of these.
968 # The other list contains the very many things that are of the
969 # form HAS_foo, and \bfoo\b is contained in its description,
970 # and there is no verbatim text in the pod or links to/from it
971 # (which would add value). That means that it is likely the
972 # intent of the variable can be gleaned from just its name,
973 # and unlikely the description adds signficant value, so just
974 # listing them suffices. Giving their descriptions would
975 # expand this pod significantly with little added value.
976 if ( ! $has_defn
977 && ! $configs{$name}{verbatim}
978 && ! $configs{$name}{linked})
979 {
980 if ($name =~ / ^ I_ ( .* ) /x) {
981 push @include_defs, $name;
982 next;
983 }
984 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
985 my $canonical_name = $1;
986 $canonical_name =~ s/_//g;
987
988 my $canonical_pod = $configs{$name}{pod};
989 $canonical_pod =~ s/_//g;
990
991 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
992 if ($name =~ / $sb R $sb /x) {
993 push @has_r_defs, $name;
994 }
995 else {
996 push @has_defs, $name;
997 }
998 next;
999 }
1000 }
1001 }
1002
1003 $configs{$name}{'section'} = 'General Configuration';
1004 }
1005
1006 my $section = $configs{$name}{'section'};
6927cd44
KW
1007 die "Internal error: '$section' not in \%valid_sections"
1008 unless grep { $_ eq $section } keys %valid_sections;
00d68f0d
KW
1009 my $flags = 'AdmnT';
1010 $flags .= 'U' unless defined $configs{$name}{usage};
1011 $docs{'api'}{$section}{$name}{flags} = $flags;
1012 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1013 $docs{'api'}{$section}{$name}{ret_type} = "";
1014 $docs{'api'}{$section}{$name}{file} = 'config.h';
1015 $docs{'api'}{$section}{$name}{usage}
1016 = $configs{$name}{usage} if defined $configs{$name}{usage};
1017 push $docs{'api'}{$section}{$name}{args}->@*, ();
1018 push $docs{'api'}{$section}{$name}{items}->@*, ();
1019 }
1020 }
1021}
1022
94bdecf9 1023sub docout ($$$) { # output the docs for one function
b1cb8a49 1024 my($fh, $element_name, $docref) = @_;
5bf02caf 1025
783469eb
KW
1026 # Trim trailing space
1027 $element_name =~ s/\s*$//;
1028
5bf02caf 1029 my $flags = $docref->{flags};
63da8140 1030 my $pod = $docref->{pod} // "";
5bf02caf 1031 my $file = $docref->{file};
783469eb 1032
183f6e59 1033 my @items = $docref->{items}->@*;
5bf02caf 1034
783469eb
KW
1035 # Make the main element the first of the items. This allows uniform
1036 # treatment below
1037 unshift @items, { name => $element_name,
1038 flags => $flags,
1039 ret_type => $docref->{ret_type},
1040 args => [ $docref->{args}->@* ],
1041 };
94bdecf9 1042
7a997878 1043 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
9639517b 1044
783469eb
KW
1045 print $fh "\n=over $description_indent\n";
1046 print $fh "\n=item C<$_->{name}>\n" for @items;
63da8140 1047
783469eb
KW
1048 # If we're printing only a link to an element, this isn't the major entry,
1049 # so no X<> here.
1050 if ($flags !~ /h/) {
1051 print $fh "X<$_->{name}>" for @items;
1052 print $fh "\n";
d4e99c76 1053 }
63da8140 1054
783469eb
KW
1055 for my $item (@items) {
1056 if ($item->{flags} =~ /D/) {
1057 print $fh <<~"EOT";
54c193ae 1058
783469eb
KW
1059 C<B<DEPRECATED!>> It is planned to remove C<$item->{name}> from a
1060 future release of Perl. Do not use it for new code; remove it from
1061 existing code.
1062 EOT
1063 }
1064 elsif ($item->{flags} =~ /x/) {
1065 print $fh <<~"EOT";
54c193ae 1066
783469eb
KW
1067 NOTE: C<$item->{name}> is B<experimental> and may change or be
1068 removed without notice.
1069 EOT
1070 }
54c193ae 1071 }
94bdecf9 1072
783469eb
KW
1073 chomp $pod; # Make sure prints pod with a single trailing \n
1074 print $fh "\n", $pod, "\n";
3cfa66c4 1075
783469eb
KW
1076 for my $item (@items) {
1077 my $item_flags = $item->{flags};
1078 my $item_name = $item->{name};
3cfa66c4 1079
783469eb
KW
1080 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1081 if $item_flags =~ /O/;
1082 # Is Perl_, but no #define foo # Perl_foo
1083 if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1084
1085 # Can't handle threaded varargs
1086 || ($item_flags =~ /f/ && $item_flags !~ /T/))
1087 {
1088 $item->{name} = "Perl_$item_name";
1089 print $fh <<~"EOT";
1090
1091 NOTE: C<$item_name> must be explicitly called as
1092 C<$item->{name}>
1093 EOT
1094 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1095 print $fh ".\n";
1096 }
1097 }
94bdecf9
JH
1098
1099 if ($flags =~ /U/) { # no usage
8b5ff177 1100 warn("U and s flags are incompatible") if $flags =~ /s/;
20046047 1101 # nothing
05ca4832 1102 } else {
783469eb
KW
1103
1104 print $fh "\n=over $usage_indent\n";
1105
00d68f0d
KW
1106 if (defined $docref->{usage}) { # An override of the usage section
1107 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1108 }
783469eb
KW
1109 else {
1110
8d1c4423
KW
1111 # Add the thread context formal parameter on expanded-out names
1112 for my $item (@items) {
1113 unshift $item->{args}->@*, (($item->{args}->@*)
1114 ? "pTHX_"
1115 : "pTHX")
1116 if $item->{flags} !~ /T/
1117 && $item->{name} =~ /^Perl_/;
1118 }
1119
783469eb
KW
1120 # Look through all the items in this entry. If all have the same
1121 # return type and arguments, only the main entry is displayed.
1122 # Also, find the longest return type and longest name so that if
1123 # multiple ones are shown, they can be vertically aligned nicely
783469eb 1124 my $need_individual_usage = 0;
53c9fbf2 1125 my $longest_name_length = length $items[0]->{name};
783469eb 1126 my $base_ret_type = $items[0]->{ret_type};
53c9fbf2 1127 my $longest_ret = length $base_ret_type;
783469eb 1128 my @base_args = $items[0]->{args}->@*;
53c9fbf2 1129 for (my $i = 1; $i < @items; $i++) {
783469eb 1130 no warnings 'experimental::smartmatch';
53c9fbf2 1131 my $item = $items[$i];
783469eb
KW
1132 $need_individual_usage = 1
1133 if $item->{ret_type} ne $base_ret_type
1134 || ! ($item->{args}->@* ~~ @base_args);
1135 my $ret_length = length $item->{ret_type};
1136 $longest_ret = $ret_length if $ret_length > $longest_ret;
1137 my $name_length = length $item->{name};
1138 $longest_name_length = $name_length
1139 if $name_length > $longest_name_length;
1ded1f42 1140 }
783469eb
KW
1141
1142 # If we're only showing one entry, only its length matters.
1143 $longest_name_length = length($items[0]->{name})
1144 unless $need_individual_usage;
1145 print $fh "\n";
1146
1147 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1148 # for more
1149 my $ret_name_sep_length = 2; # spaces between return type and name
1150 my $name_indent = $indent + $longest_ret;
1151 $name_indent += $ret_name_sep_length if $longest_ret;
1152
1153 # 80 column terminal - 1 for pager adding a column; -7 for nroff
1154 # indent;
1155 my $max_length = 80 - 1 - 7 - $description_indent - $usage_indent;
1156
1157 for my $item (@items) {
1158 my $ret_type = $item->{ret_type};
1159 my @args = $item->{args}->@*;
1160 my $name = $item->{name};
1161 my $item_flags = $item->{flags};
1162
783469eb
KW
1163 # The return type
1164 print $fh (" " x $indent), $ret_type;
1165
1166 print $fh " " x ( $ret_name_sep_length
1167 + $longest_ret - length $ret_type);
1168 print $fh $name;
1169
1170 if ($item_flags =~ /n/) { # no args
1171 warn("$file: $element_name: n flag without m")
1172 unless $item_flags =~ /m/;
1173 warn("$file: $name: n flag but apparently has args")
1174 if @args;
1175 }
1176 else {
1177 # +1 for the '('
1178 my $arg_indent = $name_indent + $longest_name_length + 1;
1179
1180 # Align the argument lists of the items
1181 print $fh " " x ($longest_name_length - length($name));
1182 print $fh "(";
1183
1184 # Display as many of the arguments on the same line as
1185 # will fit.
1186 my $total_length = $arg_indent;
1187 my $first_line = 1;
1188 for (my $i = 0; $i < @args; $i++) {
1189 my $arg = $args[$i];
1190 my $arg_length = length($arg);
1191
1192 # All but the first arg are preceded by a blank
1193 my $use_blank = $i > 0;
1194
1195 # +1 here and below because either the argument has a
1196 # trailing comma or trailing ')'
1197 $total_length += $arg_length + $use_blank + 1;
1198
1199 # We want none of the arguments to be positioned so
1200 # they extend too far to the right. Ideally, they
1201 # should all start in the same column as the arguments
1202 # on the first line of the function display do. But, if
1203 # necessary, outdent them so that they all start in
1204 # another column, with the longest ending at the right
1205 # margin, like so:
1206 # void function_name(pTHX_ short1,
1207 # short2,
1208 # very_long_argument,
1209 # short3)
1210 if ($total_length > $max_length) {
1211
1212 # If this is the first continuation line,
1213 # calculate the longest argument; this will be the
1214 # one we may have to outdent for.
1215 if ($first_line) {
1216 $first_line = 0;
1217
1218 # We will need at least as much as the current
1219 # argument
1220 my $longest_arg_length = $arg_length
1221 + $use_blank + 1;
1222
1223 # Look through the rest of the args to see if
1224 # any are longer than this one.
1225 for (my $j = $i + 1; $j < @args; $j++) {
1226
1227 # Include the trailing ',' or ')' in the
1228 # length. No need to concern ourselves
1229 # with a leading blank, as the argument
1230 # would be positioned first on the next
1231 # line
1232 my $peek_arg_length = length ($args[$j])
1233 + 1;
1234 $longest_arg_length = $peek_arg_length
1235 if $peek_arg_length > $longest_arg_length;
1236 }
1237
1238 # Calculate the new indent if necessary.
1239 $arg_indent = $max_length - $longest_arg_length
1240 if $arg_indent + $longest_arg_length
1241 > $max_length;
1242 }
1243
1244 print $fh "\n", (" " x $arg_indent);
1245 $total_length = $arg_indent + $arg_length + 1;
1246 $use_blank = 0;
1247 }
1248
1249 # Display this argument
1250 print $fh " " if $use_blank;
1251 print $fh $arg;
1252 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1253
1254 } # End of loop through args
1255
1256 print $fh ")";
1ded1f42 1257 }
783469eb
KW
1258
1259 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1260 print $fh "\n";
1261
1262 # Only the first entry is normally displayed
1263 last unless $need_individual_usage;
1ded1f42 1264 }
1ded1f42 1265 }
783469eb
KW
1266
1267 print $fh "\n=back\n";
94bdecf9 1268 }
783469eb
KW
1269
1270 print $fh "\n=back\n";
63da8140 1271 print $fh "\n=for hackers\nFound in file $file\n";
94bdecf9
JH
1272}
1273
f83c6033
KW
1274sub sort_helper {
1275 # Do a case-insensitive dictionary sort, with only alphabetics
1276 # significant, falling back to using everything for determinancy
1354d57e 1277 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
f83c6033
KW
1278 || uc($a) cmp uc($b)
1279 || $a cmp $b;
1280}
1281
7b73ff98 1282sub output {
5a0155e6 1283 my ($podname, $header, $dochash, $missing, $footer) = @_;
6a4c4cd4
DM
1284 #
1285 # strip leading '|' from each line which had been used to hide
1286 # pod from pod checkers.
1287 s/^\|//gm for $header, $footer;
1288
7882b24a 1289 my $fh = open_new("pod/$podname.pod", undef,
20046047 1290 {by => "$0 extracting documentation",
f1f44974 1291 from => 'the C source files'}, 1);
e0492643 1292
63da8140 1293 print $fh $header, "\n";
e0492643 1294
5781dcac 1295 for my $section_name (sort sort_helper keys %valid_sections) {
7a997878 1296 my $section_info = $dochash->{$section_name};
36ea2d2f 1297
5781dcac
KW
1298 # We allow empty sections in perlintern.
1299 if (! $section_info && $podname eq 'perlapi') {
36ea2d2f 1300 warn "Empty section '$section_name'; skipped";
36ea2d2f
KW
1301 next;
1302 }
1303
63da8140
KW
1304 print $fh "\n=head1 $section_name\n";
1305
5781dcac
KW
1306 if ($podname eq 'perlapi') {
1307 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1308 if defined $valid_sections{$section_name}{header};
151c3fe5 1309
5781dcac
KW
1310 # Output any heading-level documentation and delete so won't get in
1311 # the way later
1312 if (exists $section_info->{""}) {
1313 print $fh "\n", $section_info->{""}, "\n";
1314 delete $section_info->{""};
1315 }
151c3fe5 1316 }
151c3fe5 1317
5781dcac
KW
1318
1319 if ($section_info) {
1320 for my $function_name (sort sort_helper keys %$section_info) {
1321 docout($fh, $function_name, $section_info->{$function_name});
1322 }
1323 }
1324 else {
1325 print $fh "\nThere are only public API items currently in $section_name\n";
20046047 1326 }
00b0fb83
KW
1327
1328 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
5781dcac
KW
1329 if $podname eq 'perlapi'
1330 && defined $valid_sections{$section_name}{footer};
7b73ff98
NC
1331 }
1332
5a0155e6 1333 if (@$missing) {
63da8140
KW
1334 print $fh "\n=head1 Undocumented functions\n";
1335 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
1336
474d0ac8 1337The following functions have been flagged as part of the public API,
72d33970 1338but are currently undocumented. Use them at your own risk, as the
ba4591a5
KW
1339interfaces are subject to change. Functions that are not listed in this
1340document are not intended for public use, and should NOT be used under any
1341circumstances.
1342
5a4fed09
KW
1343If you feel you need to use one of these functions, first send email to
1344L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
1345that there is a good reason for the function not being documented, and it
1346should be removed from this list; or it may just be that no one has gotten
1347around to documenting it. In the latter case, you will be asked to submit a
1348patch to document the function. Once your patch is accepted, it will indicate
1349that the interface is stable (unless it is explicitly marked otherwise) and
1350usable by you.
63da8140 1351
cf5f2f8f 1352_EOB_
2616800a
FC
1353The following functions are currently undocumented. If you use one of
1354them, you may wish to consider creating and submitting documentation for
1355it.
63da8140 1356
2616800a 1357_EOB_
783469eb 1358 print $fh "\n=over $description_indent\n";
6a4c4cd4 1359
63da8140
KW
1360 for my $missing (sort sort_helper @$missing) {
1361 print $fh "\n=item C<$missing>\nX<$missing>\n";
1362 }
1363 print $fh "\n=back\n";
5a0155e6 1364 }
63da8140
KW
1365
1366 print $fh "\n$footer\n=cut\n";
5a0155e6 1367
7882b24a 1368 read_only_bottom_close_and_rename($fh);
cd093254
MM
1369}
1370
e8e591c9
NC
1371foreach (@{(setup_embed())[0]}) {
1372 next if @$_ < 2;
b1cb8a49 1373 my ($flags, $ret_type, $func, @args) = @$_;
e8e591c9 1374 s/\b(?:NN|NULLOK)\b\s+//g for @args;
bc350081 1375
5ce57792 1376 $funcflags{$func} = {
20046047 1377 flags => $flags,
b1cb8a49 1378 ret_type => $ret_type,
20046047
KE
1379 args => \@args,
1380 };
5ce57792
NC
1381}
1382
5ce57792
NC
1383# glob() picks up docs from extra .c or .h files that may be in unclean
1384# development trees.
741c0772
NC
1385open my $fh, '<', 'MANIFEST'
1386 or die "Can't open MANIFEST: $!";
1387while (my $line = <$fh>) {
b87d9527 1388 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 1389
7cb0b6c4
KW
1390 # Don't pick up pods from these. (We may pick up generated stuff from
1391 # /lib though)
1392 next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
1393
1ae6ead9 1394 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
1395 autodoc(\*F,$file);
1396 close F or die "Error closing $file: $!\n";
1397}
741c0772 1398close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792 1399
00d68f0d
KW
1400parse_config_h();
1401
5ce57792
NC
1402for (sort keys %funcflags) {
1403 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 1404 next if $funcflags{$_}{flags} =~ /h/;
573cf2be 1405 warn "no docs for $_\n";
bc350081 1406}
94bdecf9 1407
5ce57792 1408foreach (sort keys %missing) {
5ce57792 1409 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
1410}
1411
ff5af78d
KW
1412# List of funcs in the public API that aren't also marked as core-only,
1413# experimental nor deprecated.
b87d9527
KW
1414my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1415 && $funcflags{$_}{flags} !~ /[xD]/
1416 && !$docs{api}{$_}, keys %funcflags;
00d68f0d 1417push @missing_api, keys %missing_macros;
a026e430
KW
1418
1419my $other_places = join ", ", map { "L<$_>" } sort sort_helper qw( perlclib perlxs),
1420 keys %described_elsewhere;
1421
00d68f0d
KW
1422# The S< > makes things less densely packed, hence more readable
1423my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_defs;
1424my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_r_defs;
1425$valid_sections{'General Configuration'}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1426$valid_sections{'General Configuration'}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1427
1428my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @include_defs;
1429$valid_sections{'General Configuration'}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1430
7260d4ba
KW
1431my $section_list = join "\n\n", map { "=item L</$_>" } sort sort_helper keys %valid_sections;
1432
b87d9527 1433output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
6a4c4cd4
DM
1434|=encoding UTF-8
1435|
1436|=head1 NAME
1437|
1438|perlapi - autogenerated documentation for the perl public API
1439|
1440|=head1 DESCRIPTION
1441|X<Perl API> X<API> X<api>
1442|
b87d9527
KW
1443|This file contains most of the documentation of the perl public API, as
1444|generated by F<embed.pl>. Specifically, it is a listing of functions,
a026e430
KW
1445|macros, flags, and variables that may be used by extension writers. Besides
1446|L<perlintern> and F<config.h>, some items are listed here as being actually
1447|documented in another pod.
b87d9527
KW
1448|
1449|L<At the end|/Undocumented functions> is a list of functions which have yet
1450|to be documented. Patches welcome! The interfaces of these are subject to
1451|change without notice.
1452|
6927cd44
KW
1453|Some of the functions documented here are consolidated so that a single entry
1454|serves for multiple functions which all do basically the same thing, but have
1455|some slight differences. For example, one form might process magic, while
1456|another doesn't. The name of each variation is listed at the top of the
1457|single entry. But if all have the same signature (arguments and return type)
1458|except for their names, only the usage for the base form is shown. If any
1459|one of the forms has a different signature (such as returning C<const> or
1460|not) every function's signature is explicitly displayed.
7260d4ba 1461|
a026e430
KW
1462|Anything not listed here or in the other mentioned pods is not part of the
1463|public API, and should not be used by extension writers at all. For these
1464|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1465|writing extensions.
6a4c4cd4
DM
1466|
1467|In Perl, unlike C, a string of characters may generally contain embedded
1468|C<NUL> characters. Sometimes in the documentation a Perl string is referred
1469|to as a "buffer" to distinguish it from a C string, but sometimes they are
1470|both just referred to as strings.
1471|
1472|Note that all Perl API global variables must be referenced with the C<PL_>
1473|prefix. Again, those not listed here are not to be used by extension writers,
1474|and can be changed or removed without notice; same with macros.
1475|Some macros are provided for compatibility with the older,
1476|unadorned names, but this support may be disabled in a future release.
1477|
1478|Perl was originally written to handle US-ASCII only (that is characters
1479|whose ordinal numbers are in the range 0 - 127).
1480|And documentation and comments may still use the term ASCII, when
1481|sometimes in fact the entire range from 0 - 255 is meant.
1482|
1483|The non-ASCII characters below 256 can have various meanings, depending on
1484|various things. (See, most notably, L<perllocale>.) But usually the whole
1485|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1486|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1487|"Latin1" as referring just to the characters in the range 128 through 255, or
2de859ce 1488|sometimes from 160 through 255.
6a4c4cd4
DM
1489|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1490|
1491|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1492|L<perlebcdic>). Most of the documentation (and even comments in the code)
1493|ignore the EBCDIC possibility.
1494|For almost all purposes the differences are transparent.
1495|As an example, under EBCDIC,
1496|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1497|whenever this documentation refers to C<utf8>
1498|(and variants of that name, including in function names),
1499|it also (essentially transparently) means C<UTF-EBCDIC>.
1500|But the ordinals of characters differ between ASCII, EBCDIC, and
1501|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1502|number of bytes than in UTF-8.
1503|
6927cd44
KW
1504|The organization of this document is tentative and subject to change.
1505|Suggestions and patches welcome
1506|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1507|
1508|The sections in this document currently are
1509|
1510|=over
1511
1512|$section_list
1513|
1514|=back
1515|
6a4c4cd4 1516|The listing below is alphabetical, case insensitive.
94bdecf9 1517_EOB_
6a4c4cd4
DM
1518|=head1 AUTHORS
1519|
1520|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 1521|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
1522|
1523|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1524|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1525|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1526|Stephen McCamant, and Gurusamy Sarathy.
1527|
7b1f0a98 1528|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
1529|
1530|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1531|
1532|=head1 SEE ALSO
1533|
a026e430 1534|F<config.h>, L<perlintern>, $other_places
94bdecf9
JH
1535_EOE_
1536
79fc8511
FC
1537# List of non-static internal functions
1538my @missing_guts =
9f589e47 1539 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
5a0155e6 1540
7b1f0a98 1541output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
6a4c4cd4
DM
1542|=head1 NAME
1543|
1544|perlintern - autogenerated documentation of purely B<internal>
20046047 1545|Perl functions
6a4c4cd4
DM
1546|
1547|=head1 DESCRIPTION
1548|X<internal Perl functions> X<interpreter functions>
1549|
1550|This file is the autogenerated documentation of functions in the
1551|Perl interpreter that are documented using Perl's internal documentation
1552|format but are not marked as part of the Perl API. In other words,
1553|B<they are not for use in extensions>!
5781dcac
KW
1554
1555|It has the same sections as L<perlapi>, though some may be empty.
6a4c4cd4 1556|
7b1f0a98 1557_EOB_
6a4c4cd4
DM
1558|
1559|=head1 AUTHORS
1560|
1561|The autodocumentation system was originally added to the Perl core by
1562|Benjamin Stuhl. Documentation is by whoever was kind enough to
1563|document their functions.
1564|
1565|=head1 SEE ALSO
1566|
a026e430 1567|F<config.h>, L<perlapi>, $other_places
7b1f0a98 1568_EOE_