This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regen/regcharclass.pl: Change member to method
[perl5.git] / autodoc.pl
CommitLineData
94bdecf9 1#!/usr/bin/perl -w
00d68f0d
KW
2
3use Text::Tabs;
a9a7c81e 4#
6294c161
DM
5# Unconditionally regenerate:
6#
7# pod/perlintern.pod
8# pod/perlapi.pod
9#
10# from information stored in
11#
12# embed.fnc
9b983db7 13# plus all the core .c, .h, and .pod files listed in MANIFEST
6294c161
DM
14#
15# Has an optional arg, which is the directory to chdir to before reading
9b983db7 16# MANIFEST and the files
6294c161 17#
52a9d53b 18# This script is invoked as part of 'make all'
151c3fe5 19#
9b983db7
KW
20# The generated pod consists of sections of related elements, functions,
21# macros, and variables. The keys of %valid_sections give the current legal
22# ones. Just add a new key to add a section.
23#
24# Throughout the files read by this script are lines like
25#
26# =for apidoc_section Section Name
27#
28# "Section Name" (after having been stripped of leading space) must be one of
29# the legal section names, or an error is thrown. All API elements defined
30# between this line and the next 'apidoc_section' line will go into the
31# section "Section Name", sorted by dictionary order within it. perlintern
32# and perlapi are parallel documents, each potentially with a section "Section
33# Name". Each element is marked as to which document it goes into. If there
34# are none for a particular section in perlapi, that section is
35# omitted.
36#
37# Also, in .[ch] files, there may be
38#
39# =head1 Section Name
40#
41# lines in comments. These are also used by this program to switch to section
42# "Section Name". The difference is that if there are any lines after the
43# =head1, inside the same comment, and before any =for apidoc-ish lines, they
44# are used as a heading for section "Section Name" (in both perlintern and
45# perlapi). This includes any =head[2-5]. If more than one '=head1 Section
46# Name' line has content, they appear in the generated pod in an undefined
47# order.
48#
49# The next =head1, =for apidoc_section, or file end terminates what goes into
50# the current section
51#
52# The %valid_sections hash below also can have header content, which will
53# appear before any =head1 content. The hash can also have footer content
54# content, which will appear at the end of the section, after all the
55# elements.
56#
57# The lines that define the actual functions, etc are documented in embed.fnc,
58# because they have flags which must be kept in sync with that file.
1fcde0e9 59
56a0c332 60use strict;
04adaf08 61use warnings;
a64c954a 62
7882b24a
NC
63if (@ARGV) {
64 my $workdir = shift;
65 chdir $workdir
66 or die "Couldn't chdir to '$workdir': $!";
67}
3d7c117d
MB
68require './regen/regen_lib.pl';
69require './regen/embed_lib.pl';
7882b24a 70
a026e430 71my %described_elsewhere;
b87d9527 72
94bdecf9 73#
346f75ff 74# See database of global and static function prototypes in embed.fnc
94bdecf9
JH
75# This is used to generate prototype headers under various configurations,
76# export symbols lists for different platforms, and macros to provide an
77# implicit interpreter context argument.
78#
79
6a235718 80my %docs;
df6bd76f 81my %seen;
5ce57792 82my %funcflags;
5ce57792 83my %missing;
00d68f0d
KW
84my %missing_macros;
85
86my $link_text = "Described in";
87
783469eb
KW
88my $description_indent = 4;
89my $usage_indent = 3; # + initial blank yields 4 total
90
00d68f0d 91my %valid_sections = (
6927cd44
KW
92 'AV Handling' => {},
93 'Callback Functions' => {},
94 'Casting' => {},
95 'Character case changing' => {},
96 'Character classification' => {},
97 'Compile-time scope hooks' => {},
98 'Compiler and Preprocessor information' => {},
99 'Compiler directives' => {},
100 'Concurrency' => {},
101 'COP Hint Hashes' => {},
102 'CV Handling' => {},
103 'Custom Operators' => {},
104 'Display and Dump functions' => {},
105 'Embedding and Interpreter Cloning' => {},
106 'Errno' => {},
107 'Exception Handling (simple) Macros' => {},
108 'Filesystem configuration values' => {
109 header => <<~'EOT',
110 Also see L</List of capability HAS_foo symbols>.
111 EOT
112 },
00d68f0d
KW
113 'Floating point configuration values' => {
114 header => <<~'EOT',
6927cd44
KW
115 Also L</List of capability HAS_foo symbols> lists capabilities
116 that arent in this section. For example C<HAS_ASINH>, for the
117 hyperbolic sine function.
118 EOT
119 },
120 'Formats' => {
121 header => <<~'EOT',
122 These are used for formatting the corresponding type For example,
123 instead of saying
124
125 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
126
127 use
128
129 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
130
131 This keeps you from having to know if, say an IV, needs to be
132 printed as C<%d>, C<%ld>, or something else.
00d68f0d
KW
133 EOT
134 },
135 'General Configuration' => {
136 header => <<~'EOT',
137 This section contains configuration information not otherwise
138 found in the more specialized sections of this document. At the
139 end is a list of C<#defines> whose name should be enough to tell
140 you what they do, and a list of #defines which tell you if you
141 need to C<#include> files to get the corresponding functionality.
142 EOT
143
144 footer => <<~'EOT',
145
146 =head2 List of capability C<HAS_I<foo>> symbols
147
6927cd44
KW
148 This is a list of those symbols that dont appear elsewhere in ths
149 document that indicate if the current platform has a certain
150 capability. Their names all begin with C<HAS_>. Only those
151 symbols whose capability is directly derived from the name are
152 listed here. All others have their meaning expanded out elsewhere
153 in this document. This (relatively) compact list is because we
154 think that the expansion would add little or no value and take up
155 a lot of space (because there are so many). If you think certain
156 ones should be expanded, send email to
157 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
00d68f0d
KW
158
159 Each symbol here will be C<#define>d if and only if the platform
160 has the capability. If you need more detail, see the
6927cd44
KW
161 corresponding entry in F<config.h>. For convenience, the list is
162 split so that the ones that indicate there is a reentrant version
163 of a capability are listed separately
00d68f0d
KW
164
165 __HAS_LIST__
166
6927cd44
KW
167 And, the reentrant capabilities:
168
169 __HAS_R_LIST__
170
00d68f0d
KW
171 Example usage:
172
173 =over
174
175 #ifdef HAS_STRNLEN
176 use strnlen()
177 #else
178 use an alternative implementation
179 #endif
180
181 =back
182
183 =head2 List of C<#include> needed symbols
184
185 This list contains symbols that indicate if certain C<#include>
186 files are present on the platform. If your code accesses the
187 functionality that one of these is for, you will need to
188 C<#include> it if the symbol on this list is C<#define>d. For
189 more detail, see the corresponding entry in F<config.h>.
190
191 __INCLUDE_LIST__
192
193 Example usage:
194
195 =over
196
197 #ifdef I_WCHAR
198 #include <wchar.h>
199 #endif
200
201 =back
202 EOT
203 },
6927cd44
KW
204 'Global Variables' => {},
205 'GV Handling' => {},
206 'Hook manipulation' => {},
207 'HV Handling' => {},
208 'Input/Output' => {},
209 'Integer configuration values' => {},
210 'Lexer interface' => {},
211 'Locales' => {},
212 'Magic' => {},
213 'Memory Management' => {},
214 'MRO' => {},
215 'Multicall Functions' => {},
216 'Numeric Functions' => {},
217 'Optree construction' => {},
218 'Optree Manipulation Functions' => {},
219 'Pack and Unpack' => {},
220 'Pad Data Structures' => {},
221 'Password and Group access' => {},
222 'Paths to system commands' => {},
223 'Per-Interpreter Variables' => {},
224 'Prototype information' => {},
225 'REGEXP Functions' => {},
226 'Signals' => {},
227 'Site configuration' => {
228 header => <<~'EOT',
229 These variables give details as to where various libraries,
230 installation destinations, I<etc.>, go, as well as what various
231 installation options were selected
232 EOT
233 },
234 'Sockets configuration values' => {},
235 'Source Filters' => {},
236 'Stack Manipulation Macros' => {},
237 'String Handling' => {
238 header => <<~'EOT',
239 See also C<L</Unicode Support>>.
240 EOT
241 },
242 'SV Flags' => {},
243 'SV Handling' => {},
244 'Time' => {},
245 'Typedef names' => {},
246 'Unicode Support' => {},
247 'Utility Functions' => {},
248 'Versioning' => {},
249 'Warning and Dieing' => {},
250 'XS' => {},
00d68f0d 251);
94bdecf9 252
9e019b59
KW
253# Somewhat loose match for an apidoc line so we can catch minor typos.
254# Parentheses are used to capture portions so that below we verify
255# that things are the actual correct syntax.
256my $apidoc_re = qr/ ^ (\s*) # $1
257 (=?) # $2
258 (\s*) # $3
259 for (\s*) # $4
260 apidoc (_item)? # $5
261 (\s*) # $6
262 (.*?) # $7
263 \s* \n /x;
183f6e59
KW
264# Only certain flags, dealing with display, are acceptable for apidoc_item
265my $display_flags = "fFnDopsT";
9e019b59
KW
266
267sub check_api_doc_line ($$) {
268 my ($file, $in) = @_;
269
270 return unless $in =~ $apidoc_re;
271
272 my $is_item = defined $5;
273 my $is_in_proper_form = length $1 == 0
274 && length $2 > 0
275 && length $3 == 0
276 && length $4 > 0
277 && length $6 > 0
278 && length $7 > 0;
279 my $proto_in_file = $7;
280 my $proto = $proto_in_file;
281 $proto = "||$proto" if $proto !~ /\|/;
282 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
283
284 $name && $is_in_proper_form or die <<EOS;
285Bad apidoc at $file line $.:
286 $in
287Expected:
288 =for apidoc flags|returntype|name|arg|arg|...
289 =for apidoc flags|returntype|name
290 =for apidoc name
291(or 'apidoc_item')
292EOS
293
783469eb
KW
294 die "Only [$display_flags] allowed in apidoc_item"
295 if $is_item && $flags =~ /[^$display_flags]/;
296
9e019b59
KW
297 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
298}
94bdecf9 299
4be3b9ca
KW
300sub embed_override($) {
301 my ($element_name) = shift;
302
303 # If the entry is also in embed.fnc, it should be defined
304 # completely there, but not here
305 my $embed_docref = delete $funcflags{$element_name};
306
307 return unless $embed_docref and %$embed_docref;
308
309 my $flags = $embed_docref->{'flags'};
310 warn "embed.fnc entry '$element_name' missing 'd' flag"
311 unless $flags =~ /d/;
312
313 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
314}
315
94bdecf9
JH
316sub autodoc ($$) { # parse a file and extract documentation info
317 my($fh,$file) = @_;
63da8140 318 my($in, $line_num, $header, $section);
f554dfc5 319
3cfa66c4
KW
320 my $file_is_C = $file =~ / \. [ch] $ /x;
321
f554dfc5 322 # Count lines easier
b1cb8a49 323 my $get_next_line = sub { $line_num++; return <$fh> };
f554dfc5 324
63da8140
KW
325 # Read the file
326 while ($in = $get_next_line->()) {
327 last unless defined $in;
3cfa66c4 328
63da8140
KW
329 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
330 # =head1 lines only have effect in C files
331 || ($file_is_C && $in =~ /^=head1/));
332
333 # Here, the line introduces a portion of the input that we care about.
334 # Either it is for an API element, or heading text which we expect
335 # will be used for elements later in the file
336
337 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
183f6e59 338 my (@args, @items);
63da8140
KW
339
340 # If the line starts a new section ...
341 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
183f6e59 342
b1cb8a49 343 $section = $1;
6927cd44
KW
344 die "Unknown section name '$section' in $file near line $.\n"
345 unless defined $valid_sections{$section};
346
3cfa66c4 347 }
63da8140
KW
348 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
349 # plain apidoc line
183f6e59
KW
350 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
351 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
63da8140
KW
352 }
353 else { # Plain apidoc
354
355 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
356 = check_api_doc_line($file, $in);
4be3b9ca
KW
357 # Override this line with any info in embed.fnc
358 my ($embed_flags, $embed_ret_type, @embed_args)
359 = embed_override($element_name);
360 if ($embed_ret_type) {
63da8140 361 warn "embed.fnc entry overrides redundant information in"
4be3b9ca
KW
362 . " '$proto_in_file' in $file"
363 if $flags || $ret_type || @args;
364 $flags = $embed_flags;
365 $ret_type = $embed_ret_type;
366 @args = @embed_args;
367 }
9e728159
KW
368 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not
369 # a macro or typedef
63da8140
KW
370 $missing{$element_name} = $file;
371 }
3cfa66c4 372
63da8140 373 die "flag $1 is not legal (for function $element_name (from $file))"
9e728159 374 if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXxy] ) /x;
151c3fe5 375
9e728159
KW
376 die "'u' flag must also have 'm' or 'y' flags' for $element_name"
377 if $flags =~ /u/ && $flags !~ /[my]/;
63da8140
KW
378 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
379 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
f554dfc5 380
63da8140
KW
381 if (exists $seen{$element_name} && $flags !~ /h/) {
382 die ("'$element_name' in $file was already documented in $seen{$element_name}");
383 }
384 else {
385 $seen{$element_name} = $file;
151c3fe5 386 }
94bdecf9 387 }
df6bd76f 388
63da8140
KW
389 # Here we have processed the initial line in the heading text or API
390 # element, and have saved the important information from it into the
391 # corresponding variables. Now accumulate the text that applies to it
392 # up to a terminating line, which is one of:
393 # 1) =cut
394 # 2) =head (in a C file only =head1)
395 # 3) an end comment line in a C file: m:^\s*\*/:
183f6e59 396 # 4) =for apidoc... (except apidoc_item lines)
63da8140
KW
397 $text = "";
398 my $head_ender_num = ($file_is_C) ? 1 : "";
399 while (defined($in = $get_next_line->())) {
400
401 last if $in =~ /^=cut/x;
402 last if $in =~ /^=head$head_ender_num/;
403
404 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
405
406 # End of comment line in C files is a fall-back terminator,
407 # but warn only if there actually is some accumulated text
408 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
409 last;
df6bd76f 410 }
df6bd76f 411
63da8140
KW
412 if ($in !~ / ^ =for [ ]+ apidoc /x) {
413 $text .= $in;
414 next;
415 }
416
183f6e59 417 # Here, the line is an apidoc line. All but apidoc_item terminate
63da8140 418 # the text being accumulated.
183f6e59
KW
419 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
420
421 my ($item_name, $item_flags, $item_ret_type, $is_item,
4be3b9ca 422 $item_proto, @item_args) = check_api_doc_line($file, $in);
183f6e59
KW
423 last unless $is_item;
424
425 # Here, is an apidoc_item_line; They can only come within apidoc
426 # paragraphs.
4be3b9ca
KW
427 die "Unexpected api_doc_item line '$item_proto'"
428 unless $element_name;
183f6e59
KW
429
430 # We accept blank lines between these, but nothing else;
431 die "apidoc_item lines must immediately follow apidoc lines"
432 if $text =~ /\S/;
4be3b9ca
KW
433 # Override this line with any info in embed.fnc
434 my ($embed_flags, $embed_ret_type, @embed_args)
435 = embed_override($item_name);
436 if ($embed_ret_type) {
437 warn "embed.fnc entry overrides redundant information in"
438 . " '$item_proto' in $file"
439 if $item_flags || $item_ret_type || @item_args;
440
441 $item_flags = $embed_flags;
442 $item_ret_type = $embed_ret_type;
443 @item_args = @embed_args;
444 }
183f6e59
KW
445
446 # Use the base entry flags if none for this item; otherwise add in
447 # any non-display base entry flags.
448 if ($item_flags) {
449 $item_flags .= $flags =~ s/[$display_flags]//rg;
450 }
451 else {
452 $item_flags = $flags;
453 }
454 $item_ret_type = $ret_type unless $item_ret_type;
455 @item_args = @args unless @item_args;
456 push @items, { name => $item_name,
457 ret_type => $item_ret_type,
458 flags => $item_flags,
459 args => [ @item_args ],
460 };
461
462 # This line shows that this element is documented.
463 delete $funcflags{$item_name};
a9a7c81e 464 }
3cfa66c4 465
63da8140
KW
466 # Here, are done accumulating the text for this item. Trim it
467 $text =~ s/ ^ \s* //x;
468 $text =~ s/ \s* $ //x;
469 $text .= "\n" if $text ne "";
6523e108 470
63da8140
KW
471 # And treat all-spaces as nothing at all
472 undef $text unless $text =~ /\S/;
df6bd76f 473
63da8140 474 if ($element_name) {
df6bd76f 475
63da8140
KW
476 # Here, we have accumulated into $text, the pod for $element_name
477 my $where = $flags =~ /A/ ? 'api' : 'guts';
5ce57792 478
63da8140
KW
479 $section = "Functions in file $file" unless defined $section;
480 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
481 unless defined $section;
482 if (exists $docs{$where}{$section}{$element_name}) {
483 warn "$0: duplicate API entry for '$element_name' in"
484 . " $where/$section\n";
485 next;
20046047 486 }
5ce57792 487
63da8140
KW
488 # Override the text with just a link if the flags call for that
489 my $is_link_only = ($flags =~ /h/);
490 if ($is_link_only) {
491 if ($file_is_C) {
183f6e59 492 die "Can't currently handle link with items to it" if @items;
63da8140
KW
493 redo; # Don't put anything if C source
494 }
495
496 # Here, is an 'h' flag in pod. We add a reference to the pod (and
497 # nothing else) to perlapi/intern. (It would be better to add a
498 # reference to the correct =item,=header, but something that makes
499 # it harder is that it that might be a duplicate, like '=item *';
500 # so that is a future enhancement XXX. Another complication is
501 # there might be more than one deserving candidates.)
502 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
503 $podname =~ s/\.pod//;
504 $text = "Described in L<$podname>.\n";
505
506 # Don't output a usage example for linked to documentation if
507 # it is trivial (has no arguments) and we aren't to add a
508 # semicolon
509 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
510
511 # Keep track of all the pod files that we refer to.
512 push $described_elsewhere{$podname}->@*, $podname;
513 }
5ce57792 514
5bf02caf
KW
515 $docs{$where}{$section}{$element_name}{flags} = $flags;
516 $docs{$where}{$section}{$element_name}{pod} = $text;
517 $docs{$where}{$section}{$element_name}{file} = $file;
518 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
519 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
183f6e59 520 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
a9a7c81e 521 }
63da8140
KW
522 elsif ($text) {
523 $valid_sections{$section}{header} = "" unless
524 defined $valid_sections{$section}{header};
525 $valid_sections{$section}{header} .= "\n$text";
a9a7c81e 526 }
63da8140
KW
527
528 # We already have the first line of what's to come in $in
529 redo;
530
531 } # End of loop through input
94bdecf9
JH
532}
533
00d68f0d
KW
534my %configs;
535my @has_defs;
536my @has_r_defs; # Reentrant symbols
537my @include_defs;
538sub parse_config_h {
539 use re '/aa'; # Everthing is ASCII in this file
540
541 # Process config.h
d3871820
KW
542 my $config_h = 'config.h';
543 $config_h = 'win32/config.h' unless -e $config_h;
544 die "Can't find $config_h" unless -e $config_h;
545 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
00d68f0d
KW
546 while (<$fh>) {
547
548 # Look for lines like /* FOO_BAR:
549 # By convention all config.h descriptions begin like that
550 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
551 my $name = $1;
552
553 # Here we are starting the description for $name in config.h. We
554 # accumulate the entire description for it into @description.
555 # Flowing text from one input line to another is appended into the
556 # same array element to make a single flowing line element, but
557 # verbatim lines are kept as separate elements in @description.
558 # This will facilitate later doing pattern matching without regard
559 # to line boundaries on non-verbatim text.
560
561 die "Multiple config.h entries for '$name'"
562 if defined $configs{$name}{description};
563
564 # Get first line of description
565 $_ = <$fh>;
566
567 # Each line in the description begins with blanks followed by '/*'
568 # and some spaces.
569 die "Unexpected config.h initial line for $name: '$_'"
570 unless s/ ^ ( \s* \* \s* ) //x;
571 my $initial_text = $1;
572
573 # Initialize the description with this first line (after having
574 # stripped the prefix text)
575 my @description = $_;
576
577 # The first line is used as a template for how much indentation
578 # each normal succeeding line has. Lines indented further
579 # will be considered as intended to be verbatim. But, empty lines
580 # likely won't have trailing blanks, so just strip the whole thing
581 # for them.
582 my $strip_initial_qr = qr! \s* \* \s* $
583 | \Q$initial_text\E
584 !x;
585 $configs{$name}{verbatim} = 0;
586
587 # Read in the remainder of the description
588 while (<$fh>) {
589 last if s| ^ \s* \* / ||x; # A '*/' ends it
590
591 die "Unexpected config.h description line for $name: '$_'"
592 unless s/$strip_initial_qr//;
593
594 # Fix up the few flawed lines in config.h wherein a new
595 # sentence begins with a tab (and maybe a space after that).
596 # Although none of them currently do, let it recognize
597 # something like
598 #
599 # "... text"). The next sentence ...
600 #
601 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
602
603 # If this line has extra indentation or looks to have columns,
604 # it should be treated as verbatim. Columns are indicated by
605 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
606 # not preceded by punctuation.
607 if ($_ !~ m/ ^ \s
608 | \S (?: \t
609 | \s{3}
610 | (*nlb:[[:punct:]]) \s{2}
611 )
612 /x)
613 {
614 # But here, is not a verbatim line. Add an empty line if
615 # this is the first non-verbatim after a run of verbatims
616 if ($description[-1] =~ /^\s/) {
617 push @description, "\n", $_;
618 }
619 else { # Otherwise, append this flowing line to the
620 # current flowing line
621 $description[-1] .= $_;
622 }
623 }
624 else {
625 $configs{$name}{verbatim} = 1;
626
627 # The first verbatim line in a run of them is separated by an
628 # empty line from the flowing lines above it
629 push @description, "\n" if $description[-1] =~ /^\S/;
630
631 $_ = Text::Tabs::expand($_);
632
633 # Only a single space so less likely to wrap
634 s/ ^ \s* / /x;
635
636 push @description, $_;
637 }
638 }
639
640 push $configs{$name}{description}->@*, @description
641
642 } # Not a description; see if it is a macro definition.
643 elsif (m! ^
644 (?: / \* )? # Optional commented-out
645 # indication
646 \# \s* define \s+ ( \w+ ) # $1 is the name
647 ( \s* ) # $2 indicates if args or not
648 ( .*? ) # $3 is any definition
649 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
650 $
651 !x)
652 {
653 my $name = $1;
654
655 # There can be multiple definitions for a name. We want to know
656 # if any of them has arguments, and if any has a body.
657 $configs{$name}{has_args} //= $2 eq "";
658 $configs{$name}{has_args} ||= $2 eq "";
659 $configs{$name}{has_defn} //= $3 ne "";
660 $configs{$name}{has_defn} ||= $3 ne "";
661 }
662 }
663
664 # We now have stored the description and information about every #define
665 # in the file. The description is in a form convenient to operate on to
666 # convert to pod. Do that now.
667 foreach my $name (keys %configs) {
668 next unless defined $configs{$name}{description};
669
670 # All adjacent non-verbatim lines of the description are appended
671 # together in a single element in the array. This allows the patterns
672 # to work across input line boundaries.
673
674 my $pod = "";
675 while (defined ($_ = shift $configs{$name}{description}->@*)) {
676 chomp;
677
678 if (/ ^ \S /x) { # Don't edit verbatim lines
679
680 # Enclose known file/path names not already so enclosed
681 # with <...>. (Some entries in config.h are already
682 # '<path/to/file>')
683 my $file_name_qr = qr! [ \w / ]+ \.
684 (?: c | h | xs | p [lm] | pmc | PL
685 | sh | SH | exe ) \b
686 !xx;
687 my $path_name_qr = qr! (?: / \w+ )+ !x;
688 for my $re ($file_name_qr, $path_name_qr) {
689 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
690 }
691
692 # Enclose <... file/path names with F<...> (but no double
693 # angle brackets)
694 for my $re ($file_name_qr, $path_name_qr) {
695 s! < ( $re ) > !F<$1>!gxx;
696 }
697
698 # Explain metaconfig units
699 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
700
701 # Convert "See foo" to "See C<L</foo>>" if foo is described in
702 # this file. Also create a link to the known file INSTALL.
703 # And, to be more general, handle "See also foo and bar", and
704 # "See also foo, bar, and baz"
705 while (m/ \b [Ss]ee \s+
706 (?: also \s+ )? ( \w+ )
707 (?: , \s+ ( \w+ ) )?
708 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
709 my @links = $1;
710 push @links, $2 if defined $2;
711 push @links, $3 if defined $3;
712 foreach my $link (@links) {
713 if ($link eq 'INSTALL') {
714 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
715 }
716 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
717 s| \b $link \b |C<L</$link>>|xg;
718 $configs{$link}{linked} = 1;
719 $configs{$name}{linked} = 1;
720 }
721 }
722 }
723
724 # Enclose what we think are symbols with C<...>.
725 no warnings 'experimental::vlb';
726 s/ (*nlb:<)
727 (
728 # Any word followed immediately with parens or
729 # brackets
730 \b \w+ (?: \( [^)]* \) # parameter list
731 | \[ [^]]* \] # or array reference
732 )
733 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
734 | \b (?: struct | union ) \s \w+
735
736 # Words that contain underscores (which are
737 # definitely not text) or three uppercase letters in
738 # a row. Length two ones, like IV, aren't enclosed,
739 # because they often don't look as nice.
740 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
741 )
742 (*nla:>)
743 /C<$1>/xg;
744
745 # These include foo when the name is HAS_foo. This is a
746 # heuristic which works in most cases.
747 if ($name =~ / ^ HAS_ (.*) /x) {
748 my $symbol = lc $1;
749
750 # Don't include path components, nor things already in
751 # <>, or with trailing '(', '['
752 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
753 }
754 }
755
756 $pod .= "$_\n";
757 }
758 delete $configs{$name}{description};
759
760 $configs{$name}{pod} = $pod;
761 }
762
763 # Now have converted the description to pod. We also now have enough
764 # information that we can do cross checking to find definitions without
765 # corresponding pod, and see if they are mentioned in some description;
766 # otherwise they aren't documented.
767 NAME:
768 foreach my $name (keys %configs) {
769
770 # A definition without pod
771 if (! defined $configs{$name}{pod}) {
772
773 # Leading/trailing underscore means internal to config.h, e.g.,
774 # _GNU_SOURCE
775 next if $name =~ / ^ _ /x;
776 next if $name =~ / _ $ /x;
777
778 # MiXeD case names are internal to config.h; the first 4
779 # characters are sufficient to determine this
780 next if $name =~ / ^ [[:upper:]] [[:lower:]]
781 [[:upper:]] [[:lower:]]
782 /x;
783
784 # Here, not internal to config.h. Look to see if this symbol is
785 # mentioned in the pod of some other. If so, assume it is
786 # documented.
787 foreach my $check_name (keys %configs) {
788 my $this_element = $configs{$check_name};
789 my $this_pod = $this_element->{pod};
790 if (defined $this_pod) {
791 next NAME if $this_pod =~ / \b $name \b /x;
792 }
793 }
794
795 warn "$name has no documentation\n";
796 $missing_macros{$name} = 'config.h';
797
798 next;
799 }
800
801 my $has_defn = $configs{$name}{has_defn};
802 my $has_args = $configs{$name}{has_args};
803
804 # Check if any section already has an entry for this element.
805 # If so, it better be a placeholder, in which case we replace it
806 # with this entry.
807 foreach my $section (keys $docs{'api'}->%*) {
808 if (exists $docs{'api'}{$section}{$name}) {
809 my $was = $docs{'api'}{$section}{$name}->{pod};
810 $was = "" unless $was;
811 chomp $was;
812 if ($was ne "" && $was !~ m/$link_text/) {
813 die "Multiple descriptions for $name\n"
814 . "$section contained '$was'";
815 }
816 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
817 $configs{$name}{section} = $section;
818 last;
819 }
820 }
821
822 my $handled = 0; # Haven't handled this yet
823
824 if (defined $configs{$name}{'section'}) {
825 # This has been taken care of elsewhere.
826 $handled = 1;
827 }
828 else {
9e728159 829 my $flags = "";
00d68f0d
KW
830 if ($has_defn && ! $has_args) {
831 $configs{$name}{args} = 1;
832 }
833
834 # Symbols of the form I_FOO are for #include files. They have
835 # special usage information
836 if ($name =~ / ^ I_ ( .* ) /x) {
837 my $file = lc $1 . '.h';
838 $configs{$name}{usage} = <<~"EOT";
839 #ifdef $name
840 #include <$file>
841 #endif
842 EOT
843 }
844
845 # Compute what section this variable should go into. This
846 # heuristic was determined by manually inspecting the current
847 # things in config.h, and should be adjusted as necessary as
848 # deficiencies are found.
849 #
850 # This is the default section for macros with a definiton but
851 # no arguments, meaning it is replaced unconditionally
852 #
853 my $sb = qr/ _ | \b /x; # segment boundary
854 my $dash_or_spaces = qr/ - | \s+ /x;
855 my $pod = $configs{$name}{pod};
856 if ($name =~ / ^ USE_ /x) {
857 $configs{$name}{'section'} = 'Site configuration';
858 }
859 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
860 {
861 $configs{$name}{'section'} = 'Time';
862 }
863 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
864 | $sb CASTFLAGS $sb
865 | QUADMATH
866 | $sb (?: IS )? NAN
867 | $sb (?: IS )? FINITE
868 /x)
869 {
870 $configs{$name}{'section'} =
871 'Floating point configuration values';
872 }
873 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
874 $configs{$name}{'section'} = 'Filesystem configuration values';
875 }
876 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
877 || $configs{$name}{pod} =~ m/ \b align /x)
878 {
879 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
880 }
881 elsif ($name =~ / ^ [IU] [ \d V ]
882 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
883 {
884 $configs{$name}{'section'} = 'Integer configuration values';
885 }
886 elsif ($name =~ / $sb t $sb /x) {
887 $configs{$name}{'section'} = 'Typedef names';
9e728159 888 $flags .= 'y';
00d68f0d
KW
889 }
890 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
891 && $configs{$name}{pod} =~ m/ \b format \b /ix)
892 {
893 $configs{$name}{'section'} = 'Formats';
894 }
895 elsif ($name =~ / BACKTRACE /x) {
896 $configs{$name}{'section'} = 'Display and Dump functions';
897 }
898 elsif ($name =~ / ALLOC $sb /x) {
899 $configs{$name}{'section'} = 'Memory Management';
900 }
901 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
902 | $sb FILE $sb
903 | $sb DIR $sb
904 | $sb LSEEK
905 | $sb INO $sb
906 | $sb OPEN
907 | $sb CLOSE
908 | ^ DIR
909 | ^ INO $sb
910 | DIR $
911 | FILENAMES
912 /x
913 || $configs{$name}{pod} =~ m! I/O | stdio
914 | file \s+ descriptor
915 | file \s* system
916 | statfs
917 !x)
918 {
919 $configs{$name}{'section'} = 'Filesystem configuration values';
920 }
921 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
922 $configs{$name}{'section'} = 'Signals';
923 }
924 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
925 $configs{$name}{'section'} = 'Prototype information';
926 }
927 elsif ( $name =~ / ^ LOC_ /x
928 || $configs{$name}{pod} =~ /full path/i)
929 {
930 $configs{$name}{'section'} = 'Paths to system commands';
931 }
932 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
933 $configs{$name}{'section'} = 'Locales';
934 }
935 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
936 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
937 }
938 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
939 {
940 $configs{$name}{'section'} = 'Password and Group access';
941 }
942 elsif ($name =~ / SOCKET | $sb SOCK /x) {
943 $configs{$name}{'section'} = 'Sockets configuration values';
944 }
945 elsif ( $name =~ / THREAD | MULTIPLICITY /x
946 || $configs{$name}{pod} =~ m/ \b pthread /ix)
947 {
948 $configs{$name}{'section'} = 'Concurrency';
949 }
950 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
951 | VENDOR | ^ USE
952 /x)
953 {
954 $configs{$name}{'section'} = 'Site configuration';
955 }
956 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
957 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
958 || $pod =~ / \b doubles \b /ix
959 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
960 {
961 $configs{$name}{'section'} =
962 'Floating point configuration values';
963 }
964 else {
965 # Above are the specific sections. The rest go into a
966 # grab-bag of general configuration values. However, we put
967 # two classes of them into lists of their names, without their
968 # descriptions, when we think that the description doesn't add
969 # any real value. One list contains the #include variables:
970 # the description is basically boiler plate for each of these.
971 # The other list contains the very many things that are of the
972 # form HAS_foo, and \bfoo\b is contained in its description,
973 # and there is no verbatim text in the pod or links to/from it
974 # (which would add value). That means that it is likely the
975 # intent of the variable can be gleaned from just its name,
976 # and unlikely the description adds signficant value, so just
977 # listing them suffices. Giving their descriptions would
978 # expand this pod significantly with little added value.
979 if ( ! $has_defn
980 && ! $configs{$name}{verbatim}
981 && ! $configs{$name}{linked})
982 {
983 if ($name =~ / ^ I_ ( .* ) /x) {
984 push @include_defs, $name;
985 next;
986 }
987 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
988 my $canonical_name = $1;
989 $canonical_name =~ s/_//g;
990
991 my $canonical_pod = $configs{$name}{pod};
992 $canonical_pod =~ s/_//g;
993
994 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
995 if ($name =~ / $sb R $sb /x) {
996 push @has_r_defs, $name;
997 }
998 else {
999 push @has_defs, $name;
1000 }
1001 next;
1002 }
1003 }
1004 }
1005
1006 $configs{$name}{'section'} = 'General Configuration';
1007 }
1008
1009 my $section = $configs{$name}{'section'};
6927cd44
KW
1010 die "Internal error: '$section' not in \%valid_sections"
1011 unless grep { $_ eq $section } keys %valid_sections;
9e728159 1012 $flags .= 'AdmnT';
00d68f0d 1013 $flags .= 'U' unless defined $configs{$name}{usage};
9e728159 1014
00d68f0d
KW
1015 $docs{'api'}{$section}{$name}{flags} = $flags;
1016 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1017 $docs{'api'}{$section}{$name}{ret_type} = "";
1018 $docs{'api'}{$section}{$name}{file} = 'config.h';
1019 $docs{'api'}{$section}{$name}{usage}
1020 = $configs{$name}{usage} if defined $configs{$name}{usage};
1021 push $docs{'api'}{$section}{$name}{args}->@*, ();
1022 push $docs{'api'}{$section}{$name}{items}->@*, ();
1023 }
1024 }
1025}
1026
94bdecf9 1027sub docout ($$$) { # output the docs for one function
b1cb8a49 1028 my($fh, $element_name, $docref) = @_;
5bf02caf 1029
783469eb
KW
1030 # Trim trailing space
1031 $element_name =~ s/\s*$//;
1032
5bf02caf 1033 my $flags = $docref->{flags};
63da8140 1034 my $pod = $docref->{pod} // "";
5bf02caf 1035 my $file = $docref->{file};
783469eb 1036
183f6e59 1037 my @items = $docref->{items}->@*;
5bf02caf 1038
783469eb
KW
1039 # Make the main element the first of the items. This allows uniform
1040 # treatment below
1041 unshift @items, { name => $element_name,
1042 flags => $flags,
1043 ret_type => $docref->{ret_type},
1044 args => [ $docref->{args}->@* ],
1045 };
94bdecf9 1046
7a997878 1047 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
9639517b 1048
783469eb
KW
1049 print $fh "\n=over $description_indent\n";
1050 print $fh "\n=item C<$_->{name}>\n" for @items;
63da8140 1051
783469eb
KW
1052 # If we're printing only a link to an element, this isn't the major entry,
1053 # so no X<> here.
1054 if ($flags !~ /h/) {
1055 print $fh "X<$_->{name}>" for @items;
1056 print $fh "\n";
d4e99c76 1057 }
63da8140 1058
783469eb
KW
1059 for my $item (@items) {
1060 if ($item->{flags} =~ /D/) {
1061 print $fh <<~"EOT";
54c193ae 1062
783469eb
KW
1063 C<B<DEPRECATED!>> It is planned to remove C<$item->{name}> from a
1064 future release of Perl. Do not use it for new code; remove it from
1065 existing code.
1066 EOT
1067 }
1068 elsif ($item->{flags} =~ /x/) {
1069 print $fh <<~"EOT";
54c193ae 1070
783469eb
KW
1071 NOTE: C<$item->{name}> is B<experimental> and may change or be
1072 removed without notice.
1073 EOT
1074 }
54c193ae 1075 }
94bdecf9 1076
783469eb
KW
1077 chomp $pod; # Make sure prints pod with a single trailing \n
1078 print $fh "\n", $pod, "\n";
3cfa66c4 1079
783469eb
KW
1080 for my $item (@items) {
1081 my $item_flags = $item->{flags};
1082 my $item_name = $item->{name};
3cfa66c4 1083
783469eb
KW
1084 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1085 if $item_flags =~ /O/;
1086 # Is Perl_, but no #define foo # Perl_foo
1087 if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1088
1089 # Can't handle threaded varargs
1090 || ($item_flags =~ /f/ && $item_flags !~ /T/))
1091 {
1092 $item->{name} = "Perl_$item_name";
1093 print $fh <<~"EOT";
1094
1095 NOTE: C<$item_name> must be explicitly called as
1096 C<$item->{name}>
1097 EOT
1098 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1099 print $fh ".\n";
1100 }
1101 }
94bdecf9 1102
9e728159
KW
1103 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1104 # to never warrant a usage line
1105 warn("U and s flags are incompatible")
1106 if $flags =~ /U/ && $flags =~ /s/;
20046047 1107 # nothing
05ca4832 1108 } else {
783469eb
KW
1109
1110 print $fh "\n=over $usage_indent\n";
1111
00d68f0d
KW
1112 if (defined $docref->{usage}) { # An override of the usage section
1113 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1114 }
783469eb
KW
1115 else {
1116
8d1c4423
KW
1117 # Add the thread context formal parameter on expanded-out names
1118 for my $item (@items) {
1119 unshift $item->{args}->@*, (($item->{args}->@*)
1120 ? "pTHX_"
1121 : "pTHX")
1122 if $item->{flags} !~ /T/
1123 && $item->{name} =~ /^Perl_/;
1124 }
1125
783469eb
KW
1126 # Look through all the items in this entry. If all have the same
1127 # return type and arguments, only the main entry is displayed.
1128 # Also, find the longest return type and longest name so that if
1129 # multiple ones are shown, they can be vertically aligned nicely
783469eb 1130 my $need_individual_usage = 0;
53c9fbf2 1131 my $longest_name_length = length $items[0]->{name};
783469eb 1132 my $base_ret_type = $items[0]->{ret_type};
53c9fbf2 1133 my $longest_ret = length $base_ret_type;
783469eb 1134 my @base_args = $items[0]->{args}->@*;
53c9fbf2 1135 for (my $i = 1; $i < @items; $i++) {
783469eb 1136 no warnings 'experimental::smartmatch';
53c9fbf2 1137 my $item = $items[$i];
783469eb
KW
1138 $need_individual_usage = 1
1139 if $item->{ret_type} ne $base_ret_type
1140 || ! ($item->{args}->@* ~~ @base_args);
1141 my $ret_length = length $item->{ret_type};
1142 $longest_ret = $ret_length if $ret_length > $longest_ret;
1143 my $name_length = length $item->{name};
1144 $longest_name_length = $name_length
1145 if $name_length > $longest_name_length;
1ded1f42 1146 }
783469eb
KW
1147
1148 # If we're only showing one entry, only its length matters.
1149 $longest_name_length = length($items[0]->{name})
1150 unless $need_individual_usage;
1151 print $fh "\n";
1152
1153 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1154 # for more
1155 my $ret_name_sep_length = 2; # spaces between return type and name
1156 my $name_indent = $indent + $longest_ret;
1157 $name_indent += $ret_name_sep_length if $longest_ret;
1158
1159 # 80 column terminal - 1 for pager adding a column; -7 for nroff
1160 # indent;
1161 my $max_length = 80 - 1 - 7 - $description_indent - $usage_indent;
1162
1163 for my $item (@items) {
1164 my $ret_type = $item->{ret_type};
1165 my @args = $item->{args}->@*;
1166 my $name = $item->{name};
1167 my $item_flags = $item->{flags};
1168
783469eb
KW
1169 # The return type
1170 print $fh (" " x $indent), $ret_type;
1171
1172 print $fh " " x ( $ret_name_sep_length
1173 + $longest_ret - length $ret_type);
1174 print $fh $name;
1175
1176 if ($item_flags =~ /n/) { # no args
1177 warn("$file: $element_name: n flag without m")
1178 unless $item_flags =~ /m/;
1179 warn("$file: $name: n flag but apparently has args")
1180 if @args;
1181 }
1182 else {
1183 # +1 for the '('
1184 my $arg_indent = $name_indent + $longest_name_length + 1;
1185
1186 # Align the argument lists of the items
1187 print $fh " " x ($longest_name_length - length($name));
1188 print $fh "(";
1189
1190 # Display as many of the arguments on the same line as
1191 # will fit.
1192 my $total_length = $arg_indent;
1193 my $first_line = 1;
1194 for (my $i = 0; $i < @args; $i++) {
1195 my $arg = $args[$i];
1196 my $arg_length = length($arg);
1197
1198 # All but the first arg are preceded by a blank
1199 my $use_blank = $i > 0;
1200
1201 # +1 here and below because either the argument has a
1202 # trailing comma or trailing ')'
1203 $total_length += $arg_length + $use_blank + 1;
1204
1205 # We want none of the arguments to be positioned so
1206 # they extend too far to the right. Ideally, they
1207 # should all start in the same column as the arguments
1208 # on the first line of the function display do. But, if
1209 # necessary, outdent them so that they all start in
1210 # another column, with the longest ending at the right
1211 # margin, like so:
1212 # void function_name(pTHX_ short1,
1213 # short2,
1214 # very_long_argument,
1215 # short3)
1216 if ($total_length > $max_length) {
1217
1218 # If this is the first continuation line,
1219 # calculate the longest argument; this will be the
1220 # one we may have to outdent for.
1221 if ($first_line) {
1222 $first_line = 0;
1223
1224 # We will need at least as much as the current
1225 # argument
1226 my $longest_arg_length = $arg_length
1227 + $use_blank + 1;
1228
1229 # Look through the rest of the args to see if
1230 # any are longer than this one.
1231 for (my $j = $i + 1; $j < @args; $j++) {
1232
1233 # Include the trailing ',' or ')' in the
1234 # length. No need to concern ourselves
1235 # with a leading blank, as the argument
1236 # would be positioned first on the next
1237 # line
1238 my $peek_arg_length = length ($args[$j])
1239 + 1;
1240 $longest_arg_length = $peek_arg_length
1241 if $peek_arg_length > $longest_arg_length;
1242 }
1243
1244 # Calculate the new indent if necessary.
1245 $arg_indent = $max_length - $longest_arg_length
1246 if $arg_indent + $longest_arg_length
1247 > $max_length;
1248 }
1249
1250 print $fh "\n", (" " x $arg_indent);
1251 $total_length = $arg_indent + $arg_length + 1;
1252 $use_blank = 0;
1253 }
1254
1255 # Display this argument
1256 print $fh " " if $use_blank;
1257 print $fh $arg;
1258 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1259
1260 } # End of loop through args
1261
1262 print $fh ")";
1ded1f42 1263 }
783469eb
KW
1264
1265 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1266 print $fh "\n";
1267
1268 # Only the first entry is normally displayed
1269 last unless $need_individual_usage;
1ded1f42 1270 }
1ded1f42 1271 }
783469eb
KW
1272
1273 print $fh "\n=back\n";
94bdecf9 1274 }
783469eb
KW
1275
1276 print $fh "\n=back\n";
63da8140 1277 print $fh "\n=for hackers\nFound in file $file\n";
94bdecf9
JH
1278}
1279
f83c6033
KW
1280sub sort_helper {
1281 # Do a case-insensitive dictionary sort, with only alphabetics
1282 # significant, falling back to using everything for determinancy
1354d57e 1283 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
f83c6033
KW
1284 || uc($a) cmp uc($b)
1285 || $a cmp $b;
1286}
1287
7b73ff98 1288sub output {
5a0155e6 1289 my ($podname, $header, $dochash, $missing, $footer) = @_;
6a4c4cd4
DM
1290 #
1291 # strip leading '|' from each line which had been used to hide
1292 # pod from pod checkers.
1293 s/^\|//gm for $header, $footer;
1294
7882b24a 1295 my $fh = open_new("pod/$podname.pod", undef,
20046047 1296 {by => "$0 extracting documentation",
f1f44974 1297 from => 'the C source files'}, 1);
e0492643 1298
63da8140 1299 print $fh $header, "\n";
e0492643 1300
5781dcac 1301 for my $section_name (sort sort_helper keys %valid_sections) {
7a997878 1302 my $section_info = $dochash->{$section_name};
36ea2d2f 1303
5781dcac
KW
1304 # We allow empty sections in perlintern.
1305 if (! $section_info && $podname eq 'perlapi') {
36ea2d2f 1306 warn "Empty section '$section_name'; skipped";
36ea2d2f
KW
1307 next;
1308 }
1309
63da8140
KW
1310 print $fh "\n=head1 $section_name\n";
1311
5781dcac
KW
1312 if ($podname eq 'perlapi') {
1313 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1314 if defined $valid_sections{$section_name}{header};
151c3fe5 1315
5781dcac
KW
1316 # Output any heading-level documentation and delete so won't get in
1317 # the way later
1318 if (exists $section_info->{""}) {
1319 print $fh "\n", $section_info->{""}, "\n";
1320 delete $section_info->{""};
1321 }
151c3fe5 1322 }
151c3fe5 1323
5781dcac
KW
1324
1325 if ($section_info) {
1326 for my $function_name (sort sort_helper keys %$section_info) {
1327 docout($fh, $function_name, $section_info->{$function_name});
1328 }
1329 }
1330 else {
1331 print $fh "\nThere are only public API items currently in $section_name\n";
20046047 1332 }
00b0fb83
KW
1333
1334 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
5781dcac
KW
1335 if $podname eq 'perlapi'
1336 && defined $valid_sections{$section_name}{footer};
7b73ff98
NC
1337 }
1338
5a0155e6 1339 if (@$missing) {
63da8140
KW
1340 print $fh "\n=head1 Undocumented functions\n";
1341 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
1342
474d0ac8 1343The following functions have been flagged as part of the public API,
72d33970 1344but are currently undocumented. Use them at your own risk, as the
ba4591a5
KW
1345interfaces are subject to change. Functions that are not listed in this
1346document are not intended for public use, and should NOT be used under any
1347circumstances.
1348
5a4fed09
KW
1349If you feel you need to use one of these functions, first send email to
1350L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
1351that there is a good reason for the function not being documented, and it
1352should be removed from this list; or it may just be that no one has gotten
1353around to documenting it. In the latter case, you will be asked to submit a
1354patch to document the function. Once your patch is accepted, it will indicate
1355that the interface is stable (unless it is explicitly marked otherwise) and
1356usable by you.
63da8140 1357
cf5f2f8f 1358_EOB_
2616800a
FC
1359The following functions are currently undocumented. If you use one of
1360them, you may wish to consider creating and submitting documentation for
1361it.
63da8140 1362
2616800a 1363_EOB_
783469eb 1364 print $fh "\n=over $description_indent\n";
6a4c4cd4 1365
63da8140
KW
1366 for my $missing (sort sort_helper @$missing) {
1367 print $fh "\n=item C<$missing>\nX<$missing>\n";
1368 }
1369 print $fh "\n=back\n";
5a0155e6 1370 }
63da8140
KW
1371
1372 print $fh "\n$footer\n=cut\n";
5a0155e6 1373
7882b24a 1374 read_only_bottom_close_and_rename($fh);
cd093254
MM
1375}
1376
e8e591c9
NC
1377foreach (@{(setup_embed())[0]}) {
1378 next if @$_ < 2;
b1cb8a49 1379 my ($flags, $ret_type, $func, @args) = @$_;
e8e591c9 1380 s/\b(?:NN|NULLOK)\b\s+//g for @args;
bc350081 1381
5ce57792 1382 $funcflags{$func} = {
20046047 1383 flags => $flags,
b1cb8a49 1384 ret_type => $ret_type,
20046047
KE
1385 args => \@args,
1386 };
5ce57792
NC
1387}
1388
5ce57792
NC
1389# glob() picks up docs from extra .c or .h files that may be in unclean
1390# development trees.
741c0772
NC
1391open my $fh, '<', 'MANIFEST'
1392 or die "Can't open MANIFEST: $!";
1393while (my $line = <$fh>) {
b87d9527 1394 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
5ce57792 1395
7cb0b6c4
KW
1396 # Don't pick up pods from these. (We may pick up generated stuff from
1397 # /lib though)
1398 next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
1399
1ae6ead9 1400 open F, '<', $file or die "Cannot open $file for docs: $!\n";
5ce57792
NC
1401 autodoc(\*F,$file);
1402 close F or die "Error closing $file: $!\n";
1403}
741c0772 1404close $fh or die "Error whilst reading MANIFEST: $!";
5ce57792 1405
00d68f0d
KW
1406parse_config_h();
1407
5ce57792
NC
1408for (sort keys %funcflags) {
1409 next unless $funcflags{$_}{flags} =~ /d/;
6523e108 1410 next if $funcflags{$_}{flags} =~ /h/;
573cf2be 1411 warn "no docs for $_\n";
bc350081 1412}
94bdecf9 1413
5ce57792 1414foreach (sort keys %missing) {
5ce57792 1415 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
94bdecf9
JH
1416}
1417
ff5af78d
KW
1418# List of funcs in the public API that aren't also marked as core-only,
1419# experimental nor deprecated.
b87d9527
KW
1420my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1421 && $funcflags{$_}{flags} !~ /[xD]/
1422 && !$docs{api}{$_}, keys %funcflags;
00d68f0d 1423push @missing_api, keys %missing_macros;
a026e430
KW
1424
1425my $other_places = join ", ", map { "L<$_>" } sort sort_helper qw( perlclib perlxs),
1426 keys %described_elsewhere;
1427
00d68f0d
KW
1428# The S< > makes things less densely packed, hence more readable
1429my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_defs;
1430my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_r_defs;
1431$valid_sections{'General Configuration'}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1432$valid_sections{'General Configuration'}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1433
1434my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @include_defs;
1435$valid_sections{'General Configuration'}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1436
7260d4ba
KW
1437my $section_list = join "\n\n", map { "=item L</$_>" } sort sort_helper keys %valid_sections;
1438
b87d9527 1439output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
6a4c4cd4
DM
1440|=encoding UTF-8
1441|
1442|=head1 NAME
1443|
1444|perlapi - autogenerated documentation for the perl public API
1445|
1446|=head1 DESCRIPTION
1447|X<Perl API> X<API> X<api>
1448|
b87d9527
KW
1449|This file contains most of the documentation of the perl public API, as
1450|generated by F<embed.pl>. Specifically, it is a listing of functions,
a026e430
KW
1451|macros, flags, and variables that may be used by extension writers. Besides
1452|L<perlintern> and F<config.h>, some items are listed here as being actually
1453|documented in another pod.
b87d9527
KW
1454|
1455|L<At the end|/Undocumented functions> is a list of functions which have yet
1456|to be documented. Patches welcome! The interfaces of these are subject to
1457|change without notice.
1458|
6927cd44
KW
1459|Some of the functions documented here are consolidated so that a single entry
1460|serves for multiple functions which all do basically the same thing, but have
1461|some slight differences. For example, one form might process magic, while
1462|another doesn't. The name of each variation is listed at the top of the
1463|single entry. But if all have the same signature (arguments and return type)
1464|except for their names, only the usage for the base form is shown. If any
1465|one of the forms has a different signature (such as returning C<const> or
1466|not) every function's signature is explicitly displayed.
7260d4ba 1467|
a026e430
KW
1468|Anything not listed here or in the other mentioned pods is not part of the
1469|public API, and should not be used by extension writers at all. For these
1470|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1471|writing extensions.
6a4c4cd4
DM
1472|
1473|In Perl, unlike C, a string of characters may generally contain embedded
1474|C<NUL> characters. Sometimes in the documentation a Perl string is referred
1475|to as a "buffer" to distinguish it from a C string, but sometimes they are
1476|both just referred to as strings.
1477|
1478|Note that all Perl API global variables must be referenced with the C<PL_>
1479|prefix. Again, those not listed here are not to be used by extension writers,
1480|and can be changed or removed without notice; same with macros.
1481|Some macros are provided for compatibility with the older,
1482|unadorned names, but this support may be disabled in a future release.
1483|
1484|Perl was originally written to handle US-ASCII only (that is characters
1485|whose ordinal numbers are in the range 0 - 127).
1486|And documentation and comments may still use the term ASCII, when
1487|sometimes in fact the entire range from 0 - 255 is meant.
1488|
1489|The non-ASCII characters below 256 can have various meanings, depending on
1490|various things. (See, most notably, L<perllocale>.) But usually the whole
1491|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1492|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1493|"Latin1" as referring just to the characters in the range 128 through 255, or
2de859ce 1494|sometimes from 160 through 255.
6a4c4cd4
DM
1495|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1496|
1497|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1498|L<perlebcdic>). Most of the documentation (and even comments in the code)
1499|ignore the EBCDIC possibility.
1500|For almost all purposes the differences are transparent.
1501|As an example, under EBCDIC,
1502|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1503|whenever this documentation refers to C<utf8>
1504|(and variants of that name, including in function names),
1505|it also (essentially transparently) means C<UTF-EBCDIC>.
1506|But the ordinals of characters differ between ASCII, EBCDIC, and
1507|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1508|number of bytes than in UTF-8.
1509|
6927cd44
KW
1510|The organization of this document is tentative and subject to change.
1511|Suggestions and patches welcome
1512|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1513|
1514|The sections in this document currently are
1515|
1516|=over
1517
1518|$section_list
1519|
1520|=back
1521|
6a4c4cd4 1522|The listing below is alphabetical, case insensitive.
94bdecf9 1523_EOB_
6a4c4cd4
DM
1524|=head1 AUTHORS
1525|
1526|Until May 1997, this document was maintained by Jeff Okamoto
7b1f0a98 1527|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
6a4c4cd4
DM
1528|
1529|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1530|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1531|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1532|Stephen McCamant, and Gurusamy Sarathy.
1533|
7b1f0a98 1534|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
6a4c4cd4
DM
1535|
1536|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1537|
1538|=head1 SEE ALSO
1539|
a026e430 1540|F<config.h>, L<perlintern>, $other_places
94bdecf9
JH
1541_EOE_
1542
79fc8511
FC
1543# List of non-static internal functions
1544my @missing_guts =
9f589e47 1545 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
5a0155e6 1546
7b1f0a98 1547output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
6a4c4cd4
DM
1548|=head1 NAME
1549|
1550|perlintern - autogenerated documentation of purely B<internal>
20046047 1551|Perl functions
6a4c4cd4
DM
1552|
1553|=head1 DESCRIPTION
1554|X<internal Perl functions> X<interpreter functions>
1555|
1556|This file is the autogenerated documentation of functions in the
1557|Perl interpreter that are documented using Perl's internal documentation
1558|format but are not marked as part of the Perl API. In other words,
1559|B<they are not for use in extensions>!
5781dcac
KW
1560
1561|It has the same sections as L<perlapi>, though some may be empty.
6a4c4cd4 1562|
7b1f0a98 1563_EOB_
6a4c4cd4
DM
1564|
1565|=head1 AUTHORS
1566|
1567|The autodocumentation system was originally added to the Perl core by
1568|Benjamin Stuhl. Documentation is by whoever was kind enough to
1569|document their functions.
1570|
1571|=head1 SEE ALSO
1572|
a026e430 1573|F<config.h>, L<perlapi>, $other_places
7b1f0a98 1574_EOE_