5 # Unconditionally regenerate:
10 # from information stored in
13 # plus all the core .c, .h, and .pod files listed in MANIFEST
14 # plus %extra_input_pods
16 my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
18 # Has an optional arg, which is the directory to chdir to before reading
19 # MANIFEST and the files
21 # This script is invoked as part of 'make all'
23 # The generated pod consists of sections of related elements, functions,
24 # macros, and variables. The keys of %valid_sections give the current legal
25 # ones. Just add a new key to add a section.
27 # Throughout the files read by this script are lines like
29 # =for apidoc_section Section Name
30 # =for apidoc_section $section_name_variable
32 # "Section Name" (after having been stripped of leading space) must be one of
33 # the legal section names, or an error is thrown. $section_name_variable must
34 # be one of the legal section name variables defined below; these expand to
35 # legal section names. This form is used so that minor wording changes in
36 # these titles can be confined to this file. All the names of the variables
37 # end in '_scn'; this suffix is optional in the apidoc_section lines.
39 # All API elements defined between this line and the next 'apidoc_section'
40 # line will go into the section "Section Name" (or $section_name_variable),
41 # sorted by dictionary order within it. perlintern and perlapi are parallel
42 # documents, each potentially with a section "Section Name". Each element is
43 # marked as to which document it goes into. If there are none for a
44 # particular section in perlapi, that section is omitted.
46 # Also, in .[ch] files, there may be
50 # lines in comments. These are also used by this program to switch to section
51 # "Section Name". The difference is that if there are any lines after the
52 # =head1, inside the same comment, and before any =for apidoc-ish lines, they
53 # are used as a heading for section "Section Name" (in both perlintern and
54 # perlapi). This includes any =head[2-5]. If more than one '=head1 Section
55 # Name' line has content, they appear in the generated pod in an undefined
56 # order. Note that you can't use a $section_name_variable in =head1 lines
58 # The next =head1, =for apidoc_section, or file end terminates what goes into
61 # The %valid_sections hash below also can have header content, which will
62 # appear before any =head1 content. The hash can also have footer content
63 # content, which will appear at the end of the section, after all the
66 # The lines that define the actual functions, etc are documented in embed.fnc,
67 # because they have flags which must be kept in sync with that file.
72 my $nroff_min_indent = 4; # for non-heading lines
73 # 80 column terminal - 2 for pager adding 2 columns;
74 my $max_width = 80 - 2 - $nroff_min_indent;
75 my $standard_indent = 4; # Any additional indentations
80 or die "Couldn't chdir to '$workdir': $!";
82 require './regen/regen_lib.pl';
83 require './regen/embed_lib.pl';
85 my %described_elsewhere;
88 # See database of global and static function prototypes in embed.fnc
89 # This is used to generate prototype headers under various configurations,
90 # export symbols lists for different platforms, and macros to provide an
91 # implicit interpreter context argument.
100 my $link_text = "Described in";
102 my $description_indent = 4;
103 my $usage_indent = 3; # + initial blank yields 4 total
105 my $AV_scn = 'AV Handling';
106 my $callback_scn = 'Callback Functions';
107 my $casting_scn = 'Casting';
108 my $casing_scn = 'Character case changing';
109 my $classification_scn = 'Character classification';
110 my $names_scn = 'Character names';
111 my $scope_scn = 'Compile-time scope hooks';
112 my $compiler_scn = 'Compiler and Preprocessor information';
113 my $directives_scn = 'Compiler directives';
114 my $concurrency_scn = 'Concurrency';
115 my $COP_scn = 'COPs and Hint Hashes';
116 my $CV_scn = 'CV Handling';
117 my $custom_scn = 'Custom Operators';
118 my $debugging_scn = 'Debugging';
119 my $display_scn = 'Display functions';
120 my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
121 my $errno_scn = 'Errno';
122 my $exceptions_scn = 'Exception Handling (simple) Macros';
123 my $filesystem_scn = 'Filesystem configuration values';
124 my $filters_scn = 'Source Filters';
125 my $floating_scn = 'Floating point';
126 my $genconfig_scn = 'General Configuration';
127 my $globals_scn = 'Global Variables';
128 my $GV_scn = 'GV Handling and Stashes';
129 my $hook_scn = 'Hook manipulation';
130 my $HV_scn = 'HV Handling';
131 my $io_scn = 'Input/Output';
132 my $io_formats_scn = 'I/O Formats';
133 my $integer_scn = 'Integer';
134 my $lexer_scn = 'Lexer interface';
135 my $locale_scn = 'Locales';
136 my $magic_scn = 'Magic';
137 my $memory_scn = 'Memory Management';
139 my $multicall_scn = 'Multicall Functions';
140 my $numeric_scn = 'Numeric Functions';
142 # Now combined, as unclear which functions go where, but separate names kept
143 # to avoid 1) other code changes; 2) in case it seems better to split again
144 my $optrees_scn = 'Optrees';
145 my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
146 my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
147 my $pack_scn = 'Pack and Unpack';
148 my $pad_scn = 'Pad Data Structures';
149 my $password_scn = 'Password and Group access';
150 my $reports_scn = 'Reports and Formats';
151 my $paths_scn = 'Paths to system commands';
152 my $prototypes_scn = 'Prototype information';
153 my $regexp_scn = 'REGEXP Functions';
154 my $signals_scn = 'Signals';
155 my $site_scn = 'Site configuration';
156 my $sockets_scn = 'Sockets configuration values';
157 my $stack_scn = 'Stack Manipulation Macros';
158 my $string_scn = 'String Handling';
159 my $SV_flags_scn = 'SV Flags';
160 my $SV_scn = 'SV Handling';
161 my $tainting_scn = 'Tainting';
162 my $time_scn = 'Time';
163 my $typedefs_scn = 'Typedef names';
164 my $unicode_scn = 'Unicode Support';
165 my $utility_scn = 'Utility Functions';
166 my $versioning_scn = 'Versioning';
167 my $warning_scn = 'Warning and Dieing';
170 # Kept separate at end
171 my $undocumented_scn = 'Undocumented elements';
173 my %valid_sections = (
178 $classification_scn => {},
181 $directives_scn => {},
182 $concurrency_scn => {},
186 This section documents functions to manipulate CVs which are
187 code-values, meaning subroutines. For more information, see
193 $debugging_scn => {},
195 $embedding_scn => {},
197 $exceptions_scn => {},
200 Also see L</List of capability HAS_foo symbols>.
206 Also L</List of capability HAS_foo symbols> lists capabilities
207 that arent in this section. For example C<HAS_ASINH>, for the
208 hyperbolic sine function.
213 This section contains configuration information not otherwise
214 found in the more specialized sections of this document. At the
215 end is a list of C<#defines> whose name should be enough to tell
216 you what they do, and a list of #defines which tell you if you
217 need to C<#include> files to get the corresponding functionality.
222 =head2 List of capability C<HAS_I<foo>> symbols
224 This is a list of those symbols that dont appear elsewhere in ths
225 document that indicate if the current platform has a certain
226 capability. Their names all begin with C<HAS_>. Only those
227 symbols whose capability is directly derived from the name are
228 listed here. All others have their meaning expanded out elsewhere
229 in this document. This (relatively) compact list is because we
230 think that the expansion would add little or no value and take up
231 a lot of space (because there are so many). If you think certain
232 ones should be expanded, send email to
233 L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
235 Each symbol here will be C<#define>d if and only if the platform
236 has the capability. If you need more detail, see the
237 corresponding entry in F<config.h>. For convenience, the list is
238 split so that the ones that indicate there is a reentrant version
239 of a capability are listed separately
243 And, the reentrant capabilities:
249 =over $standard_indent
254 use an alternative implementation
259 =head2 List of C<#include> needed symbols
261 This list contains symbols that indicate if certain C<#include>
262 files are present on the platform. If your code accesses the
263 functionality that one of these is for, you will need to
264 C<#include> it if the symbol on this list is C<#define>d. For
265 more detail, see the corresponding entry in F<config.h>.
271 =over $standard_indent
287 These are used for formatting the corresponding type For example,
290 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
294 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
296 This keeps you from having to know if, say an IV, needs to be
297 printed as C<%d>, C<%ld>, or something else.
306 $multicall_scn => {},
309 $optree_construction_scn => {},
310 $optree_manipulation_scn => {},
315 $prototypes_scn => {},
319 These are used in the simple report generation feature of Perl.
326 These variables give details as to where various libraries,
327 installation destinations, I<etc.>, go, as well as what various
328 installation options were selected
335 See also C<L</$unicode_scn>>.
345 L<perlguts/Unicode Support> has an introduction to this API.
347 See also C<L</$classification_scn>>,
349 and C<L</$string_scn>>.
350 Various functions outside this section also work specially with
351 Unicode. Search for the string "utf8" in this document.
355 $versioning_scn => {},
360 # Somewhat loose match for an apidoc line so we can catch minor typos.
361 # Parentheses are used to capture portions so that below we verify
362 # that things are the actual correct syntax.
363 my $apidoc_re = qr/ ^ (\s*) # $1
371 # Only certain flags, dealing with display, are acceptable for apidoc_item
372 my $display_flags = "fFnDopTx;";
374 sub check_api_doc_line ($$) {
375 my ($file, $in) = @_;
377 return unless $in =~ $apidoc_re;
379 my $is_item = defined $5;
380 my $is_in_proper_form = length $1 == 0
386 || ($is_item && substr($7, 0, 1) eq '|'));
387 my $proto_in_file = $7;
388 my $proto = $proto_in_file;
389 $proto = "||$proto" if $proto !~ /\|/;
390 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
392 $name && $is_in_proper_form or die <<EOS;
393 Bad apidoc at $file line $.:
396 =for apidoc flags|returntype|name|arg|arg|...
397 =for apidoc flags|returntype|name
402 die "Only [$display_flags] allowed in apidoc_item:\n$in"
403 if $is_item && $flags =~ /[^$display_flags]/;
405 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
408 sub embed_override($) {
409 my ($element_name) = shift;
411 # If the entry is also in embed.fnc, it should be defined
412 # completely there, but not here
413 my $embed_docref = delete $funcflags{$element_name};
415 return unless $embed_docref and %$embed_docref;
417 my $flags = $embed_docref->{'flags'};
418 warn "embed.fnc entry '$element_name' missing 'd' flag"
419 unless $flags =~ /d/;
421 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
424 # The section that is in effect at the beginning of the given file. If not
425 # listed here, an apidoc_section line must precede any apidoc lines.
426 # This allows the files listed here that generally are single-purpose, to not
427 # have to worry about the autodoc section
428 my %initial_file_section = (
432 'deb.c' => $debugging_scn,
433 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
438 'locale.c' => $locale_scn,
439 'malloc.c' => $memory_scn,
440 'numeric.c' => $numeric_scn,
441 'opnames.h' => $optree_construction_scn,
443 'patchlevel.h' => $versioning_scn,
444 'perlio.h' => $io_scn,
445 'pod/perlapio.pod' => $io_scn,
446 'pod/perlcall.pod' => $callback_scn,
447 'pod/perlembed.pod' => $embedding_scn,
448 'pod/perlfilter.pod' => $filters_scn,
449 'pod/perliol.pod' => $io_scn,
450 'pod/perlmroapi.pod' => $MRO_scn,
451 'pod/perlreguts.pod' => $regexp_scn,
452 'pp_pack.c' => $pack_scn,
453 'pp_sort.c' => $SV_scn,
454 'regcomp.c' => $regexp_scn,
455 'regexp.h' => $regexp_scn,
458 'sv_inline.h' => $SV_scn,
459 'taint.c' => $tainting_scn,
460 'unicode_constants.h' => $unicode_scn,
461 'utf8.c' => $unicode_scn,
462 'utf8.h' => $unicode_scn,
463 'vutil.c' => $versioning_scn,
466 sub autodoc ($$) { # parse a file and extract documentation info
468 my($in, $line_num, $header, $section);
470 $section = $initial_file_section{$file}
471 if defined $initial_file_section{$file};
473 my $file_is_C = $file =~ / \. [ch] $ /x;
476 my $get_next_line = sub { $line_num++; return <$fh> };
479 while ($in = $get_next_line->()) {
480 last unless defined $in;
482 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
483 # =head1 lines only have effect in C files
484 || ($file_is_C && $in =~ /^=head1/));
486 # Here, the line introduces a portion of the input that we care about.
487 # Either it is for an API element, or heading text which we expect
488 # will be used for elements later in the file
490 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
493 # If the line starts a new section ...
494 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
497 if ($section =~ / ^ \$ /x) {
498 $section .= '_scn' unless $section =~ / _scn $ /;
499 $section = eval "$section";
500 die "Unknown \$section variable '$section' in $file: $@" if $@;
502 die "Unknown section name '$section' in $file near line $.\n"
503 unless defined $valid_sections{$section};
506 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
508 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
509 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
511 else { # Plain apidoc
513 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
514 = check_api_doc_line($file, $in);
515 # Override this line with any info in embed.fnc
516 my ($embed_flags, $embed_ret_type, @embed_args)
517 = embed_override($element_name);
518 if ($embed_ret_type) {
519 warn "embed.fnc entry overrides redundant information in"
520 . " '$proto_in_file' in $file"
521 if $flags || $ret_type || @args;
522 $flags = $embed_flags;
523 $ret_type = $embed_ret_type;
526 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not
528 $missing{$element_name} = $file;
531 die "flag '$1' is not legal (for function $element_name (from $file))"
532 if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy;#] ) /x;
534 die "'u' flag must also have 'm' or 'y' flags' for $element_name"
535 if $flags =~ /u/ && $flags !~ /[my]/;
536 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
537 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
540 die "Return type must be empty for '$element_name'"
542 $ret_type = '#ifdef';
545 if (exists $seen{$element_name} && $flags !~ /h/) {
546 die ("'$element_name' in $file was already documented in $seen{$element_name}");
549 $seen{$element_name} = $file;
553 # Here we have processed the initial line in the heading text or API
554 # element, and have saved the important information from it into the
555 # corresponding variables. Now accumulate the text that applies to it
556 # up to a terminating line, which is one of:
558 # 2) =head (in a C file only =head1)
559 # 3) an end comment line in a C file: m:^\s*\*/:
560 # 4) =for apidoc... (except apidoc_item lines)
562 my $head_ender_num = ($file_is_C) ? 1 : "";
563 while (defined($in = $get_next_line->())) {
565 last if $in =~ /^=cut/x;
566 last if $in =~ /^=head$head_ender_num/;
568 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
570 # End of comment line in C files is a fall-back terminator,
571 # but warn only if there actually is some accumulated text
572 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
576 if ($in !~ / ^ =for [ ]+ apidoc /x) {
581 # Here, the line is an apidoc line. All but apidoc_item terminate
582 # the text being accumulated.
583 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
585 my ($item_name, $item_flags, $item_ret_type, $is_item,
586 $item_proto, @item_args) = check_api_doc_line($file, $in);
587 last unless $is_item;
589 # Here, is an apidoc_item_line; They can only come within apidoc
591 die "Unexpected api_doc_item line '$item_proto'"
592 unless $element_name;
594 # We accept blank lines between these, but nothing else;
595 die "apidoc_item lines must immediately follow apidoc lines for "
596 . " '$element_name' in $file"
598 # Override this line with any info in embed.fnc
599 my ($embed_flags, $embed_ret_type, @embed_args)
600 = embed_override($item_name);
601 if ($embed_ret_type) {
602 warn "embed.fnc entry overrides redundant information in"
603 . " '$item_proto' in $file"
604 if $item_flags || $item_ret_type || @item_args;
606 $item_flags = $embed_flags;
607 $item_ret_type = $embed_ret_type;
608 @item_args = @embed_args;
611 # Use the base entry flags if none for this item; otherwise add in
612 # any non-display base entry flags.
614 $item_flags .= $flags =~ s/[$display_flags]//rg;
617 $item_flags = $flags;
619 $item_ret_type = $ret_type unless $item_ret_type;
620 @item_args = @args unless @item_args;
621 push @items, { name => $item_name,
622 ret_type => $item_ret_type,
623 flags => $item_flags,
624 args => [ @item_args ],
627 # This line shows that this element is documented.
628 delete $funcflags{$item_name};
631 # Here, are done accumulating the text for this item. Trim it
632 $text =~ s/ ^ \s* //x;
633 $text =~ s/ \s* $ //x;
634 $text .= "\n" if $text ne "";
636 # And treat all-spaces as nothing at all
637 undef $text unless $text =~ /\S/;
641 # Here, we have accumulated into $text, the pod for $element_name
642 my $where = $flags =~ /A/ ? 'api' : 'intern';
644 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
645 unless defined $section;
646 my $is_link_only = ($flags =~ /h/);
647 if (! $is_link_only && exists $docs{$where}{$section}{$element_name}) {
648 warn "$0: duplicate API entry for '$element_name' in"
649 . " $where/$section\n";
653 # Override the text with just a link if the flags call for that
656 die "Can't currently handle link with items to it:\n$in"
658 $docs{$where}{$section}{X_tags}{$element_name} = $file;
659 redo; # Don't put anything if C source
662 # Here, is an 'h' flag in pod. We add a reference to the pod (and
663 # nothing else) to perlapi/intern. (It would be better to add a
664 # reference to the correct =item,=header, but something that makes
665 # it harder is that it that might be a duplicate, like '=item *';
666 # so that is a future enhancement XXX. Another complication is
667 # there might be more than one deserving candidates.)
668 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
669 $podname =~ s/\.pod//;
670 $text = "Described in L<$podname>.\n";
672 # Don't output a usage example for linked to documentation if
673 # it is trivial (has no arguments) and we aren't to add a
675 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[U;]/;
677 # Keep track of all the pod files that we refer to.
678 push $described_elsewhere{$podname}->@*, $podname;
681 $docs{$where}{$section}{$element_name}{flags} = $flags;
682 $docs{$where}{$section}{$element_name}{pod} = $text;
683 $docs{$where}{$section}{$element_name}{file} = $file;
684 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
685 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
686 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
689 $valid_sections{$section}{header} = "" unless
690 defined $valid_sections{$section}{header};
691 $valid_sections{$section}{header} .= "\n$text";
694 # We already have the first line of what's to come in $in
697 } # End of loop through input
702 my @has_r_defs; # Reentrant symbols
706 use re '/aa'; # Everthing is ASCII in this file
709 my $config_h = 'config.h';
710 $config_h = 'win32/config.h' unless -e $config_h;
711 die "Can't find $config_h" unless -e $config_h;
712 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
715 # Look for lines like /* FOO_BAR:
716 # By convention all config.h descriptions begin like that
717 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
720 # Here we are starting the description for $name in config.h. We
721 # accumulate the entire description for it into @description.
722 # Flowing text from one input line to another is appended into the
723 # same array element to make a single flowing line element, but
724 # verbatim lines are kept as separate elements in @description.
725 # This will facilitate later doing pattern matching without regard
726 # to line boundaries on non-verbatim text.
728 die "Multiple config.h entries for '$name'"
729 if defined $configs{$name}{description};
731 # Get first line of description
734 # Each line in the description begins with blanks followed by '/*'
736 die "Unexpected config.h initial line for $name: '$_'"
737 unless s/ ^ ( \s* \* \s* ) //x;
738 my $initial_text = $1;
740 # Initialize the description with this first line (after having
741 # stripped the prefix text)
742 my @description = $_;
744 # The first line is used as a template for how much indentation
745 # each normal succeeding line has. Lines indented further
746 # will be considered as intended to be verbatim. But, empty lines
747 # likely won't have trailing blanks, so just strip the whole thing
749 my $strip_initial_qr = qr! \s* \* \s* $
752 $configs{$name}{verbatim} = 0;
754 # Read in the remainder of the description
756 last if s| ^ \s* \* / ||x; # A '*/' ends it
758 die "Unexpected config.h description line for $name: '$_'"
759 unless s/$strip_initial_qr//;
761 # Fix up the few flawed lines in config.h wherein a new
762 # sentence begins with a tab (and maybe a space after that).
763 # Although none of them currently do, let it recognize
766 # "... text"). The next sentence ...
768 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
770 # If this line has extra indentation or looks to have columns,
771 # it should be treated as verbatim. Columns are indicated by
772 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
773 # not preceded by punctuation.
777 | (*nlb:[[:punct:]]) \s{2}
781 # But here, is not a verbatim line. Add an empty line if
782 # this is the first non-verbatim after a run of verbatims
783 if ($description[-1] =~ /^\s/) {
784 push @description, "\n", $_;
786 else { # Otherwise, append this flowing line to the
787 # current flowing line
788 $description[-1] .= $_;
792 $configs{$name}{verbatim} = 1;
794 # The first verbatim line in a run of them is separated by an
795 # empty line from the flowing lines above it
796 push @description, "\n" if $description[-1] =~ /^\S/;
798 $_ = Text::Tabs::expand($_);
800 # Only a single space so less likely to wrap
803 push @description, $_;
807 push $configs{$name}{description}->@*, @description
809 } # Not a description; see if it is a macro definition.
811 (?: / \* )? # Optional commented-out
813 \# \s* define \s+ ( \w+ ) # $1 is the name
814 ( \s* ) # $2 indicates if args or not
815 ( .*? ) # $3 is any definition
816 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
822 # There can be multiple definitions for a name. We want to know
823 # if any of them has arguments, and if any has a body.
824 $configs{$name}{has_args} //= $2 eq "";
825 $configs{$name}{has_args} ||= $2 eq "";
826 $configs{$name}{has_defn} //= $3 ne "";
827 $configs{$name}{has_defn} ||= $3 ne "";
831 # We now have stored the description and information about every #define
832 # in the file. The description is in a form convenient to operate on to
833 # convert to pod. Do that now.
834 foreach my $name (keys %configs) {
835 next unless defined $configs{$name}{description};
837 # All adjacent non-verbatim lines of the description are appended
838 # together in a single element in the array. This allows the patterns
839 # to work across input line boundaries.
842 while (defined ($_ = shift $configs{$name}{description}->@*)) {
845 if (/ ^ \S /x) { # Don't edit verbatim lines
847 # Enclose known file/path names not already so enclosed
848 # with <...>. (Some entries in config.h are already
850 my $file_name_qr = qr! [ \w / ]+ \.
851 (?: c | h | xs | p [lm] | pmc | PL
854 my $path_name_qr = qr! (?: / \w+ )+ !x;
855 for my $re ($file_name_qr, $path_name_qr) {
856 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
859 # Enclose <... file/path names with F<...> (but no double
861 for my $re ($file_name_qr, $path_name_qr) {
862 s! < ( $re ) > !F<$1>!gxx;
865 # Explain metaconfig units
866 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
868 # Convert "See foo" to "See C<L</foo>>" if foo is described in
869 # this file. Also create a link to the known file INSTALL.
870 # And, to be more general, handle "See also foo and bar", and
871 # "See also foo, bar, and baz"
872 while (m/ \b [Ss]ee \s+
873 (?: also \s+ )? ( \w+ )
875 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
877 push @links, $2 if defined $2;
878 push @links, $3 if defined $3;
879 foreach my $link (@links) {
880 if ($link eq 'INSTALL') {
881 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
883 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
884 s| \b $link \b |C<L</$link>>|xg;
885 $configs{$link}{linked} = 1;
886 $configs{$name}{linked} = 1;
891 # Enclose what we think are symbols with C<...>.
892 no warnings 'experimental::vlb';
895 # Any word followed immediately with parens or
897 \b \w+ (?: \( [^)]* \) # parameter list
898 | \[ [^]]* \] # or array reference
900 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
901 | \b (?: struct | union ) \s \w+
903 # Words that contain underscores (which are
904 # definitely not text) or three uppercase letters in
905 # a row. Length two ones, like IV, aren't enclosed,
906 # because they often don't look as nice.
907 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
912 # These include foo when the name is HAS_foo. This is a
913 # heuristic which works in most cases.
914 if ($name =~ / ^ HAS_ (.*) /x) {
917 # Don't include path components, nor things already in
918 # <>, or with trailing '(', '['
919 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
925 delete $configs{$name}{description};
927 $configs{$name}{pod} = $pod;
930 # Now have converted the description to pod. We also now have enough
931 # information that we can do cross checking to find definitions without
932 # corresponding pod, and see if they are mentioned in some description;
933 # otherwise they aren't documented.
935 foreach my $name (keys %configs) {
937 # A definition without pod
938 if (! defined $configs{$name}{pod}) {
940 # Leading/trailing underscore means internal to config.h, e.g.,
942 next if $name =~ / ^ _ /x;
943 next if $name =~ / _ $ /x;
945 # MiXeD case names are internal to config.h; the first 4
946 # characters are sufficient to determine this
947 next if $name =~ / ^ [[:upper:]] [[:lower:]]
948 [[:upper:]] [[:lower:]]
951 # Here, not internal to config.h. Look to see if this symbol is
952 # mentioned in the pod of some other. If so, assume it is
954 foreach my $check_name (keys %configs) {
955 my $this_element = $configs{$check_name};
956 my $this_pod = $this_element->{pod};
957 if (defined $this_pod) {
958 next NAME if $this_pod =~ / \b $name \b /x;
962 warn "$name has no documentation\n";
963 $missing_macros{$name} = 'config.h';
968 my $has_defn = $configs{$name}{has_defn};
969 my $has_args = $configs{$name}{has_args};
971 # Check if any section already has an entry for this element.
972 # If so, it better be a placeholder, in which case we replace it
974 foreach my $section (keys $docs{'api'}->%*) {
975 if (exists $docs{'api'}{$section}{$name}) {
976 my $was = $docs{'api'}{$section}{$name}->{pod};
977 $was = "" unless $was;
979 if ($was ne "" && $was !~ m/$link_text/) {
980 die "Multiple descriptions for $name\n"
981 . "$section contained '$was'";
983 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
984 $configs{$name}{section} = $section;
989 my $handled = 0; # Haven't handled this yet
991 if (defined $configs{$name}{'section'}) {
992 # This has been taken care of elsewhere.
997 if ($has_defn && ! $has_args) {
998 $configs{$name}{args} = 1;
1001 # Symbols of the form I_FOO are for #include files. They have
1002 # special usage information
1003 if ($name =~ / ^ I_ ( .* ) /x) {
1004 my $file = lc $1 . '.h';
1005 $configs{$name}{usage} = <<~"EOT";
1012 # Compute what section this variable should go into. This
1013 # heuristic was determined by manually inspecting the current
1014 # things in config.h, and should be adjusted as necessary as
1015 # deficiencies are found.
1017 # This is the default section for macros with a definiton but
1018 # no arguments, meaning it is replaced unconditionally
1020 my $sb = qr/ _ | \b /x; # segment boundary
1021 my $dash_or_spaces = qr/ - | \s+ /x;
1022 my $pod = $configs{$name}{pod};
1023 if ($name =~ / ^ USE_ /x) {
1024 $configs{$name}{'section'} = $site_scn;
1026 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
1028 $configs{$name}{'section'} = $time_scn;
1030 elsif ( $name =~ / ^ [[:alpha:]]+ f $ /x
1031 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1033 $configs{$name}{'section'} = $io_formats_scn;
1035 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1039 | $sb (?: IS )? FINITE
1042 $configs{$name}{'section'} =
1045 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
1046 $configs{$name}{'section'} = $filesystem_scn;
1048 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1049 || $configs{$name}{pod} =~ m/ \b align /x)
1051 $configs{$name}{'section'} = $compiler_scn;
1053 elsif ($name =~ / ^ [IU] [ \d V ]
1054 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1056 $configs{$name}{'section'} = $integer_scn;
1058 elsif ($name =~ / $sb t $sb /x) {
1059 $configs{$name}{'section'} = $typedefs_scn;
1062 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1063 && $configs{$name}{pod} =~ m/ \b format \b /ix)
1065 $configs{$name}{'section'} = $io_formats_scn;
1067 elsif ($name =~ / BACKTRACE /x) {
1068 $configs{$name}{'section'} = $debugging_scn;
1070 elsif ($name =~ / ALLOC $sb /x) {
1071 $configs{$name}{'section'} = $memory_scn;
1073 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
1085 || $configs{$name}{pod} =~ m! I/O | stdio
1086 | file \s+ descriptor
1091 $configs{$name}{'section'} = $filesystem_scn;
1093 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
1094 $configs{$name}{'section'} = $signals_scn;
1096 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
1097 $configs{$name}{'section'} = $prototypes_scn;
1099 elsif ( $name =~ / ^ LOC_ /x
1100 || $configs{$name}{pod} =~ /full path/i)
1102 $configs{$name}{'section'} = $paths_scn;
1104 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
1105 $configs{$name}{'section'} = $locale_scn;
1107 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
1108 $configs{$name}{'section'} = $compiler_scn;
1110 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1112 $configs{$name}{'section'} = $password_scn;
1114 elsif ($name =~ / SOCKET | $sb SOCK /x) {
1115 $configs{$name}{'section'} = $sockets_scn;
1117 elsif ( $name =~ / THREAD | MULTIPLICITY /x
1118 || $configs{$name}{pod} =~ m/ \b pthread /ix)
1120 $configs{$name}{'section'} = $concurrency_scn;
1122 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
1126 $configs{$name}{'section'} = $site_scn;
1128 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
1129 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1130 || $pod =~ / \b doubles \b /ix
1131 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
1133 $configs{$name}{'section'} =
1137 # Above are the specific sections. The rest go into a
1138 # grab-bag of general configuration values. However, we put
1139 # two classes of them into lists of their names, without their
1140 # descriptions, when we think that the description doesn't add
1141 # any real value. One list contains the #include variables:
1142 # the description is basically boiler plate for each of these.
1143 # The other list contains the very many things that are of the
1144 # form HAS_foo, and \bfoo\b is contained in its description,
1145 # and there is no verbatim text in the pod or links to/from it
1146 # (which would add value). That means that it is likely the
1147 # intent of the variable can be gleaned from just its name,
1148 # and unlikely the description adds signficant value, so just
1149 # listing them suffices. Giving their descriptions would
1150 # expand this pod significantly with little added value.
1152 && ! $configs{$name}{verbatim}
1153 && ! $configs{$name}{linked})
1155 if ($name =~ / ^ I_ ( .* ) /x) {
1156 push @include_defs, $name;
1159 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1160 my $canonical_name = $1;
1161 $canonical_name =~ s/_//g;
1163 my $canonical_pod = $configs{$name}{pod};
1164 $canonical_pod =~ s/_//g;
1166 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1167 if ($name =~ / $sb R $sb /x) {
1168 push @has_r_defs, $name;
1171 push @has_defs, $name;
1178 $configs{$name}{'section'} = $genconfig_scn;
1181 my $section = $configs{$name}{'section'};
1182 die "Internal error: '$section' not in \%valid_sections"
1183 unless grep { $_ eq $section } keys %valid_sections;
1185 $flags .= 'U' unless defined $configs{$name}{usage};
1187 # All the information has been gathered; save it
1188 $docs{'api'}{$section}{$name}{flags} = $flags;
1189 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1190 $docs{'api'}{$section}{$name}{ret_type} = "";
1191 $docs{'api'}{$section}{$name}{file} = 'config.h';
1192 $docs{'api'}{$section}{$name}{usage}
1193 = $configs{$name}{usage} if defined $configs{$name}{usage};
1194 push $docs{'api'}{$section}{$name}{args}->@*, ();
1195 push $docs{'api'}{$section}{$name}{items}->@*, ();
1200 sub format_pod_indexes($) {
1201 my $entries_ref = shift;
1203 # Output the X<> references to the names, packed since they don't get
1204 # displayed, but not too many per line so that when someone is editing the
1205 # file, it doesn't run on
1208 my $line_length = 0;
1209 for my $name (sort dictionary_order $entries_ref->@*) {
1210 my $entry = "X<$name>";
1211 my $entry_length = length $entry;
1213 # Don't loop forever if we have a verrry long name, and don't go too
1215 if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1221 $line_length += $entry_length;
1227 sub docout ($$$) { # output the docs for one function group
1228 my($fh, $element_name, $docref) = @_;
1230 # Trim trailing space
1231 $element_name =~ s/\s*$//;
1233 my $flags = $docref->{flags};
1234 my $pod = $docref->{pod} // "";
1235 my $file = $docref->{file};
1237 my @items = $docref->{items}->@*;
1239 # Make the main element the first of the items. This allows uniform
1241 unshift @items, { name => $element_name,
1243 ret_type => $docref->{ret_type},
1244 args => [ $docref->{args}->@* ],
1247 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1249 print $fh "\n=over $description_indent\n";
1250 print $fh "\n=item C<$_->{name}>\n" for @items;
1252 # If we're printing only a link to an element, this isn't the major entry,
1254 if ($flags !~ /h/) {
1255 print $fh "X<$_->{name}>" for @items;
1261 for my $item (@items) {
1262 push @deprecated, "C<$item->{name}>" if $item->{flags} =~ /D/;
1263 push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
1266 for my $which (\@deprecated, \@experimental) {
1272 if ($which->@* == 1) {
1275 $list = $which->[0];
1277 elsif ($which->@* == @items) {
1280 $list = (@items == 2)
1282 : "all these forms";
1287 my $final = pop $which->@*;
1288 $list = "the " . join ", ", $which->@*;
1289 $list .= "," if $which->@* > 1;
1290 $list .= " and $final forms";
1293 if ($which == \@deprecated) {
1296 C<B<DEPRECATED!>> It is planned to remove $list
1297 from a future release of Perl. Do not use $it for
1298 new code; remove $it from existing code.
1304 NOTE: $list $is B<experimental> and may change or be
1305 removed without notice.
1311 chomp $pod; # Make sure prints pod with a single trailing \n
1312 print $fh "\n", $pod, "\n";
1314 for my $item (@items) {
1315 my $item_flags = $item->{flags};
1316 my $item_name = $item->{name};
1318 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1319 if $item_flags =~ /O/;
1320 # Is Perl_, but no #define foo # Perl_foo
1321 if ( ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1323 # Can't handle threaded varargs
1324 || ($item_flags =~ /f/ && $item_flags !~ /T/))
1326 $item->{name} = "Perl_$item_name";
1329 NOTE: C<$item_name> must be explicitly called as
1332 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1337 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1338 # to never warrant a usage line
1339 warn("U and ; flags are incompatible")
1340 if $flags =~ /U/ && $flags =~ /;/;
1344 print $fh "\n=over $usage_indent\n";
1346 if (defined $docref->{usage}) { # An override of the usage section
1347 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1351 # Add the thread context formal parameter on expanded-out names
1352 for my $item (@items) {
1353 unshift $item->{args}->@*, (($item->{args}->@*)
1356 if $item->{flags} !~ /T/
1357 && $item->{name} =~ /^Perl_/;
1360 # Look through all the items in this entry. If all have the same
1361 # return type and arguments (including thread context), only the
1362 # main entry is displayed.
1363 # Also, find the longest return type and longest name so that if
1364 # multiple ones are shown, they can be vertically aligned nicely
1365 my $need_individual_usage = 0;
1366 my $longest_name_length = length $items[0]->{name};
1367 my $base_ret_type = $items[0]->{ret_type};
1368 my $longest_ret = length $base_ret_type;
1369 my @base_args = $items[0]->{args}->@*;
1370 my $base_thread_context = $items[0]->{flags} =~ /T/;
1371 for (my $i = 1; $i < @items; $i++) {
1372 no warnings 'experimental::smartmatch';
1373 my $item = $items[$i];
1374 $need_individual_usage = 1
1375 if $item->{ret_type} ne $base_ret_type
1376 || ! ($item->{args}->@* ~~ @base_args)
1377 || ( $item->{flags} =~ /T/
1378 != $base_thread_context);
1379 my $ret_length = length $item->{ret_type};
1380 $longest_ret = $ret_length if $ret_length > $longest_ret;
1381 my $name_length = length $item->{name};
1382 $longest_name_length = $name_length
1383 if $name_length > $longest_name_length;
1386 # If we're only showing one entry, only its length matters.
1387 $longest_name_length = length($items[0]->{name})
1388 unless $need_individual_usage;
1391 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1393 my $ret_name_sep_length = 2; # spaces between return type and name
1394 my $name_indent = $indent + $longest_ret;
1395 $name_indent += $ret_name_sep_length if $longest_ret;
1397 my $this_max_width =
1398 $max_width - $description_indent - $usage_indent;
1400 for my $item (@items) {
1401 my $ret_type = $item->{ret_type};
1402 my @args = $item->{args}->@*;
1403 my $name = $item->{name};
1404 my $item_flags = $item->{flags};
1407 print $fh (" " x $indent), $ret_type;
1409 print $fh " " x ( $ret_name_sep_length
1410 + $longest_ret - length $ret_type);
1413 if ($item_flags =~ /n/) { # no args
1414 warn("$file: $element_name: n flag without m")
1415 unless $item_flags =~ /m/;
1416 warn("$file: $name: n flag but apparently has args")
1421 my $arg_indent = $name_indent + $longest_name_length + 1;
1423 # Align the argument lists of the items
1424 print $fh " " x ($longest_name_length - length($name));
1427 # Display as many of the arguments on the same line as
1429 my $total_length = $arg_indent;
1431 for (my $i = 0; $i < @args; $i++) {
1432 my $arg = $args[$i];
1433 my $arg_length = length($arg);
1435 # All but the first arg are preceded by a blank
1436 my $use_blank = $i > 0;
1438 # +1 here and below because either the argument has a
1439 # trailing comma or trailing ')'
1440 $total_length += $arg_length + $use_blank + 1;
1442 # We want none of the arguments to be positioned so
1443 # they extend too far to the right. Ideally, they
1444 # should all start in the same column as the arguments
1445 # on the first line of the function display do. But, if
1446 # necessary, outdent them so that they all start in
1447 # another column, with the longest ending at the right
1449 # void function_name(pTHX_ short1,
1451 # very_long_argument,
1453 if ($total_length > $this_max_width) {
1455 # If this is the first continuation line,
1456 # calculate the longest argument; this will be the
1457 # one we may have to outdent for.
1461 # We will need at least as much as the current
1463 my $longest_arg_length = $arg_length
1466 # Look through the rest of the args to see if
1467 # any are longer than this one.
1468 for (my $j = $i + 1; $j < @args; $j++) {
1470 # Include the trailing ',' or ')' in the
1471 # length. No need to concern ourselves
1472 # with a leading blank, as the argument
1473 # would be positioned first on the next
1475 my $peek_arg_length = length ($args[$j])
1477 $longest_arg_length = $peek_arg_length
1478 if $peek_arg_length > $longest_arg_length;
1481 # Calculate the new indent if necessary.
1483 $this_max_width - $longest_arg_length
1484 if $arg_indent + $longest_arg_length
1488 print $fh "\n", (" " x $arg_indent);
1489 $total_length = $arg_indent + $arg_length + 1;
1493 # Display this argument
1494 print $fh " " if $use_blank;
1496 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1498 } # End of loop through args
1503 print $fh ";" if $item_flags =~ /;/; # semicolon: "dTHR;"
1506 # Only the first entry is normally displayed
1507 last unless $need_individual_usage;
1511 print $fh "\n=back\n";
1514 print $fh "\n=back\n";
1515 print $fh "\n=for hackers\nFound in file $file\n";
1518 sub construct_missings_section {
1519 my ($missings_hdr, $missings_ref) = @_;
1522 $text .= "$missings_hdr\n" . format_pod_indexes($missings_ref);
1524 if ($missings_ref->@* == 0) {
1525 return $text . "\nThere are currently no items of this type\n";
1528 # Sort the elements.
1529 my @missings = sort dictionary_order $missings_ref->@*;
1536 # Look through all the elements in the list and see how many columns we
1537 # could place them in the output what will fit in the available width.
1538 my $min_spacer = 2; # Need this much space between columns
1544 # We start with more columns, and work down until we find a number that
1545 # can accommodate all the data. This algorithm doesn't require the
1546 # resulting columns to all have the same width. This can allow for
1547 # as tight of packing as the data will possibly allow.
1548 for ($columns = 7; $columns >= 1; $columns--) {
1550 # For this many columns, we will need this many rows (final row might
1551 # not be completely filled)
1552 $rows = (@missings + $columns - 1) / $columns;
1554 # We only need to execute this final iteration to calculate the number
1555 # of rows, as we can't get fewer than a single column.
1556 last if $columns == 1;
1558 my $row_width = 1; # For 1 space indent
1559 my $i = 0; # Which missing element
1561 # For each column ...
1562 for my $col (0 .. $columns - 1) {
1564 # Calculate how wide the column needs to be, which is based on the
1565 # widest element in it
1566 $col_widths[$col] = 0;
1568 # Look through all the rows to find the widest element
1569 for my $row (0 .. $rows - 1) {
1571 # Skip if this row doesn't have an entry for this column
1572 last if $i >= @missings;
1574 # This entry occupies this many bytes.
1575 my $this_width = length $missings[$i];
1577 # All but the final column need a spacer between it and the
1579 $this_width += $min_spacer if $col < $columns - 1;
1582 # This column will need to have enough width to accommodate
1584 if ($this_width > $col_widths[$col]) {
1586 # We can't have this many columns if the total width
1587 # exceeds the available; bail now and try fewer columns
1588 next COLUMN if $row_width + $this_width > $max_width;
1590 $col_widths[$col] = $this_width;
1593 $i++; # The next row will contain the next item
1596 $row_width += $col_widths[$col];
1597 next COLUMN if $row_width > $max_width;
1600 # If we get this far, this many columns works
1604 # Here, have calculated the number of rows ($rows) and columns ($columns)
1605 # required to list the elements. @col_widths contains the width of each
1610 # Assemble the output
1611 for my $row (0 .. $rows - 1) {
1612 for my $col (0 .. $columns - 1) {
1613 $text .= " " if $col == 0; # Indent one to mark as verbatim
1615 my $index = $row + $rows * $col; # Convert 2 dimensions to 1
1617 # Skip if this row doesn't have an entry for this column
1618 next if $index >= @missings;
1620 my $element = $missings[$index];
1623 # Add alignment spaces for all but final column
1624 $text .= " " x ($col_widths[$col] - length $element)
1625 if $col < $columns - 1;
1628 $text .= "\n"; # End of row
1634 sub dictionary_order {
1635 # Do a case-insensitive dictionary sort, falling back in stages to using
1636 # everything for determinancy. The initial comparison ignores
1637 # all non-word characters and non-trailing underscores and digits, with
1638 # trailing ones collating to after any other characters. This collation
1639 # order continues in case tie breakers are needed; sequences of digits
1640 # that do get looked at always compare numerically. The first tie
1641 # breaker takes all digits and underscores into account. The next tie
1642 # breaker uses a caseless character-by-character comparison of everything
1643 # (including non-word characters). Finally is a cased comparison.
1645 # This gives intuitive results, but obviously could be tweaked.
1647 no warnings 'non_unicode';
1652 # Convert all digit sequences to same length with leading zeros, so for
1653 # example, 8 will compare less than 16 (using a fill length value that
1654 # should be longer than any sequence in the input).
1655 $a =~ s/(\d+)/sprintf "%06d", $1/ge;
1656 $b =~ s/(\d+)/sprintf "%06d", $1/ge;
1658 # Translate any underscores and digits so they compare after all Unicode
1660 $a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1661 $b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1663 use feature 'state';
1664 # Modify \w, \W to reflect the changes.
1665 state $ud = '\x{110000}-\x{11000A}'; # xlated underscore, digits
1666 state $w = "\\w$ud"; # new \w string
1667 state $mod_w = qr/[$w]/;
1668 state $mod_W = qr/[^$w]/;
1670 # Only \w for initial comparison
1671 my $a_only_word = uc($a =~ s/$mod_W//gr);
1672 my $b_only_word = uc($b =~ s/$mod_W//gr);
1674 # And not initial nor interior underscores nor digits (by squeezing them
1676 my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1677 my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1679 # If the stripped versions differ, use that as the comparison.
1680 my $cmp = $a_stripped cmp $b_stripped;
1681 return $cmp if $cmp;
1683 # For the first tie breaker, repeat, but consider initial and interior
1684 # underscores and digits, again having those compare after all Unicode
1686 $cmp = $a_only_word cmp $b_only_word;
1687 return $cmp if $cmp;
1689 # Next tie breaker is just a caseless comparison
1690 $cmp = uc($a) cmp uc($b);
1691 return $cmp if $cmp;
1693 # Finally a straight comparison
1698 my ($podname, $header, $dochash, $footer, @missings_refs) = @_;
1700 # strip leading '|' from each line which had been used to hide
1701 # pod from pod checkers.
1702 s/^\|//gm for $header, $footer, @missings_refs;
1704 my $fh = open_new("pod/$podname.pod", undef,
1705 {by => "$0 extracting documentation",
1706 from => 'the C source files'}, 1);
1708 print $fh $header, "\n";
1710 for my $section_name (sort dictionary_order keys %valid_sections) {
1711 my $section_info = $dochash->{$section_name};
1713 # We allow empty sections in perlintern.
1714 if (! $section_info && $podname eq 'perlapi') {
1715 warn "Empty section '$section_name'; skipped";
1719 print $fh "\n=head1 $section_name\n";
1721 if ($section_info->{X_tags}) {
1722 print $fh "X<$_>" for sort keys $section_info->{X_tags}->%*;
1724 delete $section_info->{X_tags};
1727 if ($podname eq 'perlapi') {
1728 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1729 if defined $valid_sections{$section_name}{header};
1731 # Output any heading-level documentation and delete so won't get in
1733 if (exists $section_info->{""}) {
1734 print $fh "\n", $section_info->{""}, "\n";
1735 delete $section_info->{""};
1739 if ($section_info && keys $section_info->%*) {
1740 for my $function_name (sort dictionary_order keys %$section_info) {
1741 docout($fh, $function_name, $section_info->{$function_name});
1745 my $pod_type = ($podname eq 'api') ? "public" : "internal";
1746 print $fh "\nThere are currently no $pod_type API items in ",
1747 $section_name, "\n";
1750 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1751 if $podname eq 'perlapi'
1752 && defined $valid_sections{$section_name}{footer};
1758 my $missings_hdr = shift @missings_refs or last;
1759 my $missings_ref = shift @missings_refs or die "Foo";
1765 =head1 $undocumented_scn
1770 print $fh construct_missings_section($missings_hdr, $missings_ref);
1773 print $fh "\n$footer\n=cut\n";
1775 read_only_bottom_close_and_rename($fh);
1778 foreach (@{(setup_embed())[0]}) {
1780 my ($flags, $ret_type, $func, @args) = @$_;
1781 s/\b(?:NN|NULLOK)\b\s+//g for @args;
1783 $funcflags{$func} = {
1785 ret_type => $ret_type,
1790 # glob() picks up docs from extra .c or .h files that may be in unclean
1791 # development trees.
1792 open my $fh, '<', 'MANIFEST'
1793 or die "Can't open MANIFEST: $!";
1794 while (my $line = <$fh>) {
1795 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1797 # Don't pick up pods from these.
1798 next if $file =~ m! ^ ( cpan | dist | ext ) / !x
1799 && ! defined $extra_input_pods{$file};
1801 open F, '<', $file or die "Cannot open $file for docs: $!\n";
1803 close F or die "Error closing $file: $!\n";
1805 close $fh or die "Error whilst reading MANIFEST: $!";
1809 for (sort keys %funcflags) {
1810 next unless $funcflags{$_}{flags} =~ /d/;
1811 next if $funcflags{$_}{flags} =~ /h/;
1812 warn "no docs for $_\n";
1815 foreach (sort keys %missing) {
1816 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1819 # List of funcs in the public API that aren't also marked as core-only,
1820 # experimental nor deprecated.
1822 my @undocumented_api = grep { $funcflags{$_}{flags} =~ /A/
1825 my @undocumented_intern = grep { $funcflags{$_}{flags} !~ /[AS]/
1826 && ! $docs{intern}{$_}
1828 my @undocumented_deprecated_api = grep { $funcflags{$_}{flags} =~ /D/ }
1830 my @undocumented_deprecated_intern = grep { $funcflags{$_}{flags} =~ /D/ }
1831 @undocumented_intern;
1832 my @undocumented_experimental_api = grep { $funcflags{$_}{flags} =~ /x/ }
1834 my @undocumented_experimental_intern = grep { $funcflags{$_}{flags} =~ /x/ }
1835 @undocumented_intern;
1836 my @missing_api = grep { $funcflags{$_}{flags} !~ /[xD]/ } @undocumented_api;
1837 push @missing_api, keys %missing_macros;
1839 my @missing_intern = grep { $funcflags{$_}{flags} !~ /[xD]/ }
1840 @undocumented_intern;
1842 my @other_places = ( qw(perlclib ), keys %described_elsewhere );
1843 my $places_other_than_intern = join ", ",
1844 map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1845 my $places_other_than_api = join ", ",
1846 map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
1848 # The S< > makes things less densely packed, hence more readable
1849 my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1850 my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
1851 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1852 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1854 my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
1855 $valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1857 my $section_list = join "\n\n", map { "=item L</$_>" }
1858 sort(dictionary_order keys %valid_sections),
1859 $undocumented_scn; # Keep last
1861 # Leading '|' is to hide these lines from pod checkers. khw is unsure if this
1863 my $api_hdr = <<"_EOB_";
1868 |perlapi - autogenerated documentation for the perl public API
1871 |X<Perl API> X<API> X<api>
1873 |This file contains most of the documentation of the perl public API, as
1874 |generated by F<embed.pl>. Specifically, it is a listing of functions,
1875 |macros, flags, and variables that may be used by extension writers. Besides
1876 |L<perlintern> and F<config.h>, some items are listed here as being actually
1877 |documented in another pod.
1879 |L<At the end|/$undocumented_scn> is a list of functions which have yet
1880 |to be documented. Patches welcome! The interfaces of these are subject to
1881 |change without notice.
1883 |Some of the functions documented here are consolidated so that a single entry
1884 |serves for multiple functions which all do basically the same thing, but have
1885 |some slight differences. For example, one form might process magic, while
1886 |another doesn't. The name of each variation is listed at the top of the
1887 |single entry. But if all have the same signature (arguments and return type)
1888 |except for their names, only the usage for the base form is shown. If any
1889 |one of the forms has a different signature (such as returning C<const> or
1890 |not) every function's signature is explicitly displayed.
1892 |Anything not listed here or in the other mentioned pods is not part of the
1893 |public API, and should not be used by extension writers at all. For these
1894 |reasons, blindly using functions listed in F<proto.h> is to be avoided when
1895 |writing extensions.
1897 |In Perl, unlike C, a string of characters may generally contain embedded
1898 |C<NUL> characters. Sometimes in the documentation a Perl string is referred
1899 |to as a "buffer" to distinguish it from a C string, but sometimes they are
1900 |both just referred to as strings.
1902 |Note that all Perl API global variables must be referenced with the C<PL_>
1903 |prefix. Again, those not listed here are not to be used by extension writers,
1904 |and may be changed or removed without notice; same with macros.
1905 |Some macros are provided for compatibility with the older,
1906 |unadorned names, but this support may be disabled in a future release.
1908 |Perl was originally written to handle US-ASCII only (that is characters
1909 |whose ordinal numbers are in the range 0 - 127).
1910 |And documentation and comments may still use the term ASCII, when
1911 |sometimes in fact the entire range from 0 - 255 is meant.
1913 |The non-ASCII characters below 256 can have various meanings, depending on
1914 |various things. (See, most notably, L<perllocale>.) But usually the whole
1915 |range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1916 |"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1917 |"Latin1" as referring just to the characters in the range 128 through 255, or
1918 |sometimes from 160 through 255.
1919 |This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1921 |Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1922 |L<perlebcdic>). Most of the documentation (and even comments in the code)
1923 |ignore the EBCDIC possibility.
1924 |For almost all purposes the differences are transparent.
1925 |As an example, under EBCDIC,
1926 |instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1927 |whenever this documentation refers to C<utf8>
1928 |(and variants of that name, including in function names),
1929 |it also (essentially transparently) means C<UTF-EBCDIC>.
1930 |But the ordinals of characters differ between ASCII, EBCDIC, and
1931 |the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1932 |number of bytes than in UTF-8.
1934 |The organization of this document is tentative and subject to change.
1935 |Suggestions and patches welcome
1936 |L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1938 |The sections in this document currently are
1940 |=over $standard_indent
1946 |The listing below is alphabetical, case insensitive.
1949 my $api_footer = <<"_EOE_";
1952 |Until May 1997, this document was maintained by Jeff Okamoto
1953 |<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
1955 |With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1956 |Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1957 |Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1958 |Stephen McCamant, and Gurusamy Sarathy.
1960 |API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1962 |Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1966 |F<config.h>, $places_other_than_api
1969 my $api_missings_hdr = <<'_EOT_';
1970 |The following functions have been flagged as part of the public
1971 |API, but are currently undocumented. Use them at your own risk,
1972 |as the interfaces are subject to change. Functions that are not
1973 |listed in this document are not intended for public use, and
1974 |should NOT be used under any circumstances.
1976 |If you feel you need to use one of these functions, first send
1977 |email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1978 |It may be that there is a good reason for the function not being
1979 |documented, and it should be removed from this list; or it may
1980 |just be that no one has gotten around to documenting it. In the
1981 |latter case, you will be asked to submit a patch to document the
1982 |function. Once your patch is accepted, it will indicate that the
1983 |interface is stable (unless it is explicitly marked otherwise) and
1987 my $api_experimental_hdr = <<"_EOT_";
1989 |Next are the API-flagged elements that are considered experimental. Using one
1990 |of these is even more risky than plain undocumented ones. They are listed
1991 |here because they should be listed somewhere (so their existence doesn't get
1992 |lost) and this is the best place for them.
1995 my $api_deprecated_hdr = <<"_EOT_";
1997 |Finally are deprecated undocumented API elements.
1998 |Do not use any for new code; remove all occurrences of all of these from
2002 output('perlapi', $api_hdr, $docs{api}, $api_footer,
2003 $api_missings_hdr, \@missing_api,
2004 $api_experimental_hdr, \@undocumented_experimental_api,
2005 $api_deprecated_hdr, \@undocumented_deprecated_api);
2007 my $intern_hdr = <<"_EOB_";
2010 |perlintern - autogenerated documentation of purely B<internal>
2014 |X<internal Perl functions> X<interpreter functions>
2016 |This file is the autogenerated documentation of functions in the
2017 |Perl interpreter that are documented using Perl's internal documentation
2018 |format but are not marked as part of the Perl API. In other words,
2019 |B<they are not for use in extensions>!
2021 |It has the same sections as L<perlapi>, though some may be empty.
2025 my $intern_footer = <<"_EOE_";
2029 |The autodocumentation system was originally added to the Perl core by
2030 |Benjamin Stuhl. Documentation is by whoever was kind enough to
2031 |document their functions.
2035 |F<config.h>, $places_other_than_intern
2038 my $intern_missings_hdr = <<"_EOT_";
2040 |This section lists the elements that are otherwise undocumented. If you use
2041 |any of them, please consider creating and submitting documentation for it.
2043 |Experimental and deprecated undocumented elements are listed separately at the
2048 my $intern_experimental_hdr = <<"_EOT_";
2050 |Next are the experimental undocumented elements
2054 my $intern_deprecated_hdr = <<"_EOT_";
2056 |Finally are the deprecated undocumented elements.
2057 |Do not use any for new code; remove all occurrences of all of these from
2062 output('perlintern', $intern_hdr, $docs{intern}, $intern_footer,
2063 $intern_missings_hdr, \@missing_intern,
2064 $intern_experimental_hdr, \@undocumented_experimental_intern,
2065 $intern_deprecated_hdr, \@undocumented_deprecated_intern