autodoc.pl

   1 #!/usr/bin/perl -w
   2
   3 use Text::Tabs;
   4 #
   5 # Unconditionally regenerate:
   6 #
   7 #    pod/perlintern.pod
   8 #    pod/perlapi.pod
   9 #
  10 # from information stored in
  11 #
  12 #    embed.fnc
  13 #    plus all the core .c, .h, and .pod files listed in MANIFEST
  14 #    plus %extra_input_pods
  15
  16 my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
  17
  18 # Has an optional arg, which is the directory to chdir to before reading
  19 # MANIFEST and the files
  20 #
  21 # This script is invoked as part of 'make all'
  22 #
  23 # The generated pod consists of sections of related elements, functions,
  24 # macros, and variables.  The keys of %valid_sections give the current legal
  25 # ones.  Just add a new key to add a section.
  26 #
  27 # Throughout the files read by this script are lines like
  28 #
  29 # =for apidoc_section Section Name
  30 # =for apidoc_section $section_name_variable
  31 #
  32 # "Section Name" (after having been stripped of leading space) must be one of
  33 # the legal section names, or an error is thrown.  $section_name_variable must
  34 # be one of the legal section name variables defined below; these expand to
  35 # legal section names.  This form is used so that minor wording changes in
  36 # these titles can be confined to this file.  All the names of the variables
  37 # end in '_scn'; this suffix is optional in the apidoc_section lines.
  38 #
  39 # All API elements defined between this line and the next 'apidoc_section'
  40 # line will go into the section "Section Name" (or $section_name_variable),
  41 # sorted by dictionary order within it.  perlintern and perlapi are parallel
  42 # documents, each potentially with a section "Section Name".  Each element is
  43 # marked as to which document it goes into.  If there are none for a
  44 # particular section in perlapi, that section is omitted.
  45 #
  46 # Also, in .[ch] files, there may be
  47 #
  48 # =head1 Section Name
  49 #
  50 # lines in comments.  These are also used by this program to switch to section
  51 # "Section Name".  The difference is that if there are any lines after the
  52 # =head1, inside the same comment, and before any =for apidoc-ish lines, they
  53 # are used as a heading for section "Section Name" (in both perlintern and
  54 # perlapi).  This includes any =head[2-5].  If more than one '=head1 Section
  55 # Name' line has content, they appear in the generated pod in an undefined
  56 # order.  Note that you can't use a $section_name_variable in =head1 lines
  57 #
  58 # The next =head1, =for apidoc_section, or file end terminates what goes into
  59 # the current section
  60 #
  61 # The %valid_sections hash below also can have header content, which will
  62 # appear before any =head1 content.  The hash can also have footer content
  63 # content, which will appear at the end of the section, after all the
  64 # elements.
  65 #
  66 # The lines that define the actual functions, etc are documented in embed.fnc,
  67 # because they have flags which must be kept in sync with that file.
  68
  69 use strict;
  70 use warnings;
  71
  72 my $nroff_min_indent = 4;   # for non-heading lines
  73 # 80 column terminal - 2 for pager adding 2 columns;
  74 my $max_width = 80 - 2 - $nroff_min_indent;
  75 my $standard_indent = 4;  # Any additional indentations
  76
  77 if (@ARGV) {
  78     my $workdir = shift;
  79     chdir $workdir
  80         or die "Couldn't chdir to '$workdir': $!";
  81 }
  82 require './regen/regen_lib.pl';
  83 require './regen/embed_lib.pl';
  84
  85 my %described_elsewhere;
  86
  87 #
  88 # See database of global and static function prototypes in embed.fnc
  89 # This is used to generate prototype headers under various configurations,
  90 # export symbols lists for different platforms, and macros to provide an
  91 # implicit interpreter context argument.
  92 #
  93
  94 my %docs;
  95 my %seen;
  96 my %funcflags;
  97 my %missing;
  98 my %missing_macros;
  99
 100 my $link_text = "Described in";
 101
 102 my $description_indent = 4;
 103 my $usage_indent = 3;   # + initial blank yields 4 total
 104
 105 my $AV_scn = 'AV Handling';
 106 my $callback_scn = 'Callback Functions';
 107 my $casting_scn = 'Casting';
 108 my $casing_scn = 'Character case changing';
 109 my $classification_scn = 'Character classification';
 110 my $names_scn = 'Character names';
 111 my $scope_scn = 'Compile-time scope hooks';
 112 my $compiler_scn = 'Compiler and Preprocessor information';
 113 my $directives_scn = 'Compiler directives';
 114 my $concurrency_scn = 'Concurrency';
 115 my $COP_scn = 'COPs and Hint Hashes';
 116 my $CV_scn = 'CV Handling';
 117 my $custom_scn = 'Custom Operators';
 118 my $debugging_scn = 'Debugging';
 119 my $display_scn = 'Display functions';
 120 my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
 121 my $errno_scn = 'Errno';
 122 my $exceptions_scn = 'Exception Handling (simple) Macros';
 123 my $filesystem_scn = 'Filesystem configuration values';
 124 my $filters_scn = 'Source Filters';
 125 my $floating_scn = 'Floating point';
 126 my $genconfig_scn = 'General Configuration';
 127 my $globals_scn = 'Global Variables';
 128 my $GV_scn = 'GV Handling and Stashes';
 129 my $hook_scn = 'Hook manipulation';
 130 my $HV_scn = 'HV Handling';
 131 my $io_scn = 'Input/Output';
 132 my $io_formats_scn = 'I/O Formats';
 133 my $integer_scn = 'Integer';
 134 my $lexer_scn = 'Lexer interface';
 135 my $locale_scn = 'Locales';
 136 my $magic_scn = 'Magic';
 137 my $memory_scn = 'Memory Management';
 138 my $MRO_scn = 'MRO';
 139 my $multicall_scn = 'Multicall Functions';
 140 my $numeric_scn = 'Numeric Functions';
 141
 142 # Now combined, as unclear which functions go where, but separate names kept
 143 # to avoid 1) other code changes; 2) in case it seems better to split again
 144 my $optrees_scn = 'Optrees';
 145 my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
 146 my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
 147 my $pack_scn = 'Pack and Unpack';
 148 my $pad_scn = 'Pad Data Structures';
 149 my $password_scn = 'Password and Group access';
 150 my $reports_scn = 'Reports and Formats';
 151 my $paths_scn = 'Paths to system commands';
 152 my $prototypes_scn = 'Prototype information';
 153 my $regexp_scn = 'REGEXP Functions';
 154 my $signals_scn = 'Signals';
 155 my $site_scn = 'Site configuration';
 156 my $sockets_scn = 'Sockets configuration values';
 157 my $stack_scn = 'Stack Manipulation Macros';
 158 my $string_scn = 'String Handling';
 159 my $SV_flags_scn = 'SV Flags';
 160 my $SV_scn = 'SV Handling';
 161 my $tainting_scn = 'Tainting';
 162 my $time_scn = 'Time';
 163 my $typedefs_scn = 'Typedef names';
 164 my $unicode_scn = 'Unicode Support';
 165 my $utility_scn = 'Utility Functions';
 166 my $versioning_scn = 'Versioning';
 167 my $warning_scn = 'Warning and Dieing';
 168 my $XS_scn = 'XS';
 169
 170 # Kept separate at end
 171 my $undocumented_scn = 'Undocumented elements';
 172
 173 my %valid_sections = (
 174     $AV_scn => {},
 175     $callback_scn => {},
 176     $casting_scn => {},
 177     $casing_scn => {},
 178     $classification_scn => {},
 179     $scope_scn => {},
 180     $compiler_scn => {},
 181     $directives_scn => {},
 182     $concurrency_scn => {},
 183     $COP_scn => {},
 184     $CV_scn => {
 185         header => <<~'EOT',
 186             This section documents functions to manipulate CVs which are
 187             code-values, meaning subroutines.  For more information, see
 188             L<perlguts>.
 189             EOT
 190     },
 191
 192     $custom_scn => {},
 193     $debugging_scn => {},
 194     $display_scn => {},
 195     $embedding_scn => {},
 196     $errno_scn => {},
 197     $exceptions_scn => {},
 198     $filesystem_scn => {
 199         header => <<~'EOT',
 200             Also see L</List of capability HAS_foo symbols>.
 201             EOT
 202         },
 203     $filters_scn => {},
 204     $floating_scn => {
 205         header => <<~'EOT',
 206             Also L</List of capability HAS_foo symbols> lists capabilities
 207             that arent in this section.  For example C<HAS_ASINH>, for the
 208             hyperbolic sine function.
 209             EOT
 210         },
 211     $genconfig_scn => {
 212         header => <<~'EOT',
 213             This section contains configuration information not otherwise
 214             found in the more specialized sections of this document.  At the
 215             end is a list of C<#defines> whose name should be enough to tell
 216             you what they do, and a list of #defines which tell you if you
 217             need to C<#include> files to get the corresponding functionality.
 218             EOT
 219
 220         footer => <<~EOT,
 221
 222             =head2 List of capability C<HAS_I<foo>> symbols
 223
 224             This is a list of those symbols that dont appear elsewhere in ths
 225             document that indicate if the current platform has a certain
 226             capability.  Their names all begin with C<HAS_>.  Only those
 227             symbols whose capability is directly derived from the name are
 228             listed here.  All others have their meaning expanded out elsewhere
 229             in this document.  This (relatively) compact list is because we
 230             think that the expansion would add little or no value and take up
 231             a lot of space (because there are so many).  If you think certain
 232             ones should be expanded, send email to
 233             L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
 234
 235             Each symbol here will be C<#define>d if and only if the platform
 236             has the capability.  If you need more detail, see the
 237             corresponding entry in F<config.h>.  For convenience, the list is
 238             split so that the ones that indicate there is a reentrant version
 239             of a capability are listed separately
 240
 241             __HAS_LIST__
 242
 243             And, the reentrant capabilities:
 244
 245             __HAS_R_LIST__
 246
 247             Example usage:
 248
 249             =over $standard_indent
 250
 251              #ifdef HAS_STRNLEN
 252                use strnlen()
 253              #else
 254                use an alternative implementation
 255              #endif
 256
 257             =back
 258
 259             =head2 List of C<#include> needed symbols
 260
 261             This list contains symbols that indicate if certain C<#include>
 262             files are present on the platform.  If your code accesses the
 263             functionality that one of these is for, you will need to
 264             C<#include> it if the symbol on this list is C<#define>d.  For
 265             more detail, see the corresponding entry in F<config.h>.
 266
 267             __INCLUDE_LIST__
 268
 269             Example usage:
 270
 271             =over $standard_indent
 272
 273              #ifdef I_WCHAR
 274                #include <wchar.h>
 275              #endif
 276
 277             =back
 278             EOT
 279       },
 280     $globals_scn => {},
 281     $GV_scn => {},
 282     $hook_scn => {},
 283     $HV_scn => {},
 284     $io_scn => {},
 285     $io_formats_scn => {
 286         header => <<~'EOT',
 287             These are used for formatting the corresponding type For example,
 288             instead of saying
 289
 290              Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
 291
 292             use
 293
 294              Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
 295
 296             This keeps you from having to know if, say an IV, needs to be
 297             printed as C<%d>, C<%ld>, or something else.
 298             EOT
 299       },
 300     $integer_scn => {},
 301     $lexer_scn => {},
 302     $locale_scn => {},
 303     $magic_scn => {},
 304     $memory_scn => {},
 305     $MRO_scn => {},
 306     $multicall_scn => {},
 307     $numeric_scn => {},
 308     $optrees_scn => {},
 309     $optree_construction_scn => {},
 310     $optree_manipulation_scn => {},
 311     $pack_scn => {},
 312     $pad_scn => {},
 313     $password_scn => {},
 314     $paths_scn => {},
 315     $prototypes_scn => {},
 316     $regexp_scn => {},
 317     $reports_scn => {
 318         header => <<~"EOT",
 319             These are used in the simple report generation feature of Perl.
 320             See L<perlform>.
 321             EOT
 322       },
 323     $signals_scn => {},
 324     $site_scn => {
 325         header => <<~'EOT',
 326             These variables give details as to where various libraries,
 327             installation destinations, I<etc.>, go, as well as what various
 328             installation options were selected
 329             EOT
 330       },
 331     $sockets_scn => {},
 332     $stack_scn => {},
 333     $string_scn => {
 334         header => <<~EOT,
 335             See also C<L</$unicode_scn>>.
 336             EOT
 337       },
 338     $SV_flags_scn => {},
 339     $SV_scn => {},
 340     $tainting_scn => {},
 341     $time_scn => {},
 342     $typedefs_scn => {},
 343     $unicode_scn => {
 344         header => <<~EOT,
 345             L<perlguts/Unicode Support> has an introduction to this API.
 346
 347             See also C<L</$classification_scn>>,
 348             C<L</$casing_scn>>,
 349             and C<L</$string_scn>>.
 350             Various functions outside this section also work specially with
 351             Unicode.  Search for the string "utf8" in this document.
 352             EOT
 353       },
 354     $utility_scn => {},
 355     $versioning_scn => {},
 356     $warning_scn => {},
 357     $XS_scn => {},
 358 );
 359
 360 # Somewhat loose match for an apidoc line so we can catch minor typos.
 361 # Parentheses are used to capture portions so that below we verify
 362 # that things are the actual correct syntax.
 363 my $apidoc_re = qr/ ^ (\s*)            # $1
 364                       (=?)             # $2
 365                       (\s*)            # $3
 366                       for (\s*)        # $4
 367                       apidoc (_item)?  # $5
 368                       (\s*)            # $6
 369                       (.*?)            # $7
 370                       \s* \n /x;
 371 # Only certain flags, dealing with display, are acceptable for apidoc_item
 372 my $display_flags = "fFnDopTx;";
 373
 374 sub check_api_doc_line ($$) {
 375     my ($file, $in) = @_;
 376
 377     return unless $in =~ $apidoc_re;
 378
 379     my $is_item = defined $5;
 380     my $is_in_proper_form = length $1 == 0
 381                          && length $2 > 0
 382                          && length $3 == 0
 383                          && length $4 > 0
 384                          && length $7 > 0
 385                          && (    length $6 > 0
 386                              || ($is_item && substr($7, 0, 1) eq '|'));
 387     my $proto_in_file = $7;
 388     my $proto = $proto_in_file;
 389     $proto = "||$proto" if $proto !~ /\|/;
 390     my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
 391
 392     $name && $is_in_proper_form or die <<EOS;
 393 Bad apidoc at $file line $.:
 394   $in
 395 Expected:
 396   =for apidoc flags|returntype|name|arg|arg|...
 397   =for apidoc flags|returntype|name
 398   =for apidoc name
 399 (or 'apidoc_item')
 400 EOS
 401
 402     die "Only [$display_flags] allowed in apidoc_item:\n$in"
 403                             if $is_item && $flags =~ /[^$display_flags]/;
 404
 405     return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
 406 }
 407
 408 sub embed_override($) {
 409     my ($element_name) = shift;
 410
 411     # If the entry is also in embed.fnc, it should be defined
 412     # completely there, but not here
 413     my $embed_docref = delete $funcflags{$element_name};
 414
 415     return unless $embed_docref and %$embed_docref;
 416
 417     my $flags = $embed_docref->{'flags'};
 418     warn "embed.fnc entry '$element_name' missing 'd' flag"
 419                                             unless $flags =~ /d/;
 420
 421     return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
 422 }
 423
 424 # The section that is in effect at the beginning of the given file.  If not
 425 # listed here, an apidoc_section line must precede any apidoc lines.
 426 # This allows the files listed here that generally are single-purpose, to not
 427 # have to worry about the autodoc section
 428 my %initial_file_section = (
 429                             'av.c' => $AV_scn,
 430                             'av.h' => $AV_scn,
 431                             'cv.h' => $CV_scn,
 432                             'deb.c' => $debugging_scn,
 433                             'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
 434                             'doio.c' => $io_scn,
 435                             'gv.c' => $GV_scn,
 436                             'gv.h' => $GV_scn,
 437                             'hv.h' => $HV_scn,
 438                             'locale.c' => $locale_scn,
 439                             'malloc.c' => $memory_scn,
 440                             'numeric.c' => $numeric_scn,
 441                             'opnames.h' => $optree_construction_scn,
 442                             'pad.h'=> $pad_scn,
 443                             'patchlevel.h' => $versioning_scn,
 444                             'perlio.h' => $io_scn,
 445                             'pod/perlapio.pod' => $io_scn,
 446                             'pod/perlcall.pod' => $callback_scn,
 447                             'pod/perlembed.pod' => $embedding_scn,
 448                             'pod/perlfilter.pod' => $filters_scn,
 449                             'pod/perliol.pod' => $io_scn,
 450                             'pod/perlmroapi.pod' => $MRO_scn,
 451                             'pod/perlreguts.pod' => $regexp_scn,
 452                             'pp_pack.c' => $pack_scn,
 453                             'pp_sort.c' => $SV_scn,
 454                             'regcomp.c' => $regexp_scn,
 455                             'regexp.h' => $regexp_scn,
 456                             'sv.h' => $SV_scn,
 457                             'sv.c' => $SV_scn,
 458                             'sv_inline.h' => $SV_scn,
 459                             'taint.c' => $tainting_scn,
 460                             'unicode_constants.h' => $unicode_scn,
 461                             'utf8.c' => $unicode_scn,
 462                             'utf8.h' => $unicode_scn,
 463                             'vutil.c' => $versioning_scn,
 464                            );
 465
 466 sub autodoc ($$) { # parse a file and extract documentation info
 467     my($fh,$file) = @_;
 468     my($in, $line_num, $header, $section);
 469
 470     $section = $initial_file_section{$file}
 471                                     if defined $initial_file_section{$file};
 472
 473     my $file_is_C = $file =~ / \. [ch] $ /x;
 474
 475     # Count lines easier
 476     my $get_next_line = sub { $line_num++; return <$fh> };
 477
 478     # Read the file
 479     while ($in = $get_next_line->()) {
 480         last unless defined $in;
 481
 482         next unless (    $in =~ / ^ =for [ ]+ apidoc /x
 483                                       # =head1 lines only have effect in C files
 484                      || ($file_is_C && $in =~ /^=head1/));
 485
 486         # Here, the line introduces a portion of the input that we care about.
 487         # Either it is for an API element, or heading text which we expect
 488         # will be used for elements later in the file
 489
 490         my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
 491         my (@args, @items);
 492
 493         # If the line starts a new section ...
 494         if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
 495
 496             $section = $1;
 497             if ($section =~ / ^ \$ /x) {
 498                 $section .= '_scn' unless $section =~ / _scn $ /;
 499                 $section = eval "$section";
 500                 die "Unknown \$section variable '$section' in $file: $@" if $@;
 501             }
 502             die "Unknown section name '$section' in $file near line $.\n"
 503                                     unless defined $valid_sections{$section};
 504
 505         }
 506         elsif ($in=~ /^ =for [ ]+ apidoc \B /x) {   # Otherwise better be a
 507                                                     # plain apidoc line
 508             die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
 509             die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
 510         }
 511         else {  # Plain apidoc
 512
 513             ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
 514                                                 = check_api_doc_line($file, $in);
 515             # Override this line with any info in embed.fnc
 516             my ($embed_flags, $embed_ret_type, @embed_args)
 517                                                 = embed_override($element_name);
 518             if ($embed_ret_type) {
 519                 warn "embed.fnc entry overrides redundant information in"
 520                     . " '$proto_in_file' in $file"
 521                                                if $flags || $ret_type || @args;
 522                 $flags = $embed_flags;
 523                 $ret_type = $embed_ret_type;
 524                 @args = @embed_args;
 525             }
 526             elsif ($flags !~ /[my]/)  { # Not in embed.fnc, is missing if not
 527                                         # a macro or typedef
 528                 $missing{$element_name} = $file;
 529             }
 530
 531             die "flag '$1' is not legal (for function $element_name (from $file))"
 532                         if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy;#] ) /x;
 533
 534             die "'u' flag must also have 'm' or 'y' flags' for $element_name"
 535                                             if $flags =~ /u/ && $flags !~ /[my]/;
 536             warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
 537                         if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
 538
 539             if ($flags =~ /#/) {
 540                 die "Return type must be empty for '$element_name'"
 541                                                                    if $ret_type;
 542                 $ret_type = '#ifdef';
 543             }
 544
 545             if (exists $seen{$element_name} && $flags !~ /h/) {
 546                 die ("'$element_name' in $file was already documented in $seen{$element_name}");
 547             }
 548             else {
 549                 $seen{$element_name} = $file;
 550             }
 551         }
 552
 553         # Here we have processed the initial line in the heading text or API
 554         # element, and have saved the important information from it into the
 555         # corresponding variables.  Now accumulate the text that applies to it
 556         # up to a terminating line, which is one of:
 557         # 1) =cut
 558         # 2) =head (in a C file only =head1)
 559         # 3) an end comment line in a C file: m:^\s*\*/:
 560         # 4) =for apidoc... (except apidoc_item lines)
 561         $text = "";
 562         my $head_ender_num = ($file_is_C) ? 1 : "";
 563         while (defined($in = $get_next_line->())) {
 564
 565             last if $in =~ /^=cut/x;
 566             last if $in =~ /^=head$head_ender_num/;
 567
 568             if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
 569
 570                 # End of comment line in C files is a fall-back terminator,
 571                 # but warn only if there actually is some accumulated text
 572                 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
 573                 last;
 574             }
 575
 576             if ($in !~ / ^ =for [ ]+ apidoc /x) {
 577                 $text .= $in;
 578                 next;
 579             }
 580
 581             # Here, the line is an apidoc line.  All but apidoc_item terminate
 582             # the text being accumulated.
 583             last if $in =~ / ^ =for [ ]+ apidoc_section /x;
 584
 585             my ($item_name, $item_flags, $item_ret_type, $is_item,
 586                     $item_proto, @item_args) = check_api_doc_line($file, $in);
 587             last unless $is_item;
 588
 589             # Here, is an apidoc_item_line; They can only come within apidoc
 590             # paragraphs.
 591             die "Unexpected api_doc_item line '$item_proto'"
 592                                                         unless $element_name;
 593
 594             # We accept blank lines between these, but nothing else;
 595             die "apidoc_item lines must immediately follow apidoc lines for "
 596               . " '$element_name' in $file"
 597                                                             if $text =~ /\S/;
 598             # Override this line with any info in embed.fnc
 599             my ($embed_flags, $embed_ret_type, @embed_args)
 600                                                 = embed_override($item_name);
 601             if ($embed_ret_type) {
 602                 warn "embed.fnc entry overrides redundant information in"
 603                     . " '$item_proto' in $file"
 604                                 if $item_flags || $item_ret_type || @item_args;
 605
 606                 $item_flags = $embed_flags;
 607                 $item_ret_type = $embed_ret_type;
 608                 @item_args = @embed_args;
 609             }
 610
 611             # Use the base entry flags if none for this item; otherwise add in
 612             # any non-display base entry flags.
 613             if ($item_flags) {
 614                 $item_flags .= $flags =~ s/[$display_flags]//rg;
 615             }
 616             else {
 617                 $item_flags = $flags;
 618             }
 619             $item_ret_type = $ret_type unless $item_ret_type;
 620             @item_args = @args unless @item_args;
 621             push @items, { name     => $item_name,
 622                            ret_type => $item_ret_type,
 623                            flags    => $item_flags,
 624                            args     => [ @item_args ],
 625                          };
 626
 627             # This line shows that this element is documented.
 628             delete $funcflags{$item_name};
 629         }
 630
 631         # Here, are done accumulating the text for this item.  Trim it
 632         $text =~ s/ ^ \s* //x;
 633         $text =~ s/ \s* $ //x;
 634         $text .= "\n" if $text ne "";
 635
 636         # And treat all-spaces as nothing at all
 637         undef $text unless $text =~ /\S/;
 638
 639         if ($element_name) {
 640
 641             # Here, we have accumulated into $text, the pod for $element_name
 642             my $where = $flags =~ /A/ ? 'api' : 'intern';
 643
 644             die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
 645                                                     unless defined $section;
 646             my $is_link_only = ($flags =~ /h/);
 647             if (! $is_link_only && exists $docs{$where}{$section}{$element_name}) {
 648                 warn "$0: duplicate API entry for '$element_name' in"
 649                     . " $where/$section\n";
 650                 next;
 651             }
 652
 653             # Override the text with just a link if the flags call for that
 654             if ($is_link_only) {
 655                 if ($file_is_C) {
 656                     die "Can't currently handle link with items to it:\n$in"
 657                                                                        if @items;
 658                     $docs{$where}{$section}{X_tags}{$element_name} = $file;
 659                     redo;    # Don't put anything if C source
 660                 }
 661
 662                 # Here, is an 'h' flag in pod.  We add a reference to the pod (and
 663                 # nothing else) to perlapi/intern.  (It would be better to add a
 664                 # reference to the correct =item,=header, but something that makes
 665                 # it harder is that it that might be a duplicate, like '=item *';
 666                 # so that is a future enhancement XXX.  Another complication is
 667                 # there might be more than one deserving candidates.)
 668                 my $podname = $file =~ s!.*/!!r;    # Rmv directory name(s)
 669                 $podname =~ s/\.pod//;
 670                 $text = "Described in L<$podname>.\n";
 671
 672                 # Don't output a usage example for linked to documentation if
 673                 # it is trivial (has no arguments) and we aren't to add a
 674                 # semicolon
 675                 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[U;]/;
 676
 677                 # Keep track of all the pod files that we refer to.
 678                 push $described_elsewhere{$podname}->@*, $podname;
 679             }
 680
 681             $docs{$where}{$section}{$element_name}{flags} = $flags;
 682             $docs{$where}{$section}{$element_name}{pod} = $text;
 683             $docs{$where}{$section}{$element_name}{file} = $file;
 684             $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
 685             push $docs{$where}{$section}{$element_name}{args}->@*, @args;
 686             push $docs{$where}{$section}{$element_name}{items}->@*, @items;
 687         }
 688         elsif ($text) {
 689             $valid_sections{$section}{header} = "" unless
 690                                     defined $valid_sections{$section}{header};
 691             $valid_sections{$section}{header} .= "\n$text";
 692         }
 693
 694         # We already have the first line of what's to come in $in
 695         redo;
 696
 697     } # End of loop through input
 698 }
 699
 700 my %configs;
 701 my @has_defs;
 702 my @has_r_defs;     # Reentrant symbols
 703 my @include_defs;
 704
 705 sub parse_config_h {
 706     use re '/aa';   # Everthing is ASCII in this file
 707
 708     # Process config.h
 709     my $config_h = 'config.h';
 710     $config_h = 'win32/config.h' unless -e $config_h;
 711     die "Can't find $config_h" unless -e $config_h;
 712     open my $fh, '<', $config_h or die "Can't open $config_h: $!";
 713     while (<$fh>) {
 714
 715         # Look for lines like /* FOO_BAR:
 716         # By convention all config.h descriptions begin like that
 717         if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
 718             my $name = $1;
 719
 720             # Here we are starting the description for $name in config.h.  We
 721             # accumulate the entire description for it into @description.
 722             # Flowing text from one input line to another is appended into the
 723             # same array element to make a single flowing line element, but
 724             # verbatim lines are kept as separate elements in @description.
 725             # This will facilitate later doing pattern matching without regard
 726             # to line boundaries on non-verbatim text.
 727
 728             die "Multiple config.h entries for '$name'"
 729                                         if defined $configs{$name}{description};
 730
 731             # Get first line of description
 732             $_ = <$fh>;
 733
 734             # Each line in the description begins with blanks followed by '/*'
 735             # and some spaces.
 736             die "Unexpected config.h initial line for $name: '$_'"
 737                                             unless s/ ^ ( \s* \* \s* ) //x;
 738             my $initial_text = $1;
 739
 740             # Initialize the description with this first line (after having
 741             # stripped the prefix text)
 742             my @description = $_;
 743
 744             # The first line is used as a template for how much indentation
 745             # each normal succeeding line has.  Lines indented further
 746             # will be considered as intended to be verbatim.  But, empty lines
 747             # likely won't have trailing blanks, so just strip the whole thing
 748             # for them.
 749             my $strip_initial_qr = qr!   \s* \* \s* $
 750                                     | \Q$initial_text\E
 751                                     !x;
 752             $configs{$name}{verbatim} = 0;
 753
 754             # Read in the remainder of the description
 755             while (<$fh>) {
 756                 last if s| ^ \s* \* / ||x;  # A '*/' ends it
 757
 758                 die "Unexpected config.h description line for $name: '$_'"
 759                                                 unless s/$strip_initial_qr//;
 760
 761                 # Fix up the few flawed lines in config.h wherein a new
 762                 # sentence begins with a tab (and maybe a space after that).
 763                 # Although none of them currently do, let it recognize
 764                 # something like
 765                 #
 766                 #   "... text").  The next sentence ...
 767                 #
 768                 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1  $2/xg;
 769
 770                 # If this line has extra indentation or looks to have columns,
 771                 # it should be treated as verbatim.  Columns are indicated by
 772                 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
 773                 # not preceded by punctuation.
 774                 if ($_ !~ m/  ^ \s
 775                               | \S (?:                    \t
 776                                     |                     \s{3}
 777                                     |  (*nlb:[[:punct:]]) \s{2}
 778                                    )
 779                            /x)
 780                 {
 781                     # But here, is not a verbatim line.  Add an empty line if
 782                     # this is the first non-verbatim after a run of verbatims
 783                     if ($description[-1] =~ /^\s/) {
 784                         push @description, "\n", $_;
 785                     }
 786                     else {  # Otherwise, append this flowing line to the
 787                             # current flowing line
 788                         $description[-1] .= $_;
 789                     }
 790                 }
 791                 else {
 792                     $configs{$name}{verbatim} = 1;
 793
 794                     # The first verbatim line in a run of them is separated by an
 795                     # empty line from the flowing lines above it
 796                     push @description, "\n" if $description[-1] =~ /^\S/;
 797
 798                     $_ = Text::Tabs::expand($_);
 799
 800                     # Only a single space so less likely to wrap
 801                     s/ ^ \s* / /x;
 802
 803                     push @description, $_;
 804                 }
 805             }
 806
 807             push $configs{$name}{description}->@*, @description
 808
 809         }   # Not a description; see if it is a macro definition.
 810         elsif (m! ^
 811                   (?: / \* )?                   # Optional commented-out
 812                                                 # indication
 813                       \# \s* define \s+ ( \w+ ) # $1 is the name
 814                   (   \s* )                     # $2 indicates if args or not
 815                   (   .*? )                     # $3 is any definition
 816                   (?: / \s* \* \* / )?          # Optional trailing /**/ or / **/
 817                   $
 818                 !x)
 819         {
 820             my $name = $1;
 821
 822             # There can be multiple definitions for a name.  We want to know
 823             # if any of them has arguments, and if any has a body.
 824             $configs{$name}{has_args} //= $2 eq "";
 825             $configs{$name}{has_args} ||= $2 eq "";
 826             $configs{$name}{has_defn} //= $3 ne "";
 827             $configs{$name}{has_defn} ||= $3 ne "";
 828         }
 829     }
 830
 831     # We now have stored the description and information about every #define
 832     # in the file.  The description is in a form convenient to operate on to
 833     # convert to pod.  Do that now.
 834     foreach my $name (keys %configs) {
 835         next unless defined $configs{$name}{description};
 836
 837         # All adjacent non-verbatim lines of the description are appended
 838         # together in a single element in the array.  This allows the patterns
 839         # to work across input line boundaries.
 840
 841         my $pod = "";
 842         while (defined ($_ = shift $configs{$name}{description}->@*)) {
 843             chomp;
 844
 845             if (/ ^ \S /x) {  # Don't edit verbatim lines
 846
 847                 # Enclose known file/path names not already so enclosed
 848                 # with <...>.  (Some entries in config.h are already
 849                 # '<path/to/file>')
 850                 my $file_name_qr = qr! [ \w / ]+ \.
 851                                     (?: c | h | xs | p [lm] | pmc | PL
 852                                         | sh | SH | exe ) \b
 853                                     !xx;
 854                 my $path_name_qr = qr! (?: / \w+ )+ !x;
 855                 for my $re ($file_name_qr, $path_name_qr) {
 856                     s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
 857                 }
 858
 859                 # Enclose <... file/path names with F<...> (but no double
 860                 # angle brackets)
 861                 for my $re ($file_name_qr, $path_name_qr) {
 862                     s! < ( $re ) > !F<$1>!gxx;
 863                 }
 864
 865                 # Explain metaconfig units
 866                 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
 867
 868                 # Convert "See foo" to "See C<L</foo>>" if foo is described in
 869                 # this file.  Also create a link to the known file INSTALL.
 870                 # And, to be more general, handle "See also foo and bar", and
 871                 # "See also foo, bar, and baz"
 872                 while (m/ \b [Ss]ee \s+
 873                          (?: also \s+ )?    ( \w+ )
 874                          (?: ,  \s+         ( \w+ ) )?
 875                          (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
 876                     my @links = $1;
 877                     push @links, $2 if defined $2;
 878                     push @links, $3 if defined $3;
 879                     foreach my $link (@links) {
 880                         if ($link eq 'INSTALL') {
 881                             s/ \b INSTALL \b /C<L<INSTALL>>/xg;
 882                         }
 883                         elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
 884                             s| \b $link \b |C<L</$link>>|xg;
 885                             $configs{$link}{linked} = 1;
 886                             $configs{$name}{linked} = 1;
 887                         }
 888                     }
 889                 }
 890
 891                 # Enclose what we think are symbols with C<...>.
 892                 no warnings 'experimental::vlb';
 893                 s/ (*nlb:<)
 894                    (
 895                         # Any word followed immediately with parens or
 896                         # brackets
 897                         \b \w+ (?: \( [^)]* \)    # parameter list
 898                                  | \[ [^]]* \]    # or array reference
 899                                )
 900                     | (*plb: ^ | \s ) -D \w+    # Also -Dsymbols.
 901                     | \b (?: struct | union ) \s \w+
 902
 903                         # Words that contain underscores (which are
 904                         # definitely not text) or three uppercase letters in
 905                         # a row.  Length two ones, like IV, aren't enclosed,
 906                         # because they often don't look as nice.
 907                     | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
 908                    )
 909                     (*nla:>)
 910                  /C<$1>/xg;
 911
 912                 # These include foo when the name is HAS_foo.  This is a
 913                 # heuristic which works in most cases.
 914                 if ($name =~ / ^ HAS_ (.*) /x) {
 915                     my $symbol = lc $1;
 916
 917                     # Don't include path components, nor things already in
 918                     # <>, or with trailing '(', '['
 919                     s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
 920                 }
 921             }
 922
 923             $pod .=  "$_\n";
 924         }
 925         delete $configs{$name}{description};
 926
 927         $configs{$name}{pod} = $pod;
 928     }
 929
 930     # Now have converted the description to pod.  We also now have enough
 931     # information that we can do cross checking to find definitions without
 932     # corresponding pod, and see if they are mentioned in some description;
 933     # otherwise they aren't documented.
 934   NAME:
 935     foreach my $name (keys %configs) {
 936
 937         # A definition without pod
 938         if (! defined $configs{$name}{pod}) {
 939
 940             # Leading/trailing underscore means internal to config.h, e.g.,
 941             # _GNU_SOURCE
 942             next if $name =~ / ^ _ /x;
 943             next if $name =~ / _ $ /x;
 944
 945             # MiXeD case names are internal to config.h; the first 4
 946             # characters are sufficient to determine this
 947             next if $name =~ / ^ [[:upper:]] [[:lower:]]
 948                                  [[:upper:]] [[:lower:]]
 949                             /x;
 950
 951             # Here, not internal to config.h.  Look to see if this symbol is
 952             # mentioned in the pod of some other.  If so, assume it is
 953             # documented.
 954             foreach my $check_name (keys %configs) {
 955                 my $this_element = $configs{$check_name};
 956                 my $this_pod = $this_element->{pod};
 957                 if (defined $this_pod) {
 958                     next NAME if $this_pod =~ / \b $name \b /x;
 959                 }
 960             }
 961
 962             warn "$name has no documentation\n";
 963             $missing_macros{$name} = 'config.h';
 964
 965             next;
 966         }
 967
 968         my $has_defn = $configs{$name}{has_defn};
 969         my $has_args = $configs{$name}{has_args};
 970
 971         # Check if any section already has an entry for this element.
 972         # If so, it better be a placeholder, in which case we replace it
 973         # with this entry.
 974         foreach my $section (keys $docs{'api'}->%*) {
 975             if (exists $docs{'api'}{$section}{$name}) {
 976                 my $was = $docs{'api'}{$section}{$name}->{pod};
 977                 $was = "" unless $was;
 978                 chomp $was;
 979                 if ($was ne "" && $was !~ m/$link_text/) {
 980                     die "Multiple descriptions for $name\n"
 981                         . "$section contained '$was'";
 982                 }
 983                 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
 984                 $configs{$name}{section} = $section;
 985                 last;
 986             }
 987         }
 988
 989         my $handled = 0;    # Haven't handled this yet
 990
 991         if (defined $configs{$name}{'section'}) {
 992             # This has been taken care of elsewhere.
 993             $handled = 1;
 994         }
 995         else {
 996             my $flags = "";
 997             if ($has_defn && ! $has_args) {
 998                 $configs{$name}{args} = 1;
 999             }
1000
1001             # Symbols of the form I_FOO are for #include files.  They have
1002             # special usage information
1003             if ($name =~ / ^ I_ ( .* ) /x) {
1004                 my $file = lc $1 . '.h';
1005                 $configs{$name}{usage} = <<~"EOT";
1006                     #ifdef $name
1007                         #include <$file>
1008                     #endif
1009                     EOT
1010             }
1011
1012             # Compute what section this variable should go into.  This
1013             # heuristic was determined by manually inspecting the current
1014             # things in config.h, and should be adjusted as necessary as
1015             # deficiencies are found.
1016             #
1017             # This is the default section for macros with a definiton but
1018             # no arguments, meaning it is replaced unconditionally
1019             #
1020             my $sb = qr/ _ | \b /x; # segment boundary
1021             my $dash_or_spaces = qr/ - | \s+ /x;
1022             my $pod = $configs{$name}{pod};
1023             if ($name =~ / ^ USE_ /x) {
1024                 $configs{$name}{'section'} = $site_scn;
1025             }
1026             elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
1027             {
1028                 $configs{$name}{'section'} = $time_scn;
1029             }
1030             elsif (   $name =~ / ^ [[:alpha:]]+ f $ /x
1031                    && $configs{$name}{pod} =~ m/ \b format \b /ix)
1032             {
1033                 $configs{$name}{'section'} = $io_formats_scn;
1034             }
1035             elsif ($name =~ /  DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1036                             | $sb CASTFLAGS $sb
1037                             | QUADMATH
1038                             | $sb (?: IS )? NAN
1039                             | $sb (?: IS )? FINITE
1040                             /x)
1041             {
1042                 $configs{$name}{'section'} =
1043                                     $floating_scn;
1044             }
1045             elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
1046                 $configs{$name}{'section'} = $filesystem_scn;
1047             }
1048             elsif (   $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1049                    || $configs{$name}{pod} =~ m/ \b align /x)
1050             {
1051                 $configs{$name}{'section'} = $compiler_scn;
1052             }
1053             elsif ($name =~ / ^ [IU] [ \d V ]
1054                             | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1055             {
1056                 $configs{$name}{'section'} = $integer_scn;
1057             }
1058             elsif ($name =~ / $sb t $sb /x) {
1059                 $configs{$name}{'section'} = $typedefs_scn;
1060                 $flags .= 'y';
1061             }
1062             elsif (   $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1063                     && $configs{$name}{pod} =~ m/ \b format \b /ix)
1064             {
1065                 $configs{$name}{'section'} = $io_formats_scn;
1066             }
1067             elsif ($name =~ / BACKTRACE /x) {
1068                 $configs{$name}{'section'} = $debugging_scn;
1069             }
1070             elsif ($name =~ / ALLOC $sb /x) {
1071                 $configs{$name}{'section'} = $memory_scn;
1072             }
1073             elsif (   $name =~ /   STDIO | FCNTL | EOF | FFLUSH
1074                                 | $sb FILE $sb
1075                                 | $sb DIR $sb
1076                                 | $sb LSEEK
1077                                 | $sb INO $sb
1078                                 | $sb OPEN
1079                                 | $sb CLOSE
1080                                 | ^ DIR
1081                                 | ^ INO $sb
1082                                 | DIR $
1083                                 | FILENAMES
1084                                 /x
1085                     || $configs{$name}{pod} =~ m!  I/O | stdio
1086                                                 | file \s+ descriptor
1087                                                 | file \s* system
1088                                                 | statfs
1089                                                 !x)
1090             {
1091                 $configs{$name}{'section'} = $filesystem_scn;
1092             }
1093             elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
1094                 $configs{$name}{'section'} = $signals_scn;
1095             }
1096             elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
1097                 $configs{$name}{'section'} = $prototypes_scn;
1098             }
1099             elsif (   $name =~ / ^ LOC_ /x
1100                     || $configs{$name}{pod} =~ /full path/i)
1101             {
1102                 $configs{$name}{'section'} = $paths_scn;
1103             }
1104             elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
1105                 $configs{$name}{'section'} = $locale_scn;
1106             }
1107             elsif ($configs{$name}{pod} =~ /  GCC | C99 | C\+\+ /xi) {
1108                 $configs{$name}{'section'} = $compiler_scn;
1109             }
1110             elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1111             {
1112                 $configs{$name}{'section'} = $password_scn;
1113             }
1114             elsif ($name =~ /  SOCKET | $sb SOCK /x) {
1115                 $configs{$name}{'section'} = $sockets_scn;
1116             }
1117             elsif (   $name =~ / THREAD | MULTIPLICITY /x
1118                     || $configs{$name}{pod} =~ m/ \b pthread /ix)
1119             {
1120                 $configs{$name}{'section'} = $concurrency_scn;
1121             }
1122             elsif ($name =~ /  PERL | ^ PRIV | SITE | ARCH | BIN
1123                                 | VENDOR | ^ USE
1124                             /x)
1125             {
1126                 $configs{$name}{'section'} = $site_scn;
1127             }
1128             elsif (   $pod =~ / \b floating $dash_or_spaces point \b /ix
1129                    || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1130                    || $pod =~ / \b doubles \b /ix
1131                    || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
1132             {
1133                 $configs{$name}{'section'} =
1134                                     $floating_scn;
1135             }
1136             else {
1137                 # Above are the specific sections.  The rest go into a
1138                 # grab-bag of general configuration values.  However, we put
1139                 # two classes of them into lists of their names, without their
1140                 # descriptions, when we think that the description doesn't add
1141                 # any real value.  One list contains the #include variables:
1142                 # the description is basically boiler plate for each of these.
1143                 # The other list contains the very many things that are of the
1144                 # form HAS_foo, and \bfoo\b is contained in its description,
1145                 # and there is no verbatim text in the pod or links to/from it
1146                 # (which would add value).  That means that it is likely the
1147                 # intent of the variable can be gleaned from just its name,
1148                 # and unlikely the description adds signficant value, so just
1149                 # listing them suffices.  Giving their descriptions would
1150                 # expand this pod significantly with little added value.
1151                 if (   ! $has_defn
1152                     && ! $configs{$name}{verbatim}
1153                     && ! $configs{$name}{linked})
1154                 {
1155                     if ($name =~ / ^ I_ ( .* ) /x) {
1156                         push @include_defs, $name;
1157                         next;
1158                     }
1159                     elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1160                         my $canonical_name = $1;
1161                         $canonical_name =~ s/_//g;
1162
1163                         my $canonical_pod = $configs{$name}{pod};
1164                         $canonical_pod =~ s/_//g;
1165
1166                         if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1167                             if ($name =~ / $sb R $sb /x) {
1168                                 push @has_r_defs, $name;
1169                             }
1170                             else {
1171                                 push @has_defs, $name;
1172                             }
1173                             next;
1174                         }
1175                     }
1176                 }
1177
1178                 $configs{$name}{'section'} = $genconfig_scn;
1179             }
1180
1181             my $section = $configs{$name}{'section'};
1182             die "Internal error: '$section' not in \%valid_sections"
1183                             unless grep { $_ eq $section } keys %valid_sections;
1184             $flags .= 'AdmnT';
1185             $flags .= 'U' unless defined $configs{$name}{usage};
1186
1187             # All the information has been gathered; save it
1188             $docs{'api'}{$section}{$name}{flags} = $flags;
1189             $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1190             $docs{'api'}{$section}{$name}{ret_type} = "";
1191             $docs{'api'}{$section}{$name}{file} = 'config.h';
1192             $docs{'api'}{$section}{$name}{usage}
1193                 = $configs{$name}{usage} if defined $configs{$name}{usage};
1194             push $docs{'api'}{$section}{$name}{args}->@*, ();
1195             push $docs{'api'}{$section}{$name}{items}->@*, ();
1196         }
1197     }
1198 }
1199
1200 sub format_pod_indexes($) {
1201     my $entries_ref = shift;
1202
1203     # Output the X<> references to the names, packed since they don't get
1204     # displayed, but not too many per line so that when someone is editing the
1205     # file, it doesn't run on
1206
1207     my $text ="";
1208     my $line_length = 0;
1209     for my $name (sort dictionary_order $entries_ref->@*) {
1210         my $entry = "X<$name>";
1211         my $entry_length = length $entry;
1212
1213         # Don't loop forever if we have a verrry long name, and don't go too
1214         # far to the right.
1215         if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1216             $text .= "\n";
1217             $line_length = 0;
1218         }
1219
1220         $text .= $entry;
1221         $line_length += $entry_length;
1222     }
1223
1224     return $text;
1225 }
1226
1227 sub docout ($$$) { # output the docs for one function group
1228     my($fh, $element_name, $docref) = @_;
1229
1230     # Trim trailing space
1231     $element_name =~ s/\s*$//;
1232
1233     my $flags = $docref->{flags};
1234     my $pod = $docref->{pod} // "";
1235     my $file = $docref->{file};
1236
1237     my @items = $docref->{items}->@*;
1238
1239     # Make the main element the first of the items.  This allows uniform
1240     # treatment below
1241     unshift @items, {   name => $element_name,
1242                         flags => $flags,
1243                         ret_type => $docref->{ret_type},
1244                         args => [ $docref->{args}->@* ],
1245                     };
1246
1247     warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1248
1249     print $fh "\n=over $description_indent\n";
1250     print $fh "\n=item C<$_->{name}>\n" for @items;
1251
1252     # If we're printing only a link to an element, this isn't the major entry,
1253     # so no X<> here.
1254     if ($flags !~ /h/) {
1255         print $fh "X<$_->{name}>" for @items;
1256         print $fh "\n";
1257     }
1258
1259     my @deprecated;
1260     my @experimental;
1261     for my $item (@items) {
1262         push @deprecated,   "C<$item->{name}>" if $item->{flags} =~ /D/;
1263         push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
1264     }
1265
1266     for my $which (\@deprecated, \@experimental) {
1267         if ($which->@*) {
1268             my $is;
1269             my $it;
1270             my $list;
1271
1272             if ($which->@* == 1) {
1273                 $is = 'is';
1274                 $it = 'it';
1275                 $list = $which->[0];
1276             }
1277             elsif ($which->@* == @items) {
1278                 $is = 'are';
1279                 $it = 'them';
1280                 $list = (@items == 2)
1281                          ? "both forms"
1282                          : "all these forms";
1283             }
1284             else {
1285                 $is = 'are';
1286                 $it = 'them';
1287                 my $final = pop $which->@*;
1288                 $list = "the " . join ", ", $which->@*;
1289                 $list .= "," if $which->@* > 1;
1290                 $list .= " and $final forms";
1291             }
1292
1293             if ($which == \@deprecated) {
1294                 print $fh <<~"EOT";
1295
1296                     C<B<DEPRECATED!>>  It is planned to remove $list
1297                     from a future release of Perl.  Do not use $it for
1298                     new code; remove $it from existing code.
1299                     EOT
1300             }
1301             else {
1302                 print $fh <<~"EOT";
1303
1304                     NOTE: $list $is B<experimental> and may change or be
1305                     removed without notice.
1306                     EOT
1307             }
1308         }
1309     }
1310
1311     chomp $pod;     # Make sure prints pod with a single trailing \n
1312     print $fh "\n", $pod, "\n";
1313
1314     for my $item (@items) {
1315         my $item_flags = $item->{flags};
1316         my $item_name = $item->{name};
1317
1318         print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1319                                                     if $item_flags =~ /O/;
1320         # Is Perl_, but no #define foo # Perl_foo
1321         if (   ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1322
1323                 # Can't handle threaded varargs
1324             || ($item_flags =~ /f/ && $item_flags !~ /T/))
1325         {
1326             $item->{name} = "Perl_$item_name";
1327             print $fh <<~"EOT";
1328
1329                 NOTE: C<$item_name> must be explicitly called as
1330                 C<$item->{name}>
1331                 EOT
1332             print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1333             print $fh ".\n";
1334         }
1335     }
1336
1337     if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1338                             # to never warrant a usage line
1339         warn("U and ; flags are incompatible")
1340                                             if $flags =~ /U/ && $flags =~ /;/;
1341         # nothing
1342     } else {
1343
1344         print $fh "\n=over $usage_indent\n";
1345
1346         if (defined $docref->{usage}) {     # An override of the usage section
1347             print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1348         }
1349         else {
1350
1351             # Add the thread context formal parameter on expanded-out names
1352             for my $item (@items) {
1353                 unshift $item->{args}->@*, (($item->{args}->@*)
1354                                             ? "pTHX_"
1355                                             : "pTHX")
1356                                                    if $item->{flags} !~ /T/
1357                                                    && $item->{name} =~ /^Perl_/;
1358             }
1359
1360             # Look through all the items in this entry.  If all have the same
1361             # return type and arguments (including thread context), only the
1362             # main entry is displayed.
1363             # Also, find the longest return type and longest name so that if
1364             # multiple ones are shown, they can be vertically aligned nicely
1365             my $need_individual_usage = 0;
1366             my $longest_name_length = length $items[0]->{name};
1367             my $base_ret_type = $items[0]->{ret_type};
1368             my $longest_ret = length $base_ret_type;
1369             my @base_args = $items[0]->{args}->@*;
1370             my $base_thread_context = $items[0]->{flags} =~ /T/;
1371             for (my $i = 1; $i < @items; $i++) {
1372                 no warnings 'experimental::smartmatch';
1373                 my $item = $items[$i];
1374                 $need_individual_usage = 1
1375                                     if    $item->{ret_type} ne $base_ret_type
1376                                     || ! ($item->{args}->@* ~~ @base_args)
1377                                     ||   (   $item->{flags} =~ /T/
1378                                           != $base_thread_context);
1379                 my $ret_length = length $item->{ret_type};
1380                 $longest_ret = $ret_length if $ret_length > $longest_ret;
1381                 my $name_length = length $item->{name};
1382                 $longest_name_length = $name_length
1383                                         if $name_length > $longest_name_length;
1384             }
1385
1386             # If we're only showing one entry, only its length matters.
1387             $longest_name_length = length($items[0]->{name})
1388                                                 unless $need_individual_usage;
1389             print $fh "\n";
1390
1391             my $indent = 1;     # 1 is sufficient for verbatim; =over is used
1392                                 # for more
1393             my $ret_name_sep_length = 2; # spaces between return type and name
1394             my $name_indent = $indent + $longest_ret;
1395             $name_indent += $ret_name_sep_length if $longest_ret;
1396
1397             my $this_max_width =
1398                                $max_width - $description_indent - $usage_indent;
1399
1400             for my $item (@items) {
1401                 my $ret_type = $item->{ret_type};
1402                 my @args = $item->{args}->@*;
1403                 my $name = $item->{name};
1404                 my $item_flags = $item->{flags};
1405
1406                 # The return type
1407                 print $fh (" " x $indent), $ret_type;
1408
1409                 print $fh " " x (  $ret_name_sep_length
1410                                  + $longest_ret - length $ret_type);
1411                 print $fh $name;
1412
1413                 if ($item_flags =~ /n/) { # no args
1414                     warn("$file: $element_name: n flag without m")
1415                                                     unless $item_flags =~ /m/;
1416                     warn("$file: $name: n flag but apparently has args")
1417                                                                     if @args;
1418                 }
1419                 else {
1420                     # +1 for the '('
1421                     my $arg_indent = $name_indent + $longest_name_length + 1;
1422
1423                     # Align the argument lists of the items
1424                     print $fh " " x ($longest_name_length - length($name));
1425                     print $fh "(";
1426
1427                     # Display as many of the arguments on the same line as
1428                     # will fit.
1429                     my $total_length = $arg_indent;
1430                     my $first_line = 1;
1431                     for (my $i = 0; $i < @args; $i++) {
1432                         my $arg = $args[$i];
1433                         my $arg_length = length($arg);
1434
1435                         # All but the first arg are preceded by a blank
1436                         my $use_blank = $i > 0;
1437
1438                         # +1 here and below because either the argument has a
1439                         # trailing comma or trailing ')'
1440                         $total_length += $arg_length + $use_blank + 1;
1441
1442                         # We want none of the arguments to be positioned so
1443                         # they extend too far to the right.  Ideally, they
1444                         # should all start in the same column as the arguments
1445                         # on the first line of the function display do.  But, if
1446                         # necessary, outdent them so that they all start in
1447                         # another column, with the longest ending at the right
1448                         # margin, like so:
1449                         #                   void  function_name(pTHX_ short1,
1450                         #                                    short2,
1451                         #                                    very_long_argument,
1452                         #                                    short3)
1453                         if ($total_length > $this_max_width) {
1454
1455                             # If this is the first continuation line,
1456                             # calculate the longest argument; this will be the
1457                             # one we may have to outdent for.
1458                             if ($first_line) {
1459                                 $first_line = 0;
1460
1461                                 # We will need at least as much as the current
1462                                 # argument
1463                                 my $longest_arg_length = $arg_length
1464                                                        + $use_blank + 1;
1465
1466                                 # Look through the rest of the args to see if
1467                                 # any are longer than this one.
1468                                 for (my $j = $i + 1; $j < @args; $j++) {
1469
1470                                     # Include the trailing ',' or ')' in the
1471                                     # length.  No need to concern ourselves
1472                                     # with a leading blank, as the argument
1473                                     # would be positioned first on the next
1474                                     # line
1475                                     my $peek_arg_length = length ($args[$j])
1476                                                         + 1;
1477                                     $longest_arg_length = $peek_arg_length
1478                                       if $peek_arg_length > $longest_arg_length;
1479                                 }
1480
1481                                 # Calculate the new indent if necessary.
1482                                 $arg_indent =
1483                                         $this_max_width - $longest_arg_length
1484                                         if $arg_indent + $longest_arg_length
1485                                                             > $this_max_width;
1486                             }
1487
1488                             print $fh "\n", (" " x $arg_indent);
1489                             $total_length = $arg_indent + $arg_length + 1;
1490                             $use_blank = 0;
1491                         }
1492
1493                         # Display this argument
1494                         print $fh " " if $use_blank;
1495                         print $fh $arg;
1496                         print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1497
1498                     } # End of loop through args
1499
1500                     print $fh ")";
1501                 }
1502
1503                 print $fh ";" if $item_flags =~ /;/; # semicolon: "dTHR;"
1504                 print $fh "\n";
1505
1506                 # Only the first entry is normally displayed
1507                 last unless $need_individual_usage;
1508             }
1509         }
1510
1511         print $fh "\n=back\n";
1512     }
1513
1514     print $fh "\n=back\n";
1515     print $fh "\n=for hackers\nFound in file $file\n";
1516 }
1517
1518 sub construct_missings_section {
1519     my ($missings_hdr, $missings_ref) = @_;
1520     my $text = "";
1521
1522     $text .= "$missings_hdr\n" . format_pod_indexes($missings_ref);
1523
1524     if ($missings_ref->@* == 0) {
1525         return $text . "\nThere are currently no items of this type\n";
1526     }
1527
1528     # Sort the elements.
1529     my @missings = sort dictionary_order $missings_ref->@*;
1530
1531
1532     $text .= "\n";
1533
1534     use integer;
1535
1536     # Look through all the elements in the list and see how many columns we
1537     # could place them in the output what will fit in the available width.
1538     my $min_spacer = 2;     # Need this much space between columns
1539     my $columns;
1540     my $rows;
1541     my @col_widths;
1542
1543   COLUMN:
1544     # We start with more columns, and work down until we find a number that
1545     # can accommodate all the data.  This algorithm doesn't require the
1546     # resulting columns to all have the same width.  This can allow for
1547     # as tight of packing as the data will possibly allow.
1548     for ($columns = 7; $columns >= 1; $columns--) {
1549
1550         # For this many columns, we will need this many rows (final row might
1551         # not be completely filled)
1552         $rows = (@missings + $columns - 1) / $columns;
1553
1554         # We only need to execute this final iteration to calculate the number
1555         # of rows, as we can't get fewer than a single column.
1556         last if $columns == 1;
1557
1558         my $row_width = 1;  # For 1 space indent
1559         my $i = 0;  # Which missing element
1560
1561         # For each column ...
1562         for my $col (0 .. $columns - 1) {
1563
1564             # Calculate how wide the column needs to be, which is based on the
1565             # widest element in it
1566             $col_widths[$col] = 0;
1567
1568             # Look through all the rows to find the widest element
1569             for my $row (0 .. $rows - 1) {
1570
1571                 # Skip if this row doesn't have an entry for this column
1572                 last if $i >= @missings;
1573
1574                 # This entry occupies this many bytes.
1575                 my $this_width = length $missings[$i];
1576
1577                 # All but the final column need a spacer between it and the
1578                 # next column over.
1579                 $this_width += $min_spacer if $col < $columns - 1;
1580
1581
1582                 # This column will need to have enough width to accommodate
1583                 # this element
1584                 if ($this_width > $col_widths[$col]) {
1585
1586                     # We can't have this many columns if the total width
1587                     # exceeds the available; bail now and try fewer columns
1588                     next COLUMN if $row_width + $this_width > $max_width;
1589
1590                     $col_widths[$col] = $this_width;
1591                 }
1592
1593                 $i++;   # The next row will contain the next item
1594             }
1595
1596             $row_width += $col_widths[$col];
1597             next COLUMN if $row_width > $max_width;
1598         }
1599
1600         # If we get this far, this many columns works
1601         last;
1602     }
1603
1604     # Here, have calculated the number of rows ($rows) and columns ($columns)
1605     # required to list the elements.  @col_widths contains the width of each
1606     # column.
1607
1608     $text .= "\n";
1609
1610     # Assemble the output
1611     for my $row (0 .. $rows - 1) {
1612         for my $col (0 .. $columns - 1) {
1613             $text .= " " if $col == 0;  # Indent one to mark as verbatim
1614
1615             my $index = $row + $rows * $col;  # Convert 2 dimensions to 1
1616
1617             # Skip if this row doesn't have an entry for this column
1618             next if $index >= @missings;
1619
1620             my $element = $missings[$index];
1621             $text .= $element;
1622
1623             # Add alignment spaces for all but final column
1624             $text .= " " x ($col_widths[$col] - length $element)
1625                                                         if $col < $columns - 1;
1626         }
1627
1628         $text .= "\n";  # End of row
1629     }
1630
1631     return $text;
1632 }
1633
1634 sub dictionary_order {
1635     # Do a case-insensitive dictionary sort, falling back in stages to using
1636     # everything for determinancy.  The initial comparison ignores
1637     # all non-word characters and non-trailing underscores and digits, with
1638     # trailing ones collating to after any other characters.  This collation
1639     # order continues in case tie breakers are needed; sequences of digits
1640     # that do get looked at always compare numerically.  The first tie
1641     # breaker takes all digits and underscores into account.  The next tie
1642     # breaker uses a caseless character-by-character comparison of everything
1643     # (including non-word characters).  Finally is a cased comparison.
1644     #
1645     # This gives intuitive results, but obviously could be tweaked.
1646
1647     no warnings 'non_unicode';
1648
1649     local $a = $a;
1650     local $b = $b;
1651
1652     # Convert all digit sequences to same length with leading zeros, so for
1653     # example, 8 will compare less than 16 (using a fill length value that
1654     # should be longer than any sequence in the input).
1655     $a =~ s/(\d+)/sprintf "%06d", $1/ge;
1656     $b =~ s/(\d+)/sprintf "%06d", $1/ge;
1657
1658     # Translate any underscores and digits so they compare after all Unicode
1659     # characters
1660     $a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1661     $b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
1662
1663     use feature 'state';
1664     # Modify \w, \W to reflect the changes.
1665     state $ud = '\x{110000}-\x{11000A}';    # xlated underscore, digits
1666     state $w = "\\w$ud";                    # new \w string
1667     state $mod_w = qr/[$w]/;
1668     state $mod_W = qr/[^$w]/;
1669
1670     # Only \w for initial comparison
1671     my $a_only_word = uc($a =~ s/$mod_W//gr);
1672     my $b_only_word = uc($b =~ s/$mod_W//gr);
1673
1674     # And not initial nor interior underscores nor digits (by squeezing them
1675     # out)
1676     my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1677     my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
1678
1679     # If the stripped versions differ, use that as the comparison.
1680     my $cmp = $a_stripped cmp $b_stripped;
1681     return $cmp if $cmp;
1682
1683     # For the first tie breaker, repeat, but consider initial and interior
1684     # underscores and digits, again having those compare after all Unicode
1685     # characters
1686     $cmp = $a_only_word cmp $b_only_word;
1687     return $cmp if $cmp;
1688
1689     # Next tie breaker is just a caseless comparison
1690     $cmp = uc($a) cmp uc($b);
1691     return $cmp if $cmp;
1692
1693     # Finally a straight comparison
1694     return $a cmp $b;
1695 }
1696
1697 sub output {
1698     my ($podname, $header, $dochash, $footer, @missings_refs) = @_;
1699     #
1700     # strip leading '|' from each line which had been used to hide
1701     # pod from pod checkers.
1702     s/^\|//gm for $header, $footer, @missings_refs;
1703
1704     my $fh = open_new("pod/$podname.pod", undef,
1705                       {by => "$0 extracting documentation",
1706                        from => 'the C source files'}, 1);
1707
1708     print $fh $header, "\n";
1709
1710     for my $section_name (sort dictionary_order keys %valid_sections) {
1711         my $section_info = $dochash->{$section_name};
1712
1713         # We allow empty sections in perlintern.
1714         if (! $section_info && $podname eq 'perlapi') {
1715             warn "Empty section '$section_name'; skipped";
1716             next;
1717         }
1718
1719         print $fh "\n=head1 $section_name\n";
1720
1721         if ($section_info->{X_tags}) {
1722             print $fh "X<$_>" for sort keys $section_info->{X_tags}->%*;
1723             print $fh "\n";
1724             delete $section_info->{X_tags};
1725         }
1726
1727         if ($podname eq 'perlapi') {
1728             print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1729                                 if defined $valid_sections{$section_name}{header};
1730
1731             # Output any heading-level documentation and delete so won't get in
1732             # the way later
1733             if (exists $section_info->{""}) {
1734                 print $fh "\n", $section_info->{""}, "\n";
1735                 delete $section_info->{""};
1736             }
1737         }
1738
1739         if ($section_info && keys $section_info->%*) {
1740             for my $function_name (sort dictionary_order keys %$section_info) {
1741                 docout($fh, $function_name, $section_info->{$function_name});
1742             }
1743         }
1744         else {
1745             my $pod_type = ($podname eq 'api') ? "public" : "internal";
1746             print $fh "\nThere are currently no $pod_type API items in ",
1747                       $section_name, "\n";
1748         }
1749
1750         print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1751                             if $podname eq 'perlapi'
1752                             && defined $valid_sections{$section_name}{footer};
1753     }
1754
1755
1756     my $first_time = 1;
1757     while (1) {
1758         my $missings_hdr = shift @missings_refs or last;
1759         my $missings_ref = shift @missings_refs or die "Foo";
1760
1761         if ($first_time) {
1762             $first_time = 0;
1763             print $fh <<~EOT;
1764
1765                 =head1 $undocumented_scn
1766
1767                 EOT
1768         }
1769
1770         print $fh construct_missings_section($missings_hdr, $missings_ref);
1771     }
1772
1773     print $fh "\n$footer\n=cut\n";
1774
1775     read_only_bottom_close_and_rename($fh);
1776 }
1777
1778 foreach (@{(setup_embed())[0]}) {
1779     next if @$_ < 2;
1780     my ($flags, $ret_type, $func, @args) = @$_;
1781     s/\b(?:NN|NULLOK)\b\s+//g for @args;
1782
1783     $funcflags{$func} = {
1784                          flags => $flags,
1785                          ret_type => $ret_type,
1786                          args => \@args,
1787                         };
1788 }
1789
1790 # glob() picks up docs from extra .c or .h files that may be in unclean
1791 # development trees.
1792 open my $fh, '<', 'MANIFEST'
1793     or die "Can't open MANIFEST: $!";
1794 while (my $line = <$fh>) {
1795     next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1796
1797     # Don't pick up pods from these.
1798     next if $file =~ m! ^ ( cpan | dist | ext ) / !x
1799          && ! defined $extra_input_pods{$file};
1800
1801     open F, '<', $file or die "Cannot open $file for docs: $!\n";
1802     autodoc(\*F,$file);
1803     close F or die "Error closing $file: $!\n";
1804 }
1805 close $fh or die "Error whilst reading MANIFEST: $!";
1806
1807 parse_config_h();
1808
1809 for (sort keys %funcflags) {
1810     next unless $funcflags{$_}{flags} =~ /d/;
1811     next if $funcflags{$_}{flags} =~ /h/;
1812     warn "no docs for $_\n";
1813 }
1814
1815 foreach (sort keys %missing) {
1816     warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1817 }
1818
1819 # List of funcs in the public API that aren't also marked as core-only,
1820 # experimental nor deprecated.
1821
1822 my @undocumented_api =    grep {        $funcflags{$_}{flags} =~ /A/
1823                                    && ! $docs{api}{$_}
1824                                } keys %funcflags;
1825 my @undocumented_intern = grep {        $funcflags{$_}{flags} !~ /[AS]/
1826                                    && ! $docs{intern}{$_}
1827                                } keys %funcflags;
1828 my @undocumented_deprecated_api    = grep { $funcflags{$_}{flags} =~ /D/ }
1829                                                             @undocumented_api;
1830 my @undocumented_deprecated_intern = grep { $funcflags{$_}{flags} =~ /D/ }
1831                                                            @undocumented_intern;
1832 my @undocumented_experimental_api    =  grep { $funcflags{$_}{flags} =~ /x/ }
1833                                                             @undocumented_api;
1834 my @undocumented_experimental_intern =  grep { $funcflags{$_}{flags} =~ /x/ }
1835                                                            @undocumented_intern;
1836 my @missing_api = grep { $funcflags{$_}{flags} !~ /[xD]/ } @undocumented_api;
1837 push @missing_api, keys %missing_macros;
1838
1839 my @missing_intern = grep { $funcflags{$_}{flags} !~ /[xD]/ }
1840                                                            @undocumented_intern;
1841
1842 my @other_places = ( qw(perlclib ), keys %described_elsewhere );
1843 my $places_other_than_intern = join ", ",
1844             map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1845 my $places_other_than_api = join ", ",
1846             map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
1847
1848 # The S< > makes things less densely packed, hence more readable
1849 my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1850 my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
1851 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1852 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1853
1854 my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
1855 $valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1856
1857 my $section_list = join "\n\n", map { "=item L</$_>" }
1858                                 sort(dictionary_order keys %valid_sections),
1859                                 $undocumented_scn;  # Keep last
1860
1861 # Leading '|' is to hide these lines from pod checkers.  khw is unsure if this
1862 # is still needed.
1863 my $api_hdr = <<"_EOB_";
1864 |=encoding UTF-8
1865 |
1866 |=head1 NAME
1867 |
1868 |perlapi - autogenerated documentation for the perl public API
1869 |
1870 |=head1 DESCRIPTION
1871 |X<Perl API> X<API> X<api>
1872 |
1873 |This file contains most of the documentation of the perl public API, as
1874 |generated by F<embed.pl>.  Specifically, it is a listing of functions,
1875 |macros, flags, and variables that may be used by extension writers.  Besides
1876 |L<perlintern> and F<config.h>, some items are listed here as being actually
1877 |documented in another pod.
1878 |
1879 |L<At the end|/$undocumented_scn> is a list of functions which have yet
1880 |to be documented.  Patches welcome!  The interfaces of these are subject to
1881 |change without notice.
1882 |
1883 |Some of the functions documented here are consolidated so that a single entry
1884 |serves for multiple functions which all do basically the same thing, but have
1885 |some slight differences.  For example, one form might process magic, while
1886 |another doesn't.  The name of each variation is listed at the top of the
1887 |single entry.  But if all have the same signature (arguments and return type)
1888 |except for their names, only the usage for the base form is shown.  If any
1889 |one of the forms has a different signature (such as returning C<const> or
1890 |not) every function's signature is explicitly displayed.
1891 |
1892 |Anything not listed here or in the other mentioned pods is not part of the
1893 |public API, and should not be used by extension writers at all.  For these
1894 |reasons, blindly using functions listed in F<proto.h> is to be avoided when
1895 |writing extensions.
1896 |
1897 |In Perl, unlike C, a string of characters may generally contain embedded
1898 |C<NUL> characters.  Sometimes in the documentation a Perl string is referred
1899 |to as a "buffer" to distinguish it from a C string, but sometimes they are
1900 |both just referred to as strings.
1901 |
1902 |Note that all Perl API global variables must be referenced with the C<PL_>
1903 |prefix.  Again, those not listed here are not to be used by extension writers,
1904 |and may be changed or removed without notice; same with macros.
1905 |Some macros are provided for compatibility with the older,
1906 |unadorned names, but this support may be disabled in a future release.
1907 |
1908 |Perl was originally written to handle US-ASCII only (that is characters
1909 |whose ordinal numbers are in the range 0 - 127).
1910 |And documentation and comments may still use the term ASCII, when
1911 |sometimes in fact the entire range from 0 - 255 is meant.
1912 |
1913 |The non-ASCII characters below 256 can have various meanings, depending on
1914 |various things.  (See, most notably, L<perllocale>.)  But usually the whole
1915 |range can be referred to as ISO-8859-1.  Often, the term "Latin-1" (or
1916 |"Latin1") is used as an equivalent for ISO-8859-1.  But some people treat
1917 |"Latin1" as referring just to the characters in the range 128 through 255, or
1918 |sometimes from 160 through 255.
1919 |This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1920 |
1921 |Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1922 |L<perlebcdic>).  Most of the documentation (and even comments in the code)
1923 |ignore the EBCDIC possibility.
1924 |For almost all purposes the differences are transparent.
1925 |As an example, under EBCDIC,
1926 |instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1927 |whenever this documentation refers to C<utf8>
1928 |(and variants of that name, including in function names),
1929 |it also (essentially transparently) means C<UTF-EBCDIC>.
1930 |But the ordinals of characters differ between ASCII, EBCDIC, and
1931 |the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1932 |number of bytes than in UTF-8.
1933 |
1934 |The organization of this document is tentative and subject to change.
1935 |Suggestions and patches welcome
1936 |L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1937 |
1938 |The sections in this document currently are
1939 |
1940 |=over $standard_indent
1941
1942 |$section_list
1943 |
1944 |=back
1945 |
1946 |The listing below is alphabetical, case insensitive.
1947 _EOB_
1948
1949 my $api_footer = <<"_EOE_";
1950 |=head1 AUTHORS
1951 |
1952 |Until May 1997, this document was maintained by Jeff Okamoto
1953 |<okamoto\@corp.hp.com>.  It is now maintained as part of Perl itself.
1954 |
1955 |With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1956 |Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1957 |Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1958 |Stephen McCamant, and Gurusamy Sarathy.
1959 |
1960 |API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1961 |
1962 |Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1963 |
1964 |=head1 SEE ALSO
1965 |
1966 |F<config.h>, $places_other_than_api
1967 _EOE_
1968
1969 my $api_missings_hdr = <<'_EOT_';
1970 |The following functions have been flagged as part of the public
1971 |API, but are currently undocumented.  Use them at your own risk,
1972 |as the interfaces are subject to change.  Functions that are not
1973 |listed in this document are not intended for public use, and
1974 |should NOT be used under any circumstances.
1975 |
1976 |If you feel you need to use one of these functions, first send
1977 |email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1978 |It may be that there is a good reason for the function not being
1979 |documented, and it should be removed from this list; or it may
1980 |just be that no one has gotten around to documenting it.  In the
1981 |latter case, you will be asked to submit a patch to document the
1982 |function.  Once your patch is accepted, it will indicate that the
1983 |interface is stable (unless it is explicitly marked otherwise) and
1984 |usable by you.
1985 _EOT_
1986
1987 my $api_experimental_hdr = <<"_EOT_";
1988 |
1989 |Next are the API-flagged elements that are considered experimental.  Using one
1990 |of these is even more risky than plain undocumented ones.  They are listed
1991 |here because they should be listed somewhere (so their existence doesn't get
1992 |lost) and this is the best place for them.
1993 _EOT_
1994
1995 my $api_deprecated_hdr = <<"_EOT_";
1996 |
1997 |Finally are deprecated undocumented API elements.
1998 |Do not use any for new code; remove all occurrences of all of these from
1999 |existing code.
2000 _EOT_
2001
2002 output('perlapi', $api_hdr, $docs{api}, $api_footer,
2003        $api_missings_hdr, \@missing_api,
2004        $api_experimental_hdr, \@undocumented_experimental_api,
2005        $api_deprecated_hdr, \@undocumented_deprecated_api);
2006
2007 my $intern_hdr = <<"_EOB_";
2008 |=head1 NAME
2009 |
2010 |perlintern - autogenerated documentation of purely B<internal>
2011 |Perl functions
2012 |
2013 |=head1 DESCRIPTION
2014 |X<internal Perl functions> X<interpreter functions>
2015 |
2016 |This file is the autogenerated documentation of functions in the
2017 |Perl interpreter that are documented using Perl's internal documentation
2018 |format but are not marked as part of the Perl API.  In other words,
2019 |B<they are not for use in extensions>!
2020
2021 |It has the same sections as L<perlapi>, though some may be empty.
2022 |
2023 _EOB_
2024
2025 my $intern_footer = <<"_EOE_";
2026 |
2027 |=head1 AUTHORS
2028 |
2029 |The autodocumentation system was originally added to the Perl core by
2030 |Benjamin Stuhl.  Documentation is by whoever was kind enough to
2031 |document their functions.
2032 |
2033 |=head1 SEE ALSO
2034 |
2035 |F<config.h>, $places_other_than_intern
2036 _EOE_
2037
2038 my $intern_missings_hdr = <<"_EOT_";
2039 |
2040 |This section lists the elements that are otherwise undocumented.  If you use
2041 |any of them, please consider creating and submitting documentation for it.
2042 |
2043 |Experimental and deprecated undocumented elements are listed separately at the
2044 |end.
2045 |
2046 _EOT_
2047
2048 my $intern_experimental_hdr = <<"_EOT_";
2049 |
2050 |Next are the experimental undocumented elements
2051 |
2052 _EOT_
2053
2054 my $intern_deprecated_hdr = <<"_EOT_";
2055 |
2056 |Finally are the deprecated undocumented elements.
2057 |Do not use any for new code; remove all occurrences of all of these from
2058 |existing code.
2059 |
2060 _EOT_
2061
2062 output('perlintern', $intern_hdr, $docs{intern}, $intern_footer,
2063        $intern_missings_hdr, \@missing_intern,
2064        $intern_experimental_hdr, \@undocumented_experimental_intern,
2065        $intern_deprecated_hdr, \@undocumented_deprecated_intern
2066       );