autodoc.pl

   1 #!/usr/bin/perl -w
   2
   3 use Text::Tabs;
   4 #
   5 # Unconditionally regenerate:
   6 #
   7 #    pod/perlintern.pod
   8 #    pod/perlapi.pod
   9 #
  10 # from information stored in
  11 #
  12 #    embed.fnc
  13 #    plus all the core .c, .h, and .pod files listed in MANIFEST
  14 #
  15 # Has an optional arg, which is the directory to chdir to before reading
  16 # MANIFEST and the files
  17 #
  18 # This script is invoked as part of 'make all'
  19 #
  20 # The generated pod consists of sections of related elements, functions,
  21 # macros, and variables.  The keys of %valid_sections give the current legal
  22 # ones.  Just add a new key to add a section.
  23 #
  24 # Throughout the files read by this script are lines like
  25 #
  26 # =for apidoc_section Section Name
  27 # =for apidoc_section $section_name_variable
  28 #
  29 # "Section Name" (after having been stripped of leading space) must be one of
  30 # the legal section names, or an error is thrown.  $section_name_variable must
  31 # be one of the legal section name variables defined below; these expand to
  32 # legal section names.  This form is used so that minor wording changes in
  33 # these titles can be confied to this file.  All the names of the variables
  34 # end in '_scn'; this suffix is optional in the apidoc_section lines.
  35 #
  36 # All API elements defined between this line and the next 'apidoc_section'
  37 # line will go into the section "Section Name" (or $section_name_variable),
  38 # sorted by dictionary order within it.  perlintern and perlapi are parallel
  39 # documents, each potentially with a section "Section Name".  Each element is
  40 # marked as to which document it goes into.  If there are none for a
  41 # particular section in perlapi, that section is omitted.
  42 #
  43 # Also, in .[ch] files, there may be
  44 #
  45 # =head1 Section Name
  46 #
  47 # lines in comments.  These are also used by this program to switch to section
  48 # "Section Name".  The difference is that if there are any lines after the
  49 # =head1, inside the same comment, and before any =for apidoc-ish lines, they
  50 # are used as a heading for section "Section Name" (in both perlintern and
  51 # perlapi).  This includes any =head[2-5].  If more than one '=head1 Section
  52 # Name' line has content, they appear in the generated pod in an undefined
  53 # order.  Note that you can't use a $section_name_variable in =head1 lines
  54 #
  55 # The next =head1, =for apidoc_section, or file end terminates what goes into
  56 # the current section
  57 #
  58 # The %valid_sections hash below also can have header content, which will
  59 # appear before any =head1 content.  The hash can also have footer content
  60 # content, which will appear at the end of the section, after all the
  61 # elements.
  62 #
  63 # The lines that define the actual functions, etc are documented in embed.fnc,
  64 # because they have flags which must be kept in sync with that file.
  65
  66 use strict;
  67 use warnings;
  68
  69 # 80 column terminal - 2 for pager adding 2 columns; -7 for nroff
  70 # indent;
  71 my $max_width = 80 - 2 - 7;
  72
  73 if (@ARGV) {
  74     my $workdir = shift;
  75     chdir $workdir
  76         or die "Couldn't chdir to '$workdir': $!";
  77 }
  78 require './regen/regen_lib.pl';
  79 require './regen/embed_lib.pl';
  80
  81 my %described_elsewhere;
  82
  83 #
  84 # See database of global and static function prototypes in embed.fnc
  85 # This is used to generate prototype headers under various configurations,
  86 # export symbols lists for different platforms, and macros to provide an
  87 # implicit interpreter context argument.
  88 #
  89
  90 my %docs;
  91 my %seen;
  92 my %funcflags;
  93 my %missing;
  94 my %missing_macros;
  95
  96 my $link_text = "Described in";
  97
  98 my $description_indent = 4;
  99 my $usage_indent = 3;   # + initial blank yields 4 total
 100
 101 my $AV_scn = 'AV Handling';
 102 my $callback_scn = 'Callback Functions';
 103 my $casting_scn = 'Casting';
 104 my $casing_scn = 'Character case changing';
 105 my $classification_scn = 'Character classification';
 106 my $names_scn = 'Character names';
 107 my $scope_scn = 'Compile-time scope hooks';
 108 my $compiler_scn = 'Compiler and Preprocessor information';
 109 my $directives_scn = 'Compiler directives';
 110 my $concurrency_scn = 'Concurrency';
 111 my $COP_scn = 'COP Hint Hashes';
 112 my $CV_scn = 'CV Handling';
 113 my $custom_scn = 'Custom Operators';
 114 my $debugging_scn = 'Debugging';
 115 my $display_scn = 'Display functions';
 116 my $embedding_scn = 'Embedding and Interpreter Cloning';
 117 my $errno_scn = 'Errno';
 118 my $exceptions_scn = 'Exception Handling (simple) Macros';
 119 my $filesystem_scn = 'Filesystem configuration values';
 120 my $floating_scn = 'Floating point configuration values';
 121 my $formats_scn = 'Formats';
 122 my $genconfig_scn = 'General Configuration';
 123 my $globals_scn = 'Global Variables';
 124 my $GV_scn = 'GV Handling';
 125 my $hook_scn = 'Hook manipulation';
 126 my $HV_scn = 'HV Handling';
 127 my $io_scn = 'Input/Output';
 128 my $integer_scn = 'Integer configuration values';
 129 my $lexer_scn = 'Lexer interface';
 130 my $locale_scn = 'Locales';
 131 my $magic_scn = 'Magic';
 132 my $memory_scn = 'Memory Management';
 133 my $MRO_scn = 'MRO';
 134 my $multicall_scn = 'Multicall Functions';
 135 my $numeric_scn = 'Numeric Functions';
 136 my $optree_construction_scn = 'Optree construction';
 137 my $optree_manipulation_scn = 'Optree Manipulation Functions';
 138 my $pack_scn = 'Pack and Unpack';
 139 my $pad_scn = 'Pad Data Structures';
 140 my $password_scn = 'Password and Group access';
 141 my $paths_scn = 'Paths to system commands';
 142 my $prototypes_scn = 'Prototype information';
 143 my $regexp_scn = 'REGEXP Functions';
 144 my $signals_scn = 'Signals';
 145 my $site_scn = 'Site configuration';
 146 my $sockets_scn = 'Sockets configuration values';
 147 my $filters_scn = 'Source Filters';
 148 my $stack_scn = 'Stack Manipulation Macros';
 149 my $string_scn = 'String Handling';
 150 my $SV_flags_scn = 'SV Flags';
 151 my $SV_scn = 'SV Handling';
 152 my $time_scn = 'Time';
 153 my $typedefs_scn = 'Typedef names';
 154 my $unicode_scn = 'Unicode Support';
 155 my $utility_scn = 'Utility Functions';
 156 my $versioning_scn = 'Versioning';
 157 my $warning_scn = 'Warning and Dieing';
 158 my $XS_scn = 'XS';
 159
 160 # Kept separate at end
 161 my $undocumented_scn = 'Undocumented elements';
 162
 163 my %valid_sections = (
 164     $AV_scn => {},
 165     $callback_scn => {},
 166     $casting_scn => {},
 167     $casing_scn => {},
 168     $classification_scn => {},
 169     $scope_scn => {},
 170     $compiler_scn => {},
 171     $directives_scn => {},
 172     $concurrency_scn => {},
 173     $COP_scn => {},
 174     $CV_scn => {
 175         header => <<~'EOT',
 176             This section documents functions to manipulate CVs which are
 177             code-values, meaning subroutines.  For more information, see
 178             L<perlguts>.
 179             EOT
 180     },
 181
 182     $custom_scn => {},
 183     $debugging_scn => {},
 184     $display_scn => {},
 185     $embedding_scn => {},
 186     $errno_scn => {},
 187     $exceptions_scn => {},
 188     $filesystem_scn => {
 189         header => <<~'EOT',
 190             Also see L</List of capability HAS_foo symbols>.
 191             EOT
 192         },
 193     $floating_scn => {
 194         header => <<~'EOT',
 195             Also L</List of capability HAS_foo symbols> lists capabilities
 196             that arent in this section.  For example C<HAS_ASINH>, for the
 197             hyperbolic sine function.
 198             EOT
 199         },
 200     $formats_scn => {
 201         header => <<~'EOT',
 202             These are used for formatting the corresponding type For example,
 203             instead of saying
 204
 205              Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
 206
 207             use
 208
 209              Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
 210
 211             This keeps you from having to know if, say an IV, needs to be
 212             printed as C<%d>, C<%ld>, or something else.
 213             EOT
 214       },
 215     $genconfig_scn => {
 216         header => <<~'EOT',
 217             This section contains configuration information not otherwise
 218             found in the more specialized sections of this document.  At the
 219             end is a list of C<#defines> whose name should be enough to tell
 220             you what they do, and a list of #defines which tell you if you
 221             need to C<#include> files to get the corresponding functionality.
 222             EOT
 223
 224         footer => <<~'EOT',
 225
 226             =head2 List of capability C<HAS_I<foo>> symbols
 227
 228             This is a list of those symbols that dont appear elsewhere in ths
 229             document that indicate if the current platform has a certain
 230             capability.  Their names all begin with C<HAS_>.  Only those
 231             symbols whose capability is directly derived from the name are
 232             listed here.  All others have their meaning expanded out elsewhere
 233             in this document.  This (relatively) compact list is because we
 234             think that the expansion would add little or no value and take up
 235             a lot of space (because there are so many).  If you think certain
 236             ones should be expanded, send email to
 237             L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
 238
 239             Each symbol here will be C<#define>d if and only if the platform
 240             has the capability.  If you need more detail, see the
 241             corresponding entry in F<config.h>.  For convenience, the list is
 242             split so that the ones that indicate there is a reentrant version
 243             of a capability are listed separately
 244
 245             __HAS_LIST__
 246
 247             And, the reentrant capabilities:
 248
 249             __HAS_R_LIST__
 250
 251             Example usage:
 252
 253             =over
 254
 255              #ifdef HAS_STRNLEN
 256                use strnlen()
 257              #else
 258                use an alternative implementation
 259              #endif
 260
 261             =back
 262
 263             =head2 List of C<#include> needed symbols
 264
 265             This list contains symbols that indicate if certain C<#include>
 266             files are present on the platform.  If your code accesses the
 267             functionality that one of these is for, you will need to
 268             C<#include> it if the symbol on this list is C<#define>d.  For
 269             more detail, see the corresponding entry in F<config.h>.
 270
 271             __INCLUDE_LIST__
 272
 273             Example usage:
 274
 275             =over
 276
 277              #ifdef I_WCHAR
 278                #include <wchar.h>
 279              #endif
 280
 281             =back
 282             EOT
 283       },
 284     $globals_scn => {},
 285     $GV_scn => {},
 286     $hook_scn => {},
 287     $HV_scn => {},
 288     $io_scn => {},
 289     $integer_scn => {},
 290     $lexer_scn => {},
 291     $locale_scn => {},
 292     $magic_scn => {},
 293     $memory_scn => {},
 294     $MRO_scn => {},
 295     $multicall_scn => {},
 296     $numeric_scn => {},
 297     $optree_construction_scn => {},
 298     $optree_manipulation_scn => {},
 299     $pack_scn => {},
 300     $pad_scn => {},
 301     $password_scn => {},
 302     $paths_scn => {},
 303     $prototypes_scn => {},
 304     $regexp_scn => {},
 305     $signals_scn => {},
 306     $site_scn => {
 307         header => <<~'EOT',
 308             These variables give details as to where various libraries,
 309             installation destinations, I<etc.>, go, as well as what various
 310             installation options were selected
 311             EOT
 312       },
 313     $sockets_scn => {},
 314     $filters_scn => {},
 315     $stack_scn => {},
 316     $string_scn => {
 317         header => <<~EOT,
 318             See also C<L</$unicode_scn>>.
 319             EOT
 320       },
 321     $SV_flags_scn => {},
 322     $SV_scn => {},
 323     $time_scn => {},
 324     $typedefs_scn => {},
 325     $unicode_scn => {
 326         header => <<~EOT,
 327             L<perlguts/Unicode Support> has an introduction to this API.
 328
 329             See also C<L</$classification_scn>>,
 330             C<L</$casing_scn>>,
 331             and C<L</$string_scn>>.
 332             Various functions outside this section also work specially with
 333             Unicode.  Search for the string "utf8" in this document.
 334             EOT
 335       },
 336     $utility_scn => {},
 337     $versioning_scn => {},
 338     $warning_scn => {},
 339     $XS_scn => {},
 340 );
 341
 342 # Somewhat loose match for an apidoc line so we can catch minor typos.
 343 # Parentheses are used to capture portions so that below we verify
 344 # that things are the actual correct syntax.
 345 my $apidoc_re = qr/ ^ (\s*)            # $1
 346                       (=?)             # $2
 347                       (\s*)            # $3
 348                       for (\s*)        # $4
 349                       apidoc (_item)?  # $5
 350                       (\s*)            # $6
 351                       (.*?)            # $7
 352                       \s* \n /x;
 353 # Only certain flags, dealing with display, are acceptable for apidoc_item
 354 my $display_flags = "fFnDopsT";
 355
 356 sub check_api_doc_line ($$) {
 357     my ($file, $in) = @_;
 358
 359     return unless $in =~ $apidoc_re;
 360
 361     my $is_item = defined $5;
 362     my $is_in_proper_form = length $1 == 0
 363                          && length $2 > 0
 364                          && length $3 == 0
 365                          && length $4 > 0
 366                          && length $6 > 0
 367                          && length $7 > 0;
 368     my $proto_in_file = $7;
 369     my $proto = $proto_in_file;
 370     $proto = "||$proto" if $proto !~ /\|/;
 371     my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
 372
 373     $name && $is_in_proper_form or die <<EOS;
 374 Bad apidoc at $file line $.:
 375   $in
 376 Expected:
 377   =for apidoc flags|returntype|name|arg|arg|...
 378   =for apidoc flags|returntype|name
 379   =for apidoc name
 380 (or 'apidoc_item')
 381 EOS
 382
 383     die "Only [$display_flags] allowed in apidoc_item:\n$in"
 384                             if $is_item && $flags =~ /[^$display_flags]/;
 385
 386     return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
 387 }
 388
 389 sub embed_override($) {
 390     my ($element_name) = shift;
 391
 392     # If the entry is also in embed.fnc, it should be defined
 393     # completely there, but not here
 394     my $embed_docref = delete $funcflags{$element_name};
 395
 396     return unless $embed_docref and %$embed_docref;
 397
 398     my $flags = $embed_docref->{'flags'};
 399     warn "embed.fnc entry '$element_name' missing 'd' flag"
 400                                             unless $flags =~ /d/;
 401
 402     return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
 403 }
 404
 405 # The section that is in effect at the beginning of the given file.  If not
 406 # listed here, an apidoc_section line must precede any apidoc lines.
 407 # This allows the files listed here that generally are single-purpose, to not
 408 # have to worry about the autodoc section
 409 my %initial_file_section = (
 410                             'av.c' => $AV_scn,
 411                             'av.h' => $AV_scn,
 412                             'cv.h' => $CV_scn,
 413                             'doio.c' => $io_scn,
 414                             'gv.c' => $GV_scn,
 415                             'gv.h' => $GV_scn,
 416                             'hv.h' => $HV_scn,
 417                             'locale.c' => $locale_scn,
 418                             'malloc.c' => $memory_scn,
 419                             'numeric.c' => $numeric_scn,
 420                             'opnames.h' => $optree_construction_scn,
 421                             'pad.h'=> $pad_scn,
 422                             'patchlevel.h' => $versioning_scn,
 423                             'perlio.h' => $io_scn,
 424                             'pod/perlapio.pod' => $io_scn,
 425                             'pod/perlcall.pod' => $callback_scn,
 426                             'pod/perlembed.pod' => $embedding_scn,
 427                             'pod/perlfilter.pod' => $filters_scn,
 428                             'pod/perliol.pod' => $io_scn,
 429                             'pod/perlmroapi.pod' => $MRO_scn,
 430                             'pod/perlreguts.pod' => $regexp_scn,
 431                             'pp_pack.c' => $pack_scn,
 432                             'pp_sort.c' => $SV_scn,
 433                             'regcomp.c' => $regexp_scn,
 434                             'regexp.h' => $regexp_scn,
 435                             'unicode_constants.h' => $unicode_scn,
 436                             'utf8.c' => $unicode_scn,
 437                             'utf8.h' => $unicode_scn,
 438                             'vutil.c' => $versioning_scn,
 439                            );
 440
 441 sub autodoc ($$) { # parse a file and extract documentation info
 442     my($fh,$file) = @_;
 443     my($in, $line_num, $header, $section);
 444
 445     $section = $initial_file_section{$file}
 446                                     if defined $initial_file_section{$file};
 447
 448     my $file_is_C = $file =~ / \. [ch] $ /x;
 449
 450     # Count lines easier
 451     my $get_next_line = sub { $line_num++; return <$fh> };
 452
 453     # Read the file
 454     while ($in = $get_next_line->()) {
 455         last unless defined $in;
 456
 457         next unless (    $in =~ / ^ =for [ ]+ apidoc /x
 458                                       # =head1 lines only have effect in C files
 459                      || ($file_is_C && $in =~ /^=head1/));
 460
 461         # Here, the line introduces a portion of the input that we care about.
 462         # Either it is for an API element, or heading text which we expect
 463         # will be used for elements later in the file
 464
 465         my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
 466         my (@args, @items);
 467
 468         # If the line starts a new section ...
 469         if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
 470
 471             $section = $1;
 472             if ($section =~ / ^ \$ /x) {
 473                 $section .= '_scn' unless $section =~ / _scn $ /;
 474                 $section = eval "$section";
 475                 die "Unknown \$section variable '$section' in $file: $@" if $@;
 476             }
 477             die "Unknown section name '$section' in $file near line $.\n"
 478                                     unless defined $valid_sections{$section};
 479
 480         }
 481         elsif ($in=~ /^ =for [ ]+ apidoc \B /x) {   # Otherwise better be a
 482                                                     # plain apidoc line
 483             die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
 484             die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
 485         }
 486         else {  # Plain apidoc
 487
 488             ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
 489                                                 = check_api_doc_line($file, $in);
 490             # Override this line with any info in embed.fnc
 491             my ($embed_flags, $embed_ret_type, @embed_args)
 492                                                 = embed_override($element_name);
 493             if ($embed_ret_type) {
 494                 warn "embed.fnc entry overrides redundant information in"
 495                     . " '$proto_in_file' in $file"
 496                                                if $flags || $ret_type || @args;
 497                 $flags = $embed_flags;
 498                 $ret_type = $embed_ret_type;
 499                 @args = @embed_args;
 500             }
 501             elsif ($flags !~ /[my]/)  { # Not in embed.fnc, is missing if not
 502                                         # a macro or typedef
 503                 $missing{$element_name} = $file;
 504             }
 505
 506             die "flag '$1' is not legal (for function $element_name (from $file))"
 507                         if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXxy] ) /x;
 508
 509             die "'u' flag must also have 'm' or 'y' flags' for $element_name"
 510                                             if $flags =~ /u/ && $flags !~ /[my]/;
 511             warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
 512                         if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
 513
 514             if (exists $seen{$element_name} && $flags !~ /h/) {
 515                 die ("'$element_name' in $file was already documented in $seen{$element_name}");
 516             }
 517             else {
 518                 $seen{$element_name} = $file;
 519             }
 520         }
 521
 522         # Here we have processed the initial line in the heading text or API
 523         # element, and have saved the important information from it into the
 524         # corresponding variables.  Now accumulate the text that applies to it
 525         # up to a terminating line, which is one of:
 526         # 1) =cut
 527         # 2) =head (in a C file only =head1)
 528         # 3) an end comment line in a C file: m:^\s*\*/:
 529         # 4) =for apidoc... (except apidoc_item lines)
 530         $text = "";
 531         my $head_ender_num = ($file_is_C) ? 1 : "";
 532         while (defined($in = $get_next_line->())) {
 533
 534             last if $in =~ /^=cut/x;
 535             last if $in =~ /^=head$head_ender_num/;
 536
 537             if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
 538
 539                 # End of comment line in C files is a fall-back terminator,
 540                 # but warn only if there actually is some accumulated text
 541                 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
 542                 last;
 543             }
 544
 545             if ($in !~ / ^ =for [ ]+ apidoc /x) {
 546                 $text .= $in;
 547                 next;
 548             }
 549
 550             # Here, the line is an apidoc line.  All but apidoc_item terminate
 551             # the text being accumulated.
 552             last if $in =~ / ^ =for [ ]+ apidoc_section /x;
 553
 554             my ($item_name, $item_flags, $item_ret_type, $is_item,
 555                     $item_proto, @item_args) = check_api_doc_line($file, $in);
 556             last unless $is_item;
 557
 558             # Here, is an apidoc_item_line; They can only come within apidoc
 559             # paragraphs.
 560             die "Unexpected api_doc_item line '$item_proto'"
 561                                                         unless $element_name;
 562
 563             # We accept blank lines between these, but nothing else;
 564             die "apidoc_item lines must immediately follow apidoc lines for "
 565               . " '$element_name' in $file"
 566                                                             if $text =~ /\S/;
 567             # Override this line with any info in embed.fnc
 568             my ($embed_flags, $embed_ret_type, @embed_args)
 569                                                 = embed_override($item_name);
 570             if ($embed_ret_type) {
 571                 warn "embed.fnc entry overrides redundant information in"
 572                     . " '$item_proto' in $file"
 573                                 if $item_flags || $item_ret_type || @item_args;
 574
 575                 $item_flags = $embed_flags;
 576                 $item_ret_type = $embed_ret_type;
 577                 @item_args = @embed_args;
 578             }
 579
 580             # Use the base entry flags if none for this item; otherwise add in
 581             # any non-display base entry flags.
 582             if ($item_flags) {
 583                 $item_flags .= $flags =~ s/[$display_flags]//rg;
 584             }
 585             else {
 586                 $item_flags = $flags;
 587             }
 588             $item_ret_type = $ret_type unless $item_ret_type;
 589             @item_args = @args unless @item_args;
 590             push @items, { name     => $item_name,
 591                            ret_type => $item_ret_type,
 592                            flags    => $item_flags,
 593                            args     => [ @item_args ],
 594                          };
 595
 596             # This line shows that this element is documented.
 597             delete $funcflags{$item_name};
 598         }
 599
 600         # Here, are done accumulating the text for this item.  Trim it
 601         $text =~ s/ ^ \s* //x;
 602         $text =~ s/ \s* $ //x;
 603         $text .= "\n" if $text ne "";
 604
 605         # And treat all-spaces as nothing at all
 606         undef $text unless $text =~ /\S/;
 607
 608         if ($element_name) {
 609
 610             # Here, we have accumulated into $text, the pod for $element_name
 611             my $where = $flags =~ /A/ ? 'api' : 'guts';
 612
 613             $section = "Functions in file $file" unless defined $section;
 614             die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
 615                                                     unless defined $section;
 616             if (exists $docs{$where}{$section}{$element_name}) {
 617                 warn "$0: duplicate API entry for '$element_name' in"
 618                     . " $where/$section\n";
 619                 next;
 620             }
 621
 622             # Override the text with just a link if the flags call for that
 623             my $is_link_only = ($flags =~ /h/);
 624             if ($is_link_only) {
 625                 if ($file_is_C) {
 626                     die "Can't currently handle link with items to it:\n$in" if @items;
 627                     redo;    # Don't put anything if C source
 628                 }
 629
 630                 # Here, is an 'h' flag in pod.  We add a reference to the pod (and
 631                 # nothing else) to perlapi/intern.  (It would be better to add a
 632                 # reference to the correct =item,=header, but something that makes
 633                 # it harder is that it that might be a duplicate, like '=item *';
 634                 # so that is a future enhancement XXX.  Another complication is
 635                 # there might be more than one deserving candidates.)
 636                 my $podname = $file =~ s!.*/!!r;    # Rmv directory name(s)
 637                 $podname =~ s/\.pod//;
 638                 $text = "Described in L<$podname>.\n";
 639
 640                 # Don't output a usage example for linked to documentation if
 641                 # it is trivial (has no arguments) and we aren't to add a
 642                 # semicolon
 643                 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
 644
 645                 # Keep track of all the pod files that we refer to.
 646                 push $described_elsewhere{$podname}->@*, $podname;
 647             }
 648
 649             $docs{$where}{$section}{$element_name}{flags} = $flags;
 650             $docs{$where}{$section}{$element_name}{pod} = $text;
 651             $docs{$where}{$section}{$element_name}{file} = $file;
 652             $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
 653             push $docs{$where}{$section}{$element_name}{args}->@*, @args;
 654             push $docs{$where}{$section}{$element_name}{items}->@*, @items;
 655         }
 656         elsif ($text) {
 657             $valid_sections{$section}{header} = "" unless
 658                                     defined $valid_sections{$section}{header};
 659             $valid_sections{$section}{header} .= "\n$text";
 660         }
 661
 662         # We already have the first line of what's to come in $in
 663         redo;
 664
 665     } # End of loop through input
 666 }
 667
 668 my %configs;
 669 my @has_defs;
 670 my @has_r_defs;     # Reentrant symbols
 671 my @include_defs;
 672
 673 sub parse_config_h {
 674     use re '/aa';   # Everthing is ASCII in this file
 675
 676     # Process config.h
 677     my $config_h = 'config.h';
 678     $config_h = 'win32/config.h' unless -e $config_h;
 679     die "Can't find $config_h" unless -e $config_h;
 680     open my $fh, '<', $config_h or die "Can't open $config_h: $!";
 681     while (<$fh>) {
 682
 683         # Look for lines like /* FOO_BAR:
 684         # By convention all config.h descriptions begin like that
 685         if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
 686             my $name = $1;
 687
 688             # Here we are starting the description for $name in config.h.  We
 689             # accumulate the entire description for it into @description.
 690             # Flowing text from one input line to another is appended into the
 691             # same array element to make a single flowing line element, but
 692             # verbatim lines are kept as separate elements in @description.
 693             # This will facilitate later doing pattern matching without regard
 694             # to line boundaries on non-verbatim text.
 695
 696             die "Multiple config.h entries for '$name'"
 697                                         if defined $configs{$name}{description};
 698
 699             # Get first line of description
 700             $_ = <$fh>;
 701
 702             # Each line in the description begins with blanks followed by '/*'
 703             # and some spaces.
 704             die "Unexpected config.h initial line for $name: '$_'"
 705                                             unless s/ ^ ( \s* \* \s* ) //x;
 706             my $initial_text = $1;
 707
 708             # Initialize the description with this first line (after having
 709             # stripped the prefix text)
 710             my @description = $_;
 711
 712             # The first line is used as a template for how much indentation
 713             # each normal succeeding line has.  Lines indented further
 714             # will be considered as intended to be verbatim.  But, empty lines
 715             # likely won't have trailing blanks, so just strip the whole thing
 716             # for them.
 717             my $strip_initial_qr = qr!   \s* \* \s* $
 718                                     | \Q$initial_text\E
 719                                     !x;
 720             $configs{$name}{verbatim} = 0;
 721
 722             # Read in the remainder of the description
 723             while (<$fh>) {
 724                 last if s| ^ \s* \* / ||x;  # A '*/' ends it
 725
 726                 die "Unexpected config.h description line for $name: '$_'"
 727                                                 unless s/$strip_initial_qr//;
 728
 729                 # Fix up the few flawed lines in config.h wherein a new
 730                 # sentence begins with a tab (and maybe a space after that).
 731                 # Although none of them currently do, let it recognize
 732                 # something like
 733                 #
 734                 #   "... text").  The next sentence ...
 735                 #
 736                 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1  $2/xg;
 737
 738                 # If this line has extra indentation or looks to have columns,
 739                 # it should be treated as verbatim.  Columns are indicated by
 740                 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
 741                 # not preceded by punctuation.
 742                 if ($_ !~ m/  ^ \s
 743                               | \S (?:                    \t
 744                                     |                     \s{3}
 745                                     |  (*nlb:[[:punct:]]) \s{2}
 746                                    )
 747                            /x)
 748                 {
 749                     # But here, is not a verbatim line.  Add an empty line if
 750                     # this is the first non-verbatim after a run of verbatims
 751                     if ($description[-1] =~ /^\s/) {
 752                         push @description, "\n", $_;
 753                     }
 754                     else {  # Otherwise, append this flowing line to the
 755                             # current flowing line
 756                         $description[-1] .= $_;
 757                     }
 758                 }
 759                 else {
 760                     $configs{$name}{verbatim} = 1;
 761
 762                     # The first verbatim line in a run of them is separated by an
 763                     # empty line from the flowing lines above it
 764                     push @description, "\n" if $description[-1] =~ /^\S/;
 765
 766                     $_ = Text::Tabs::expand($_);
 767
 768                     # Only a single space so less likely to wrap
 769                     s/ ^ \s* / /x;
 770
 771                     push @description, $_;
 772                 }
 773             }
 774
 775             push $configs{$name}{description}->@*, @description
 776
 777         }   # Not a description; see if it is a macro definition.
 778         elsif (m! ^
 779                   (?: / \* )?                   # Optional commented-out
 780                                                 # indication
 781                       \# \s* define \s+ ( \w+ ) # $1 is the name
 782                   (   \s* )                     # $2 indicates if args or not
 783                   (   .*? )                     # $3 is any definition
 784                   (?: / \s* \* \* / )?          # Optional trailing /**/ or / **/
 785                   $
 786                 !x)
 787         {
 788             my $name = $1;
 789
 790             # There can be multiple definitions for a name.  We want to know
 791             # if any of them has arguments, and if any has a body.
 792             $configs{$name}{has_args} //= $2 eq "";
 793             $configs{$name}{has_args} ||= $2 eq "";
 794             $configs{$name}{has_defn} //= $3 ne "";
 795             $configs{$name}{has_defn} ||= $3 ne "";
 796         }
 797     }
 798
 799     # We now have stored the description and information about every #define
 800     # in the file.  The description is in a form convenient to operate on to
 801     # convert to pod.  Do that now.
 802     foreach my $name (keys %configs) {
 803         next unless defined $configs{$name}{description};
 804
 805         # All adjacent non-verbatim lines of the description are appended
 806         # together in a single element in the array.  This allows the patterns
 807         # to work across input line boundaries.
 808
 809         my $pod = "";
 810         while (defined ($_ = shift $configs{$name}{description}->@*)) {
 811             chomp;
 812
 813             if (/ ^ \S /x) {  # Don't edit verbatim lines
 814
 815                 # Enclose known file/path names not already so enclosed
 816                 # with <...>.  (Some entries in config.h are already
 817                 # '<path/to/file>')
 818                 my $file_name_qr = qr! [ \w / ]+ \.
 819                                     (?: c | h | xs | p [lm] | pmc | PL
 820                                         | sh | SH | exe ) \b
 821                                     !xx;
 822                 my $path_name_qr = qr! (?: / \w+ )+ !x;
 823                 for my $re ($file_name_qr, $path_name_qr) {
 824                     s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
 825                 }
 826
 827                 # Enclose <... file/path names with F<...> (but no double
 828                 # angle brackets)
 829                 for my $re ($file_name_qr, $path_name_qr) {
 830                     s! < ( $re ) > !F<$1>!gxx;
 831                 }
 832
 833                 # Explain metaconfig units
 834                 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
 835
 836                 # Convert "See foo" to "See C<L</foo>>" if foo is described in
 837                 # this file.  Also create a link to the known file INSTALL.
 838                 # And, to be more general, handle "See also foo and bar", and
 839                 # "See also foo, bar, and baz"
 840                 while (m/ \b [Ss]ee \s+
 841                          (?: also \s+ )?    ( \w+ )
 842                          (?: ,  \s+         ( \w+ ) )?
 843                          (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
 844                     my @links = $1;
 845                     push @links, $2 if defined $2;
 846                     push @links, $3 if defined $3;
 847                     foreach my $link (@links) {
 848                         if ($link eq 'INSTALL') {
 849                             s/ \b INSTALL \b /C<L<INSTALL>>/xg;
 850                         }
 851                         elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
 852                             s| \b $link \b |C<L</$link>>|xg;
 853                             $configs{$link}{linked} = 1;
 854                             $configs{$name}{linked} = 1;
 855                         }
 856                     }
 857                 }
 858
 859                 # Enclose what we think are symbols with C<...>.
 860                 no warnings 'experimental::vlb';
 861                 s/ (*nlb:<)
 862                    (
 863                         # Any word followed immediately with parens or
 864                         # brackets
 865                         \b \w+ (?: \( [^)]* \)    # parameter list
 866                                  | \[ [^]]* \]    # or array reference
 867                                )
 868                     | (*plb: ^ | \s ) -D \w+    # Also -Dsymbols.
 869                     | \b (?: struct | union ) \s \w+
 870
 871                         # Words that contain underscores (which are
 872                         # definitely not text) or three uppercase letters in
 873                         # a row.  Length two ones, like IV, aren't enclosed,
 874                         # because they often don't look as nice.
 875                     | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
 876                    )
 877                     (*nla:>)
 878                  /C<$1>/xg;
 879
 880                 # These include foo when the name is HAS_foo.  This is a
 881                 # heuristic which works in most cases.
 882                 if ($name =~ / ^ HAS_ (.*) /x) {
 883                     my $symbol = lc $1;
 884
 885                     # Don't include path components, nor things already in
 886                     # <>, or with trailing '(', '['
 887                     s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
 888                 }
 889             }
 890
 891             $pod .=  "$_\n";
 892         }
 893         delete $configs{$name}{description};
 894
 895         $configs{$name}{pod} = $pod;
 896     }
 897
 898     # Now have converted the description to pod.  We also now have enough
 899     # information that we can do cross checking to find definitions without
 900     # corresponding pod, and see if they are mentioned in some description;
 901     # otherwise they aren't documented.
 902   NAME:
 903     foreach my $name (keys %configs) {
 904
 905         # A definition without pod
 906         if (! defined $configs{$name}{pod}) {
 907
 908             # Leading/trailing underscore means internal to config.h, e.g.,
 909             # _GNU_SOURCE
 910             next if $name =~ / ^ _ /x;
 911             next if $name =~ / _ $ /x;
 912
 913             # MiXeD case names are internal to config.h; the first 4
 914             # characters are sufficient to determine this
 915             next if $name =~ / ^ [[:upper:]] [[:lower:]]
 916                                  [[:upper:]] [[:lower:]]
 917                             /x;
 918
 919             # Here, not internal to config.h.  Look to see if this symbol is
 920             # mentioned in the pod of some other.  If so, assume it is
 921             # documented.
 922             foreach my $check_name (keys %configs) {
 923                 my $this_element = $configs{$check_name};
 924                 my $this_pod = $this_element->{pod};
 925                 if (defined $this_pod) {
 926                     next NAME if $this_pod =~ / \b $name \b /x;
 927                 }
 928             }
 929
 930             warn "$name has no documentation\n";
 931             $missing_macros{$name} = 'config.h';
 932
 933             next;
 934         }
 935
 936         my $has_defn = $configs{$name}{has_defn};
 937         my $has_args = $configs{$name}{has_args};
 938
 939         # Check if any section already has an entry for this element.
 940         # If so, it better be a placeholder, in which case we replace it
 941         # with this entry.
 942         foreach my $section (keys $docs{'api'}->%*) {
 943             if (exists $docs{'api'}{$section}{$name}) {
 944                 my $was = $docs{'api'}{$section}{$name}->{pod};
 945                 $was = "" unless $was;
 946                 chomp $was;
 947                 if ($was ne "" && $was !~ m/$link_text/) {
 948                     die "Multiple descriptions for $name\n"
 949                         . "$section contained '$was'";
 950                 }
 951                 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
 952                 $configs{$name}{section} = $section;
 953                 last;
 954             }
 955         }
 956
 957         my $handled = 0;    # Haven't handled this yet
 958
 959         if (defined $configs{$name}{'section'}) {
 960             # This has been taken care of elsewhere.
 961             $handled = 1;
 962         }
 963         else {
 964             my $flags = "";
 965             if ($has_defn && ! $has_args) {
 966                 $configs{$name}{args} = 1;
 967             }
 968
 969             # Symbols of the form I_FOO are for #include files.  They have
 970             # special usage information
 971             if ($name =~ / ^ I_ ( .* ) /x) {
 972                 my $file = lc $1 . '.h';
 973                 $configs{$name}{usage} = <<~"EOT";
 974                     #ifdef $name
 975                         #include <$file>
 976                     #endif
 977                     EOT
 978             }
 979
 980             # Compute what section this variable should go into.  This
 981             # heuristic was determined by manually inspecting the current
 982             # things in config.h, and should be adjusted as necessary as
 983             # deficiencies are found.
 984             #
 985             # This is the default section for macros with a definiton but
 986             # no arguments, meaning it is replaced unconditionally
 987             #
 988             my $sb = qr/ _ | \b /x; # segment boundary
 989             my $dash_or_spaces = qr/ - | \s+ /x;
 990             my $pod = $configs{$name}{pod};
 991             if ($name =~ / ^ USE_ /x) {
 992                 $configs{$name}{'section'} = $site_scn;
 993             }
 994             elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
 995             {
 996                 $configs{$name}{'section'} = $time_scn;
 997             }
 998             elsif (   $name =~ / ^ [[:alpha:]]+ f $ /x
 999                    && $configs{$name}{pod} =~ m/ \b format \b /ix)
1000             {
1001                 $configs{$name}{'section'} = $formats_scn;
1002             }
1003             elsif ($name =~ /  DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1004                             | $sb CASTFLAGS $sb
1005                             | QUADMATH
1006                             | $sb (?: IS )? NAN
1007                             | $sb (?: IS )? FINITE
1008                             /x)
1009             {
1010                 $configs{$name}{'section'} =
1011                                     $floating_scn;
1012             }
1013             elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
1014                 $configs{$name}{'section'} = $filesystem_scn;
1015             }
1016             elsif (   $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1017                    || $configs{$name}{pod} =~ m/ \b align /x)
1018             {
1019                 $configs{$name}{'section'} = $compiler_scn;
1020             }
1021             elsif ($name =~ / ^ [IU] [ \d V ]
1022                             | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1023             {
1024                 $configs{$name}{'section'} = $integer_scn;
1025             }
1026             elsif ($name =~ / $sb t $sb /x) {
1027                 $configs{$name}{'section'} = $typedefs_scn;
1028                 $flags .= 'y';
1029             }
1030             elsif (   $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1031                     && $configs{$name}{pod} =~ m/ \b format \b /ix)
1032             {
1033                 $configs{$name}{'section'} = $formats_scn;
1034             }
1035             elsif ($name =~ / BACKTRACE /x) {
1036                 $configs{$name}{'section'} = $debugging_scn;
1037             }
1038             elsif ($name =~ / ALLOC $sb /x) {
1039                 $configs{$name}{'section'} = $memory_scn;
1040             }
1041             elsif (   $name =~ /   STDIO | FCNTL | EOF | FFLUSH
1042                                 | $sb FILE $sb
1043                                 | $sb DIR $sb
1044                                 | $sb LSEEK
1045                                 | $sb INO $sb
1046                                 | $sb OPEN
1047                                 | $sb CLOSE
1048                                 | ^ DIR
1049                                 | ^ INO $sb
1050                                 | DIR $
1051                                 | FILENAMES
1052                                 /x
1053                     || $configs{$name}{pod} =~ m!  I/O | stdio
1054                                                 | file \s+ descriptor
1055                                                 | file \s* system
1056                                                 | statfs
1057                                                 !x)
1058             {
1059                 $configs{$name}{'section'} = $filesystem_scn;
1060             }
1061             elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
1062                 $configs{$name}{'section'} = $signals_scn;
1063             }
1064             elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
1065                 $configs{$name}{'section'} = $prototypes_scn;
1066             }
1067             elsif (   $name =~ / ^ LOC_ /x
1068                     || $configs{$name}{pod} =~ /full path/i)
1069             {
1070                 $configs{$name}{'section'} = $paths_scn;
1071             }
1072             elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
1073                 $configs{$name}{'section'} = $locale_scn;
1074             }
1075             elsif ($configs{$name}{pod} =~ /  GCC | C99 | C\+\+ /xi) {
1076                 $configs{$name}{'section'} = $compiler_scn;
1077             }
1078             elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1079             {
1080                 $configs{$name}{'section'} = $password_scn;
1081             }
1082             elsif ($name =~ /  SOCKET | $sb SOCK /x) {
1083                 $configs{$name}{'section'} = $sockets_scn;
1084             }
1085             elsif (   $name =~ / THREAD | MULTIPLICITY /x
1086                     || $configs{$name}{pod} =~ m/ \b pthread /ix)
1087             {
1088                 $configs{$name}{'section'} = $concurrency_scn;
1089             }
1090             elsif ($name =~ /  PERL | ^ PRIV | SITE | ARCH | BIN
1091                                 | VENDOR | ^ USE
1092                             /x)
1093             {
1094                 $configs{$name}{'section'} = $site_scn;
1095             }
1096             elsif (   $pod =~ / \b floating $dash_or_spaces point \b /ix
1097                    || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1098                    || $pod =~ / \b doubles \b /ix
1099                    || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
1100             {
1101                 $configs{$name}{'section'} =
1102                                     $floating_scn;
1103             }
1104             else {
1105                 # Above are the specific sections.  The rest go into a
1106                 # grab-bag of general configuration values.  However, we put
1107                 # two classes of them into lists of their names, without their
1108                 # descriptions, when we think that the description doesn't add
1109                 # any real value.  One list contains the #include variables:
1110                 # the description is basically boiler plate for each of these.
1111                 # The other list contains the very many things that are of the
1112                 # form HAS_foo, and \bfoo\b is contained in its description,
1113                 # and there is no verbatim text in the pod or links to/from it
1114                 # (which would add value).  That means that it is likely the
1115                 # intent of the variable can be gleaned from just its name,
1116                 # and unlikely the description adds signficant value, so just
1117                 # listing them suffices.  Giving their descriptions would
1118                 # expand this pod significantly with little added value.
1119                 if (   ! $has_defn
1120                     && ! $configs{$name}{verbatim}
1121                     && ! $configs{$name}{linked})
1122                 {
1123                     if ($name =~ / ^ I_ ( .* ) /x) {
1124                         push @include_defs, $name;
1125                         next;
1126                     }
1127                     elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1128                         my $canonical_name = $1;
1129                         $canonical_name =~ s/_//g;
1130
1131                         my $canonical_pod = $configs{$name}{pod};
1132                         $canonical_pod =~ s/_//g;
1133
1134                         if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1135                             if ($name =~ / $sb R $sb /x) {
1136                                 push @has_r_defs, $name;
1137                             }
1138                             else {
1139                                 push @has_defs, $name;
1140                             }
1141                             next;
1142                         }
1143                     }
1144                 }
1145
1146                 $configs{$name}{'section'} = $genconfig_scn;
1147             }
1148
1149             my $section = $configs{$name}{'section'};
1150             die "Internal error: '$section' not in \%valid_sections"
1151                             unless grep { $_ eq $section } keys %valid_sections;
1152             $flags .= 'AdmnT';
1153             $flags .= 'U' unless defined $configs{$name}{usage};
1154
1155             # All the information has been gathered; save it
1156             $docs{'api'}{$section}{$name}{flags} = $flags;
1157             $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1158             $docs{'api'}{$section}{$name}{ret_type} = "";
1159             $docs{'api'}{$section}{$name}{file} = 'config.h';
1160             $docs{'api'}{$section}{$name}{usage}
1161                 = $configs{$name}{usage} if defined $configs{$name}{usage};
1162             push $docs{'api'}{$section}{$name}{args}->@*, ();
1163             push $docs{'api'}{$section}{$name}{items}->@*, ();
1164         }
1165     }
1166 }
1167
1168 sub format_pod_indexes($) {
1169     my $entries_ref = shift;
1170
1171     # Output the X<> references to the names, packed since they don't get
1172     # displayed, but not too many per line so that when someone is editing the
1173     # file, it doesn't run on
1174
1175     my $text ="";
1176     my $line_length = 0;
1177     for my $name (sort dictionary_order $entries_ref->@*) {
1178         my $entry = "X<$name>";
1179         my $entry_length = length $entry;
1180
1181         # Don't loop forever if we have a verrry long name, and don't go too
1182         # far to the right.
1183         if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1184             $text .= "\n";
1185             $line_length = 0;
1186         }
1187
1188         $text .= $entry;
1189         $line_length += $entry_length;
1190     }
1191
1192     return $text;
1193 }
1194
1195 sub docout ($$$) { # output the docs for one function
1196     my($fh, $element_name, $docref) = @_;
1197
1198     # Trim trailing space
1199     $element_name =~ s/\s*$//;
1200
1201     my $flags = $docref->{flags};
1202     my $pod = $docref->{pod} // "";
1203     my $file = $docref->{file};
1204
1205     my @items = $docref->{items}->@*;
1206
1207     # Make the main element the first of the items.  This allows uniform
1208     # treatment below
1209     unshift @items, {   name => $element_name,
1210                         flags => $flags,
1211                         ret_type => $docref->{ret_type},
1212                         args => [ $docref->{args}->@* ],
1213                     };
1214
1215     warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1216
1217     print $fh "\n=over $description_indent\n";
1218     print $fh "\n=item C<$_->{name}>\n" for @items;
1219
1220     # If we're printing only a link to an element, this isn't the major entry,
1221     # so no X<> here.
1222     if ($flags !~ /h/) {
1223         print $fh "X<$_->{name}>" for @items;
1224         print $fh "\n";
1225     }
1226
1227     for my $item (@items) {
1228         if ($item->{flags} =~ /D/) {
1229             print $fh <<~"EOT";
1230
1231                 C<B<DEPRECATED!>>  It is planned to remove C<$item->{name}> from a
1232                 future release of Perl.  Do not use it for new code; remove it from
1233                 existing code.
1234                 EOT
1235         }
1236         elsif ($item->{flags} =~ /x/) {
1237             print $fh <<~"EOT";
1238
1239                 NOTE: C<$item->{name}> is B<experimental> and may change or be
1240                 removed without notice.
1241                 EOT
1242         }
1243     }
1244
1245     chomp $pod;     # Make sure prints pod with a single trailing \n
1246     print $fh "\n", $pod, "\n";
1247
1248     for my $item (@items) {
1249         my $item_flags = $item->{flags};
1250         my $item_name = $item->{name};
1251
1252         print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1253                                                     if $item_flags =~ /O/;
1254         # Is Perl_, but no #define foo # Perl_foo
1255         if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1256
1257              # Can't handle threaded varargs
1258          || ($item_flags =~ /f/ && $item_flags !~ /T/))
1259         {
1260             $item->{name} = "Perl_$item_name";
1261             print $fh <<~"EOT";
1262
1263                 NOTE: C<$item_name> must be explicitly called as
1264                 C<$item->{name}>
1265                 EOT
1266             print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1267             print $fh ".\n";
1268         }
1269     }
1270
1271     if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1272                             # to never warrant a usage line
1273         warn("U and s flags are incompatible")
1274                                             if $flags =~ /U/ && $flags =~ /s/;
1275         # nothing
1276     } else {
1277
1278         print $fh "\n=over $usage_indent\n";
1279
1280         if (defined $docref->{usage}) {     # An override of the usage section
1281             print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1282         }
1283         else {
1284
1285             # Add the thread context formal parameter on expanded-out names
1286             for my $item (@items) {
1287                 unshift $item->{args}->@*, (($item->{args}->@*)
1288                                             ? "pTHX_"
1289                                             : "pTHX")
1290                                                    if $item->{flags} !~ /T/
1291                                                    && $item->{name} =~ /^Perl_/;
1292             }
1293
1294             # Look through all the items in this entry.  If all have the same
1295             # return type and arguments, only the main entry is displayed.
1296             # Also, find the longest return type and longest name so that if
1297             # multiple ones are shown, they can be vertically aligned nicely
1298             my $need_individual_usage = 0;
1299             my $longest_name_length = length $items[0]->{name};
1300             my $base_ret_type = $items[0]->{ret_type};
1301             my $longest_ret = length $base_ret_type;
1302             my @base_args = $items[0]->{args}->@*;
1303             for (my $i = 1; $i < @items; $i++) {
1304                 no warnings 'experimental::smartmatch';
1305                 my $item = $items[$i];
1306                 $need_individual_usage = 1
1307                                     if    $item->{ret_type} ne $base_ret_type
1308                                     || ! ($item->{args}->@* ~~ @base_args);
1309                 my $ret_length = length $item->{ret_type};
1310                 $longest_ret = $ret_length if $ret_length > $longest_ret;
1311                 my $name_length = length $item->{name};
1312                 $longest_name_length = $name_length
1313                                         if $name_length > $longest_name_length;
1314             }
1315
1316             # If we're only showing one entry, only its length matters.
1317             $longest_name_length = length($items[0]->{name})
1318                                                 unless $need_individual_usage;
1319             print $fh "\n";
1320
1321             my $indent = 1;     # 1 is sufficient for verbatim; =over is used
1322                                 # for more
1323             my $ret_name_sep_length = 2; # spaces between return type and name
1324             my $name_indent = $indent + $longest_ret;
1325             $name_indent += $ret_name_sep_length if $longest_ret;
1326
1327             my $this_max_width =
1328                                $max_width - $description_indent - $usage_indent;
1329
1330             for my $item (@items) {
1331                 my $ret_type = $item->{ret_type};
1332                 my @args = $item->{args}->@*;
1333                 my $name = $item->{name};
1334                 my $item_flags = $item->{flags};
1335
1336                 # The return type
1337                 print $fh (" " x $indent), $ret_type;
1338
1339                 print $fh " " x (  $ret_name_sep_length
1340                                  + $longest_ret - length $ret_type);
1341                 print $fh $name;
1342
1343                 if ($item_flags =~ /n/) { # no args
1344                     warn("$file: $element_name: n flag without m")
1345                                                     unless $item_flags =~ /m/;
1346                     warn("$file: $name: n flag but apparently has args")
1347                                                                     if @args;
1348                 }
1349                 else {
1350                     # +1 for the '('
1351                     my $arg_indent = $name_indent + $longest_name_length + 1;
1352
1353                     # Align the argument lists of the items
1354                     print $fh " " x ($longest_name_length - length($name));
1355                     print $fh "(";
1356
1357                     # Display as many of the arguments on the same line as
1358                     # will fit.
1359                     my $total_length = $arg_indent;
1360                     my $first_line = 1;
1361                     for (my $i = 0; $i < @args; $i++) {
1362                         my $arg = $args[$i];
1363                         my $arg_length = length($arg);
1364
1365                         # All but the first arg are preceded by a blank
1366                         my $use_blank = $i > 0;
1367
1368                         # +1 here and below because either the argument has a
1369                         # trailing comma or trailing ')'
1370                         $total_length += $arg_length + $use_blank + 1;
1371
1372                         # We want none of the arguments to be positioned so
1373                         # they extend too far to the right.  Ideally, they
1374                         # should all start in the same column as the arguments
1375                         # on the first line of the function display do.  But, if
1376                         # necessary, outdent them so that they all start in
1377                         # another column, with the longest ending at the right
1378                         # margin, like so:
1379                         #                   void  function_name(pTHX_ short1,
1380                         #                                    short2,
1381                         #                                    very_long_argument,
1382                         #                                    short3)
1383                         if ($total_length > $this_max_width) {
1384
1385                             # If this is the first continuation line,
1386                             # calculate the longest argument; this will be the
1387                             # one we may have to outdent for.
1388                             if ($first_line) {
1389                                 $first_line = 0;
1390
1391                                 # We will need at least as much as the current
1392                                 # argument
1393                                 my $longest_arg_length = $arg_length
1394                                                        + $use_blank + 1;
1395
1396                                 # Look through the rest of the args to see if
1397                                 # any are longer than this one.
1398                                 for (my $j = $i + 1; $j < @args; $j++) {
1399
1400                                     # Include the trailing ',' or ')' in the
1401                                     # length.  No need to concern ourselves
1402                                     # with a leading blank, as the argument
1403                                     # would be positioned first on the next
1404                                     # line
1405                                     my $peek_arg_length = length ($args[$j])
1406                                                         + 1;
1407                                     $longest_arg_length = $peek_arg_length
1408                                       if $peek_arg_length > $longest_arg_length;
1409                                 }
1410
1411                                 # Calculate the new indent if necessary.
1412                                 $arg_indent =
1413                                         $this_max_width - $longest_arg_length
1414                                         if $arg_indent + $longest_arg_length
1415                                                             > $this_max_width;
1416                             }
1417
1418                             print $fh "\n", (" " x $arg_indent);
1419                             $total_length = $arg_indent + $arg_length + 1;
1420                             $use_blank = 0;
1421                         }
1422
1423                         # Display this argument
1424                         print $fh " " if $use_blank;
1425                         print $fh $arg;
1426                         print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1427
1428                     } # End of loop through args
1429
1430                     print $fh ")";
1431                 }
1432
1433                 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1434                 print $fh "\n";
1435
1436                 # Only the first entry is normally displayed
1437                 last unless $need_individual_usage;
1438             }
1439         }
1440
1441         print $fh "\n=back\n";
1442     }
1443
1444     print $fh "\n=back\n";
1445     print $fh "\n=for hackers\nFound in file $file\n";
1446 }
1447
1448 sub construct_missings_section {
1449     my ($pod_name, $missings_ref) = @_;
1450     my $text = "";
1451
1452     return $text unless $missings_ref->@*;
1453
1454     $text .= <<~EOT;
1455
1456         =head1 $undocumented_scn
1457
1458         EOT
1459     if ($pod_name eq 'perlapi') {
1460         $text .= <<~'EOT';
1461             The following functions have been flagged as part of the public
1462             API, but are currently undocumented.  Use them at your own risk,
1463             as the interfaces are subject to change.  Functions that are not
1464             listed in this document are not intended for public use, and
1465             should NOT be used under any circumstances.
1466
1467             If you feel you need to use one of these functions, first send
1468             email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1469             It may be that there is a good reason for the function not being
1470             documented, and it should be removed from this list; or it may
1471             just be that no one has gotten around to documenting it.  In the
1472             latter case, you will be asked to submit a patch to document the
1473             function.  Once your patch is accepted, it will indicate that the
1474             interface is stable (unless it is explicitly marked otherwise) and
1475             usable by you.
1476             EOT
1477     }
1478     else {
1479         $text .= <<~'EOT';
1480             The following functions are currently undocumented.  If you use
1481             one of them, you may wish to consider creating and submitting
1482             documentation for it.
1483             EOT
1484     }
1485
1486     $text .= "\n" . format_pod_indexes($missings_ref);
1487
1488     # Sort the elements.
1489     my @missings = sort dictionary_order $missings_ref->@*;
1490
1491
1492     $text .= "\n";
1493
1494     use integer;
1495
1496     # Look through all the elements in the list and see how many columns we
1497     # could place them in the output what will fit in the available width.
1498     my $min_spacer = 2;     # Need this much space between columns
1499     my $columns;
1500     my $rows;
1501     my @col_widths;
1502
1503   COLUMN:
1504     # We start with more columns, and work down until we find a number that
1505     # can accommodate all the data.  This algorithm doesn't require the
1506     # resulting columns to all have the same width.  This can allow for
1507     # as tight of packing as the data will possibly allow.
1508     for ($columns = 7; $columns > 1; $columns--) {
1509
1510         # For this many columns, we will need this many rows (final row might
1511         # not be completely filled)
1512         $rows = (@missings + $columns - 1) / $columns;
1513
1514         my $row_width = 0;
1515         my $i = 0;  # Which missing element
1516
1517         # For each column ...
1518         for my $col (0 .. $columns - 1) {
1519
1520             # Calculate how wide the column needs to be, which is based on the
1521             # widest element in it
1522             $col_widths[$col] = 0;
1523
1524             # Look through all the rows to find the widest element
1525             for my $row (0 .. $rows - 1) {
1526
1527                 # Skip if this row doesn't have an entry for this column
1528                 last if $i >= @missings;
1529
1530                 # This entry occupies this many bytes.
1531                 my $this_width = length $missings[$i];
1532
1533                 # All but the final column need a spacer between it and the
1534                 # next column over.
1535                 $this_width += $min_spacer if $col < $columns - 1;
1536
1537
1538                 # This column will need to have enough width to accommodate
1539                 # this element
1540                 if ($this_width > $col_widths[$col]) {
1541
1542                     # We can't have this many columns if the total width
1543                     # exceeds the available; bail now and try fewer columns
1544                     next COLUMN if $row_width + $this_width > $max_width;
1545
1546                     $col_widths[$col] = $this_width;
1547                 }
1548
1549                 $i++;   # The next row will contain the next item
1550             }
1551
1552             $row_width += $col_widths[$col];
1553             next COLUMN if $row_width > $max_width;
1554         }
1555
1556         # If we get this far, this many columns works
1557         last;
1558     }
1559
1560     # Here, have calculated the number of rows ($rows) and columns ($columns)
1561     # required to list the elements.  @col_widths contains the width of each
1562     # column.
1563
1564     $text .= "\n\n=over $description_indent\n\n";
1565
1566     # Assemble the output
1567     for my $row (0 .. $rows - 1) {
1568         for my $col (0 .. $columns - 1) {
1569             $text .= " " if $col == 0;  # Indent one to mark as verbatim
1570
1571             my $index = $row + $rows * $col;  # Convert 2 dimensions to 1
1572
1573             # Skip if this row doesn't have an entry for this column
1574             next if $index >= @missings;
1575
1576             my $element = $missings[$index];
1577             $text .= $element;
1578
1579             # Add alignment spaces for all but final column
1580             $text .= " " x ($col_widths[$col] - length $element)
1581                                                         if $col < $columns - 1;
1582         }
1583
1584         $text .= "\n";  # End of row
1585     }
1586
1587     $text .= "\n=back\n";
1588
1589     return $text;
1590 }
1591
1592 sub dictionary_order {
1593     # Do a case-insensitive dictionary sort, with only alphabetics
1594     # significant, falling back to using everything for determinancy
1595     return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
1596            || uc($a) cmp uc($b)
1597            || $a cmp $b;
1598 }
1599
1600 sub output {
1601     my ($podname, $header, $dochash, $missings_ref, $footer) = @_;
1602     #
1603     # strip leading '|' from each line which had been used to hide
1604     # pod from pod checkers.
1605     s/^\|//gm for $header, $footer;
1606
1607     my $fh = open_new("pod/$podname.pod", undef,
1608                       {by => "$0 extracting documentation",
1609                        from => 'the C source files'}, 1);
1610
1611     print $fh $header, "\n";
1612
1613     for my $section_name (sort dictionary_order keys %valid_sections) {
1614         my $section_info = $dochash->{$section_name};
1615
1616         # We allow empty sections in perlintern.
1617         if (! $section_info && $podname eq 'perlapi') {
1618             warn "Empty section '$section_name'; skipped";
1619             next;
1620         }
1621
1622         print $fh "\n=head1 $section_name\n";
1623
1624         if ($podname eq 'perlapi') {
1625             print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1626                                 if defined $valid_sections{$section_name}{header};
1627
1628             # Output any heading-level documentation and delete so won't get in
1629             # the way later
1630             if (exists $section_info->{""}) {
1631                 print $fh "\n", $section_info->{""}, "\n";
1632                 delete $section_info->{""};
1633             }
1634         }
1635
1636         if ($section_info && keys $section_info->%*) {
1637             for my $function_name (sort dictionary_order keys %$section_info) {
1638                 docout($fh, $function_name, $section_info->{$function_name});
1639             }
1640         }
1641         else {
1642             print $fh "\nThere are only public API items currently in $section_name\n";
1643         }
1644
1645         print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1646                             if $podname eq 'perlapi'
1647                             && defined $valid_sections{$section_name}{footer};
1648     }
1649
1650     print $fh construct_missings_section($podname, $missings_ref);
1651
1652     print $fh "\n$footer\n=cut\n";
1653
1654     read_only_bottom_close_and_rename($fh);
1655 }
1656
1657 foreach (@{(setup_embed())[0]}) {
1658     next if @$_ < 2;
1659     my ($flags, $ret_type, $func, @args) = @$_;
1660     s/\b(?:NN|NULLOK)\b\s+//g for @args;
1661
1662     $funcflags{$func} = {
1663                          flags => $flags,
1664                          ret_type => $ret_type,
1665                          args => \@args,
1666                         };
1667 }
1668
1669 # glob() picks up docs from extra .c or .h files that may be in unclean
1670 # development trees.
1671 open my $fh, '<', 'MANIFEST'
1672     or die "Can't open MANIFEST: $!";
1673 while (my $line = <$fh>) {
1674     next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1675
1676     # Don't pick up pods from these.  (We may pick up generated stuff from
1677     # /lib though)
1678     next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
1679
1680     open F, '<', $file or die "Cannot open $file for docs: $!\n";
1681     autodoc(\*F,$file);
1682     close F or die "Error closing $file: $!\n";
1683 }
1684 close $fh or die "Error whilst reading MANIFEST: $!";
1685
1686 parse_config_h();
1687
1688 for (sort keys %funcflags) {
1689     next unless $funcflags{$_}{flags} =~ /d/;
1690     next if $funcflags{$_}{flags} =~ /h/;
1691     warn "no docs for $_\n";
1692 }
1693
1694 foreach (sort keys %missing) {
1695     warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1696 }
1697
1698 # List of funcs in the public API that aren't also marked as core-only,
1699 # experimental nor deprecated.
1700 my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1701                     && $funcflags{$_}{flags} !~ /[xD]/
1702                     && !$docs{api}{$_}, keys %funcflags;
1703 push @missing_api, keys %missing_macros;
1704
1705 my @other_places = ( qw(perlclib perlxs), keys %described_elsewhere );
1706 my $places_other_than_intern = join ", ",
1707             map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1708 my $places_other_than_api = join ", ",
1709             map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
1710
1711 # The S< > makes things less densely packed, hence more readable
1712 my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1713 my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
1714 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1715 $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1716
1717 my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
1718 $valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1719
1720 my $section_list = join "\n\n", map { "=item L</$_>" }
1721                                 sort(dictionary_order keys %valid_sections),
1722                                 $undocumented_scn;  # Keep last
1723
1724 output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
1725 |=encoding UTF-8
1726 |
1727 |=head1 NAME
1728 |
1729 |perlapi - autogenerated documentation for the perl public API
1730 |
1731 |=head1 DESCRIPTION
1732 |X<Perl API> X<API> X<api>
1733 |
1734 |This file contains most of the documentation of the perl public API, as
1735 |generated by F<embed.pl>.  Specifically, it is a listing of functions,
1736 |macros, flags, and variables that may be used by extension writers.  Besides
1737 |L<perlintern> and F<config.h>, some items are listed here as being actually
1738 |documented in another pod.
1739 |
1740 |L<At the end|/$undocumented_scn> is a list of functions which have yet
1741 |to be documented.  Patches welcome!  The interfaces of these are subject to
1742 |change without notice.
1743 |
1744 |Some of the functions documented here are consolidated so that a single entry
1745 |serves for multiple functions which all do basically the same thing, but have
1746 |some slight differences.  For example, one form might process magic, while
1747 |another doesn't.  The name of each variation is listed at the top of the
1748 |single entry.  But if all have the same signature (arguments and return type)
1749 |except for their names, only the usage for the base form is shown.  If any
1750 |one of the forms has a different signature (such as returning C<const> or
1751 |not) every function's signature is explicitly displayed.
1752 |
1753 |Anything not listed here or in the other mentioned pods is not part of the
1754 |public API, and should not be used by extension writers at all.  For these
1755 |reasons, blindly using functions listed in F<proto.h> is to be avoided when
1756 |writing extensions.
1757 |
1758 |In Perl, unlike C, a string of characters may generally contain embedded
1759 |C<NUL> characters.  Sometimes in the documentation a Perl string is referred
1760 |to as a "buffer" to distinguish it from a C string, but sometimes they are
1761 |both just referred to as strings.
1762 |
1763 |Note that all Perl API global variables must be referenced with the C<PL_>
1764 |prefix.  Again, those not listed here are not to be used by extension writers,
1765 |and can be changed or removed without notice; same with macros.
1766 |Some macros are provided for compatibility with the older,
1767 |unadorned names, but this support may be disabled in a future release.
1768 |
1769 |Perl was originally written to handle US-ASCII only (that is characters
1770 |whose ordinal numbers are in the range 0 - 127).
1771 |And documentation and comments may still use the term ASCII, when
1772 |sometimes in fact the entire range from 0 - 255 is meant.
1773 |
1774 |The non-ASCII characters below 256 can have various meanings, depending on
1775 |various things.  (See, most notably, L<perllocale>.)  But usually the whole
1776 |range can be referred to as ISO-8859-1.  Often, the term "Latin-1" (or
1777 |"Latin1") is used as an equivalent for ISO-8859-1.  But some people treat
1778 |"Latin1" as referring just to the characters in the range 128 through 255, or
1779 |sometimes from 160 through 255.
1780 |This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1781 |
1782 |Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1783 |L<perlebcdic>).  Most of the documentation (and even comments in the code)
1784 |ignore the EBCDIC possibility.
1785 |For almost all purposes the differences are transparent.
1786 |As an example, under EBCDIC,
1787 |instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1788 |whenever this documentation refers to C<utf8>
1789 |(and variants of that name, including in function names),
1790 |it also (essentially transparently) means C<UTF-EBCDIC>.
1791 |But the ordinals of characters differ between ASCII, EBCDIC, and
1792 |the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1793 |number of bytes than in UTF-8.
1794 |
1795 |The organization of this document is tentative and subject to change.
1796 |Suggestions and patches welcome
1797 |L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1798 |
1799 |The sections in this document currently are
1800 |
1801 |=over
1802
1803 |$section_list
1804 |
1805 |=back
1806 |
1807 |The listing below is alphabetical, case insensitive.
1808 _EOB_
1809 |=head1 AUTHORS
1810 |
1811 |Until May 1997, this document was maintained by Jeff Okamoto
1812 |<okamoto\@corp.hp.com>.  It is now maintained as part of Perl itself.
1813 |
1814 |With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1815 |Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1816 |Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1817 |Stephen McCamant, and Gurusamy Sarathy.
1818 |
1819 |API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1820 |
1821 |Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1822 |
1823 |=head1 SEE ALSO
1824 |
1825 |F<config.h>, $places_other_than_api
1826 _EOE_
1827
1828 # List of non-static internal functions
1829 my @missing_guts =
1830  grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
1831
1832 output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
1833 |=head1 NAME
1834 |
1835 |perlintern - autogenerated documentation of purely B<internal>
1836 |Perl functions
1837 |
1838 |=head1 DESCRIPTION
1839 |X<internal Perl functions> X<interpreter functions>
1840 |
1841 |This file is the autogenerated documentation of functions in the
1842 |Perl interpreter that are documented using Perl's internal documentation
1843 |format but are not marked as part of the Perl API.  In other words,
1844 |B<they are not for use in extensions>!
1845
1846 |It has the same sections as L<perlapi>, though some may be empty.
1847 |
1848 _EOB_
1849 |
1850 |=head1 AUTHORS
1851 |
1852 |The autodocumentation system was originally added to the Perl core by
1853 |Benjamin Stuhl.  Documentation is by whoever was kind enough to
1854 |document their functions.
1855 |
1856 |=head1 SEE ALSO
1857 |
1858 |F<config.h>, $places_other_than_intern
1859 _EOE_