Correct code-like snippet in documentation

[perl5.git] / autodoc.pl
diff --git a/autodoc.pl b/autodoc.pl

index e0c09c0..9583901 100644 (file)
--- a/autodoc.pl
+++ b/autodoc.pl
@@ -11,7 +11,10 @@ use Text::Tabs;
  #
  #    embed.fnc
  #    plus all the core .c, .h, and .pod files listed in MANIFEST
-#
+#    plus %extra_input_pods
+
+my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
+
  # Has an optional arg, which is the directory to chdir to before reading
  # MANIFEST and the files
  #
@@ -24,15 +27,21 @@ use Text::Tabs;
  # Throughout the files read by this script are lines like
  #
  # =for apidoc_section Section Name
+# =for apidoc_section $section_name_variable
  #
  # "Section Name" (after having been stripped of leading space) must be one of
-# the legal section names, or an error is thrown.  All API elements defined
-# between this line and the next 'apidoc_section' line will go into the
-# section "Section Name", sorted by dictionary order within it.  perlintern
-# and perlapi are parallel documents, each potentially with a section "Section
-# Name".  Each element is marked as to which document it goes into.  If there
-# are none for a particular section in perlapi, that section is
-# omitted.
+# the legal section names, or an error is thrown.  $section_name_variable must
+# be one of the legal section name variables defined below; these expand to
+# legal section names.  This form is used so that minor wording changes in
+# these titles can be confined to this file.  All the names of the variables
+# end in '_scn'; this suffix is optional in the apidoc_section lines.
+#
+# All API elements defined between this line and the next 'apidoc_section'
+# line will go into the section "Section Name" (or $section_name_variable),
+# sorted by dictionary order within it.  perlintern and perlapi are parallel
+# documents, each potentially with a section "Section Name".  Each element is
+# marked as to which document it goes into.  If there are none for a
+# particular section in perlapi, that section is omitted.
  #
  # Also, in .[ch] files, there may be
  #
@@ -44,7 +53,7 @@ use Text::Tabs;
  # are used as a heading for section "Section Name" (in both perlintern and
  # perlapi).  This includes any =head[2-5].  If more than one '=head1 Section
  # Name' line has content, they appear in the generated pod in an undefined
-# order.
+# order.  Note that you can't use a $section_name_variable in =head1 lines
  #
  # The next =head1, =for apidoc_section, or file end terminates what goes into
  # the current section
@@ -60,9 +69,10 @@ use Text::Tabs;
  use strict;
  use warnings;
  
-# 80 column terminal - 1 for pager adding a column; -7 for nroff
-# indent;
-my $max_width = 80 - 1 - 7;
+my $nroff_min_indent = 4;   # for non-heading lines
+# 80 column terminal - 2 for pager adding 2 columns;
+my $max_width = 80 - 2 - $nroff_min_indent;
+my $standard_indent = 4;  # Any additional indentations
  
  if (@ARGV) {
      my $workdir = shift;
@@ -92,7 +102,7 @@ my $link_text = "Described in";
  my $description_indent = 4;
  my $usage_indent = 3;   # + initial blank yields 4 total
  
-my $av_scn = 'AV Handling';
+my $AV_scn = 'AV Handling';
  my $callback_scn = 'Callback Functions';
  my $casting_scn = 'Casting';
  my $casing_scn = 'Character case changing';
@@ -102,57 +112,66 @@ my $scope_scn = 'Compile-time scope hooks';
  my $compiler_scn = 'Compiler and Preprocessor information';
  my $directives_scn = 'Compiler directives';
  my $concurrency_scn = 'Concurrency';
-my $COP_scn = 'COP Hint Hashes';
+my $COP_scn = 'COPs and Hint Hashes';
  my $CV_scn = 'CV Handling';
  my $custom_scn = 'Custom Operators';
-my $dump_scn = 'Display and Dump functions';
-my $embedding_scn = 'Embedding and Interpreter Cloning';
+my $debugging_scn = 'Debugging';
+my $display_scn = 'Display functions';
+my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
  my $errno_scn = 'Errno';
  my $exceptions_scn = 'Exception Handling (simple) Macros';
  my $filesystem_scn = 'Filesystem configuration values';
-my $floating_scn = 'Floating point configuration values';
-my $formats_scn = 'Formats';
+my $filters_scn = 'Source Filters';
+my $floating_scn = 'Floating point';
  my $genconfig_scn = 'General Configuration';
  my $globals_scn = 'Global Variables';
-my $GV_scn = 'GV Handling';
+my $GV_scn = 'GV Handling and Stashes';
  my $hook_scn = 'Hook manipulation';
  my $HV_scn = 'HV Handling';
  my $io_scn = 'Input/Output';
-my $integer_scn = 'Integer configuration values';
+my $io_formats_scn = 'I/O Formats';
+my $integer_scn = 'Integer';
  my $lexer_scn = 'Lexer interface';
-my $locales_scn = 'Locales';
+my $locale_scn = 'Locales';
  my $magic_scn = 'Magic';
  my $memory_scn = 'Memory Management';
-my $mro_scn = 'MRO';
+my $MRO_scn = 'MRO';
  my $multicall_scn = 'Multicall Functions';
  my $numeric_scn = 'Numeric Functions';
-my $optree_construction_scn = 'Optree construction';
-my $optree_manipulation_scn = 'Optree Manipulation Functions';
+
+# Now combined, as unclear which functions go where, but separate names kept
+# to avoid 1) other code changes; 2) in case it seems better to split again
+my $optrees_scn = 'Optrees';
+my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
+my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
  my $pack_scn = 'Pack and Unpack';
  my $pad_scn = 'Pad Data Structures';
  my $password_scn = 'Password and Group access';
+my $reports_scn = 'Reports and Formats';
  my $paths_scn = 'Paths to system commands';
-my $intrpvar_scn = 'Per-Interpreter Variables';
  my $prototypes_scn = 'Prototype information';
  my $regexp_scn = 'REGEXP Functions';
  my $signals_scn = 'Signals';
  my $site_scn = 'Site configuration';
  my $sockets_scn = 'Sockets configuration values';
-my $filters_scn = 'Source Filters';
  my $stack_scn = 'Stack Manipulation Macros';
  my $string_scn = 'String Handling';
  my $SV_flags_scn = 'SV Flags';
  my $SV_scn = 'SV Handling';
+my $tainting_scn = 'Tainting';
  my $time_scn = 'Time';
  my $typedefs_scn = 'Typedef names';
  my $unicode_scn = 'Unicode Support';
  my $utility_scn = 'Utility Functions';
  my $versioning_scn = 'Versioning';
  my $warning_scn = 'Warning and Dieing';
-my $xs_scn = 'XS';
+my $XS_scn = 'XS';
+
+# Kept separate at end
+my $undocumented_scn = 'Undocumented elements';
  
  my %valid_sections = (
-    $av_scn => {},
+    $AV_scn => {},
      $callback_scn => {},
      $casting_scn => {},
      $casing_scn => {},
@@ -162,9 +181,17 @@ my %valid_sections = (
      $directives_scn => {},
      $concurrency_scn => {},
      $COP_scn => {},
-    $CV_scn => {},
+    $CV_scn => {
+        header => <<~'EOT',
+            This section documents functions to manipulate CVs which are
+            code-values, meaning subroutines.  For more information, see
+            L<perlguts>.
+            EOT
+    },
+
      $custom_scn => {},
-    $dump_scn => {},
+    $debugging_scn => {},
+    $display_scn => {},
      $embedding_scn => {},
      $errno_scn => {},
      $exceptions_scn => {},
@@ -173,6 +200,7 @@ my %valid_sections = (
              Also see L</List of capability HAS_foo symbols>.
              EOT
          },
+    $filters_scn => {},
      $floating_scn => {
          header => <<~'EOT',
              Also L</List of capability HAS_foo symbols> lists capabilities
@@ -180,21 +208,6 @@ my %valid_sections = (
              hyperbolic sine function.
              EOT
          },
-    $formats_scn => {
-        header => <<~'EOT',
-            These are used for formatting the corresponding type For example,
-            instead of saying
-
-             Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
-
-            use
-
-             Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
-
-            This keeps you from having to know if, say an IV, needs to be
-            printed as C<%d>, C<%ld>, or something else.
-            EOT
-      },
      $genconfig_scn => {
          header => <<~'EOT',
              This section contains configuration information not otherwise
@@ -204,7 +217,7 @@ my %valid_sections = (
              need to C<#include> files to get the corresponding functionality.
              EOT
  
-        footer => <<~'EOT',
+        footer => <<~EOT,
  
              =head2 List of capability C<HAS_I<foo>> symbols
  
@@ -217,7 +230,7 @@ my %valid_sections = (
              think that the expansion would add little or no value and take up
              a lot of space (because there are so many).  If you think certain
              ones should be expanded, send email to
-            L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
+            L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
  
              Each symbol here will be C<#define>d if and only if the platform
              has the capability.  If you need more detail, see the
@@ -233,7 +246,7 @@ my %valid_sections = (
  
              Example usage:
  
-            =over
+            =over $standard_indent
  
               #ifdef HAS_STRNLEN
                 use strnlen()
@@ -255,7 +268,7 @@ my %valid_sections = (
  
              Example usage:
  
-            =over
+            =over $standard_indent
  
               #ifdef I_WCHAR
                 #include <wchar.h>
@@ -269,23 +282,44 @@ my %valid_sections = (
      $hook_scn => {},
      $HV_scn => {},
      $io_scn => {},
+    $io_formats_scn => {
+        header => <<~'EOT',
+            These are used for formatting the corresponding type For example,
+            instead of saying
+
+             Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
+
+            use
+
+             Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
+
+            This keeps you from having to know if, say an IV, needs to be
+            printed as C<%d>, C<%ld>, or something else.
+            EOT
+      },
      $integer_scn => {},
      $lexer_scn => {},
-    $locales_scn => {},
+    $locale_scn => {},
      $magic_scn => {},
      $memory_scn => {},
-    $mro_scn => {},
+    $MRO_scn => {},
      $multicall_scn => {},
      $numeric_scn => {},
+    $optrees_scn => {},
      $optree_construction_scn => {},
      $optree_manipulation_scn => {},
      $pack_scn => {},
      $pad_scn => {},
      $password_scn => {},
      $paths_scn => {},
-    $intrpvar_scn => {},
      $prototypes_scn => {},
      $regexp_scn => {},
+    $reports_scn => {
+        header => <<~"EOT",
+            These are used in the simple report generation feature of Perl.
+            See L<perlform>.
+            EOT
+      },
      $signals_scn => {},
      $site_scn => {
          header => <<~'EOT',
@@ -295,22 +329,32 @@ my %valid_sections = (
              EOT
        },
      $sockets_scn => {},
-    $filters_scn => {},
      $stack_scn => {},
      $string_scn => {
-        header => <<~'EOT',
-            See also C<L</Unicode Support>>.
+        header => <<~EOT,
+            See also C<L</$unicode_scn>>.
              EOT
        },
      $SV_flags_scn => {},
      $SV_scn => {},
+    $tainting_scn => {},
      $time_scn => {},
      $typedefs_scn => {},
-    $unicode_scn => {},
+    $unicode_scn => {
+        header => <<~EOT,
+            L<perlguts/Unicode Support> has an introduction to this API.
+
+            See also C<L</$classification_scn>>,
+            C<L</$casing_scn>>,
+            and C<L</$string_scn>>.
+            Various functions outside this section also work specially with
+            Unicode.  Search for the string "utf8" in this document.
+            EOT
+      },
      $utility_scn => {},
      $versioning_scn => {},
      $warning_scn => {},
-    $xs_scn => {},
+    $XS_scn => {},
  );
  
  # Somewhat loose match for an apidoc line so we can catch minor typos.
@@ -325,7 +369,7 @@ my $apidoc_re = qr/ ^ (\s*)            # $1
                        (.*?)            # $7
                        \s* \n /x;
  # Only certain flags, dealing with display, are acceptable for apidoc_item
-my $display_flags = "fFnDopsT";
+my $display_flags = "fFnDopTx;";
  
  sub check_api_doc_line ($$) {
      my ($file, $in) = @_;
@@ -337,8 +381,9 @@ sub check_api_doc_line ($$) {
                           && length $2 > 0
                           && length $3 == 0
                           && length $4 > 0
-                         && length $6 > 0
-                         && length $7 > 0;
+                         && length $7 > 0
+                         && (    length $6 > 0
+                             || ($is_item && substr($7, 0, 1) eq '|'));
      my $proto_in_file = $7;
      my $proto = $proto_in_file;
      $proto = "||$proto" if $proto !~ /\|/;
@@ -354,7 +399,7 @@ Expected:
  (or 'apidoc_item')
  EOS
  
-    die "Only [$display_flags] allowed in apidoc_item"
+    die "Only [$display_flags] allowed in apidoc_item:\n$in"
                              if $is_item && $flags =~ /[^$display_flags]/;
  
      return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
@@ -376,10 +421,55 @@ sub embed_override($) {
      return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
  }
  
+# The section that is in effect at the beginning of the given file.  If not
+# listed here, an apidoc_section line must precede any apidoc lines.
+# This allows the files listed here that generally are single-purpose, to not
+# have to worry about the autodoc section
+my %initial_file_section = (
+                            'av.c' => $AV_scn,
+                            'av.h' => $AV_scn,
+                            'cv.h' => $CV_scn,
+                            'deb.c' => $debugging_scn,
+                            'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
+                            'doio.c' => $io_scn,
+                            'gv.c' => $GV_scn,
+                            'gv.h' => $GV_scn,
+                            'hv.h' => $HV_scn,
+                            'locale.c' => $locale_scn,
+                            'malloc.c' => $memory_scn,
+                            'numeric.c' => $numeric_scn,
+                            'opnames.h' => $optree_construction_scn,
+                            'pad.h'=> $pad_scn,
+                            'patchlevel.h' => $versioning_scn,
+                            'perlio.h' => $io_scn,
+                            'pod/perlapio.pod' => $io_scn,
+                            'pod/perlcall.pod' => $callback_scn,
+                            'pod/perlembed.pod' => $embedding_scn,
+                            'pod/perlfilter.pod' => $filters_scn,
+                            'pod/perliol.pod' => $io_scn,
+                            'pod/perlmroapi.pod' => $MRO_scn,
+                            'pod/perlreguts.pod' => $regexp_scn,
+                            'pp_pack.c' => $pack_scn,
+                            'pp_sort.c' => $SV_scn,
+                            'regcomp.c' => $regexp_scn,
+                            'regexp.h' => $regexp_scn,
+                            'sv.h' => $SV_scn,
+                            'sv.c' => $SV_scn,
+                            'sv_inline.h' => $SV_scn,
+                            'taint.c' => $tainting_scn,
+                            'unicode_constants.h' => $unicode_scn,
+                            'utf8.c' => $unicode_scn,
+                            'utf8.h' => $unicode_scn,
+                            'vutil.c' => $versioning_scn,
+                           );
+
  sub autodoc ($$) { # parse a file and extract documentation info
      my($fh,$file) = @_;
      my($in, $line_num, $header, $section);
  
+    $section = $initial_file_section{$file}
+                                    if defined $initial_file_section{$file};
+
      my $file_is_C = $file =~ / \. [ch] $ /x;
  
      # Count lines easier
@@ -404,6 +494,11 @@ sub autodoc ($$) { # parse a file and extract documentation info
          if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
  
              $section = $1;
+            if ($section =~ / ^ \$ /x) {
+                $section .= '_scn' unless $section =~ / _scn $ /;
+                $section = eval "$section";
+                die "Unknown \$section variable '$section' in $file: $@" if $@;
+            }
              die "Unknown section name '$section' in $file near line $.\n"
                                      unless defined $valid_sections{$section};
  
@@ -433,14 +528,20 @@ sub autodoc ($$) { # parse a file and extract documentation info
                  $missing{$element_name} = $file;
              }
  
-            die "flag $1 is not legal (for function $element_name (from $file))"
-                        if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXxy] ) /x;
+            die "flag '$1' is not legal (for function $element_name (from $file))"
+                        if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy;#] ) /x;
  
              die "'u' flag must also have 'm' or 'y' flags' for $element_name"
                                              if $flags =~ /u/ && $flags !~ /[my]/;
              warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
                          if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
  
+            if ($flags =~ /#/) {
+                die "Return type must be empty for '$element_name'"
+                                                                   if $ret_type;
+                $ret_type = '#ifdef';
+            }
+
              if (exists $seen{$element_name} && $flags !~ /h/) {
                  die ("'$element_name' in $file was already documented in $seen{$element_name}");
              }
@@ -491,7 +592,8 @@ sub autodoc ($$) { # parse a file and extract documentation info
                                                          unless $element_name;
  
              # We accept blank lines between these, but nothing else;
-            die "apidoc_item lines must immediately follow apidoc lines"
+            die "apidoc_item lines must immediately follow apidoc lines for "
+              . " '$element_name' in $file"
                                                              if $text =~ /\S/;
              # Override this line with any info in embed.fnc
              my ($embed_flags, $embed_ret_type, @embed_args)
@@ -537,22 +639,23 @@ sub autodoc ($$) { # parse a file and extract documentation info
          if ($element_name) {
  
              # Here, we have accumulated into $text, the pod for $element_name
-            my $where = $flags =~ /A/ ? 'api' : 'guts';
+            my $where = $flags =~ /A/ ? 'api' : 'intern';
  
-            $section = "Functions in file $file" unless defined $section;
              die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
                                                      unless defined $section;
-            if (exists $docs{$where}{$section}{$element_name}) {
+            my $is_link_only = ($flags =~ /h/);
+            if (! $is_link_only && exists $docs{$where}{$section}{$element_name}) {
                  warn "$0: duplicate API entry for '$element_name' in"
                      . " $where/$section\n";
                  next;
              }
  
              # Override the text with just a link if the flags call for that
-            my $is_link_only = ($flags =~ /h/);
              if ($is_link_only) {
                  if ($file_is_C) {
-                    die "Can't currently handle link with items to it" if @items;
+                    die "Can't currently handle link with items to it:\n$in"
+                                                                       if @items;
+                    $docs{$where}{$section}{X_tags}{$element_name} = $file;
                      redo;    # Don't put anything if C source
                  }
  
@@ -569,7 +672,7 @@ sub autodoc ($$) { # parse a file and extract documentation info
                  # Don't output a usage example for linked to documentation if
                  # it is trivial (has no arguments) and we aren't to add a
                  # semicolon
-                $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
+                $flags .= 'U' if $flags =~ /n/ && $flags !~ /[U;]/;
  
                  # Keep track of all the pod files that we refer to.
                  push $described_elsewhere{$podname}->@*, $podname;
@@ -598,6 +701,7 @@ my %configs;
  my @has_defs;
  my @has_r_defs;     # Reentrant symbols
  my @include_defs;
+
  sub parse_config_h {
      use re '/aa';   # Everthing is ASCII in this file
  
@@ -923,6 +1027,11 @@ sub parse_config_h {
              {
                  $configs{$name}{'section'} = $time_scn;
              }
+            elsif (   $name =~ / ^ [[:alpha:]]+ f $ /x
+                   && $configs{$name}{pod} =~ m/ \b format \b /ix)
+            {
+                $configs{$name}{'section'} = $io_formats_scn;
+            }
              elsif ($name =~ /  DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
                              | $sb CASTFLAGS $sb
                              | QUADMATH
@@ -953,10 +1062,10 @@ sub parse_config_h {
              elsif (   $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
                      && $configs{$name}{pod} =~ m/ \b format \b /ix)
              {
-                $configs{$name}{'section'} = $formats_scn;
+                $configs{$name}{'section'} = $io_formats_scn;
              }
              elsif ($name =~ / BACKTRACE /x) {
-                $configs{$name}{'section'} = $dump_scn;
+                $configs{$name}{'section'} = $debugging_scn;
              }
              elsif ($name =~ / ALLOC $sb /x) {
                  $configs{$name}{'section'} = $memory_scn;
@@ -993,7 +1102,7 @@ sub parse_config_h {
                  $configs{$name}{'section'} = $paths_scn;
              }
              elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
-                $configs{$name}{'section'} = $locales_scn;
+                $configs{$name}{'section'} = $locale_scn;
              }
              elsif ($configs{$name}{pod} =~ /  GCC | C99 | C\+\+ /xi) {
                  $configs{$name}{'section'} = $compiler_scn;
@@ -1017,9 +1126,9 @@ sub parse_config_h {
                  $configs{$name}{'section'} = $site_scn;
              }
              elsif (   $pod =~ / \b floating $dash_or_spaces point \b /ix
-                    || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
-                    || $pod =~ / \b doubles \b /ix
-                    || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
+                   || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
+                   || $pod =~ / \b doubles \b /ix
+                   || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
              {
                  $configs{$name}{'section'} =
                                      $floating_scn;
@@ -1075,6 +1184,7 @@ sub parse_config_h {
              $flags .= 'AdmnT';
              $flags .= 'U' unless defined $configs{$name}{usage};
  
+            # All the information has been gathered; save it
              $docs{'api'}{$section}{$name}{flags} = $flags;
              $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
              $docs{'api'}{$section}{$name}{ret_type} = "";
@@ -1087,7 +1197,34 @@ sub parse_config_h {
      }
  }
  
-sub docout ($$$) { # output the docs for one function
+sub format_pod_indexes($) {
+    my $entries_ref = shift;
+
+    # Output the X<> references to the names, packed since they don't get
+    # displayed, but not too many per line so that when someone is editing the
+    # file, it doesn't run on
+
+    my $text ="";
+    my $line_length = 0;
+    for my $name (sort dictionary_order $entries_ref->@*) {
+        my $entry = "X<$name>";
+        my $entry_length = length $entry;
+
+        # Don't loop forever if we have a verrry long name, and don't go too
+        # far to the right.
+        if ($line_length > 0 && $line_length + $entry_length > $max_width) {
+            $text .= "\n";
+            $line_length = 0;
+        }
+
+        $text .= $entry;
+        $line_length += $entry_length;
+    }
+
+    return $text;
+}
+
+sub docout ($$$) { # output the docs for one function group
      my($fh, $element_name, $docref) = @_;
  
      # Trim trailing space
@@ -1119,21 +1256,55 @@ sub docout ($$$) { # output the docs for one function
          print $fh "\n";
      }
  
+    my @deprecated;
+    my @experimental;
      for my $item (@items) {
-        if ($item->{flags} =~ /D/) {
-            print $fh <<~"EOT";
+        push @deprecated,   "C<$item->{name}>" if $item->{flags} =~ /D/;
+        push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
+    }
  
-                C<B<DEPRECATED!>>  It is planned to remove C<$item->{name}> from a
-                future release of Perl.  Do not use it for new code; remove it from
-                existing code.
-                EOT
-        }
-        elsif ($item->{flags} =~ /x/) {
-            print $fh <<~"EOT";
+    for my $which (\@deprecated, \@experimental) {
+        if ($which->@*) {
+            my $is;
+            my $it;
+            my $list;
  
-                NOTE: C<$item->{name}> is B<experimental> and may change or be
-                removed without notice.
-                EOT
+            if ($which->@* == 1) {
+                $is = 'is';
+                $it = 'it';
+                $list = $which->[0];
+            }
+            elsif ($which->@* == @items) {
+                $is = 'are';
+                $it = 'them';
+                $list = (@items == 2)
+                         ? "both forms"
+                         : "all these forms";
+            }
+            else {
+                $is = 'are';
+                $it = 'them';
+                my $final = pop $which->@*;
+                $list = "the " . join ", ", $which->@*;
+                $list .= "," if $which->@* > 1;
+                $list .= " and $final forms";
+            }
+
+            if ($which == \@deprecated) {
+                print $fh <<~"EOT";
+
+                    C<B<DEPRECATED!>>  It is planned to remove $list
+                    from a future release of Perl.  Do not use $it for
+                    new code; remove $it from existing code.
+                    EOT
+            }
+            else {
+                print $fh <<~"EOT";
+
+                    NOTE: $list $is B<experimental> and may change or be
+                    removed without notice.
+                    EOT
+            }
          }
      }
  
@@ -1147,10 +1318,10 @@ sub docout ($$$) { # output the docs for one function
          print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
                                                      if $item_flags =~ /O/;
          # Is Perl_, but no #define foo # Perl_foo
-        if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
+        if (   ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
  
-             # Can't handle threaded varargs
-         || ($item_flags =~ /f/ && $item_flags !~ /T/))
+                # Can't handle threaded varargs
+            || ($item_flags =~ /f/ && $item_flags !~ /T/))
          {
              $item->{name} = "Perl_$item_name";
              print $fh <<~"EOT";
@@ -1165,8 +1336,8 @@ sub docout ($$$) { # output the docs for one function
  
      if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
                              # to never warrant a usage line
-        warn("U and s flags are incompatible")
-                                            if $flags =~ /U/ && $flags =~ /s/;
+        warn("U and ; flags are incompatible")
+                                            if $flags =~ /U/ && $flags =~ /;/;
          # nothing
      } else {
  
@@ -1187,7 +1358,8 @@ sub docout ($$$) { # output the docs for one function
              }
  
              # Look through all the items in this entry.  If all have the same
-            # return type and arguments, only the main entry is displayed.
+            # return type and arguments (including thread context), only the
+            # main entry is displayed.
              # Also, find the longest return type and longest name so that if
              # multiple ones are shown, they can be vertically aligned nicely
              my $need_individual_usage = 0;
@@ -1195,12 +1367,16 @@ sub docout ($$$) { # output the docs for one function
              my $base_ret_type = $items[0]->{ret_type};
              my $longest_ret = length $base_ret_type;
              my @base_args = $items[0]->{args}->@*;
+            my $base_thread_context = $items[0]->{flags} =~ /T/;
              for (my $i = 1; $i < @items; $i++) {
-                no warnings 'experimental::smartmatch';
                  my $item = $items[$i];
+                my $args_are_equal = $item->{args}->@* == @base_args
+                  && !grep $item->{args}[$_] ne $base_args[$_], keys @base_args;
                  $need_individual_usage = 1
                                      if    $item->{ret_type} ne $base_ret_type
-                                    || ! ($item->{args}->@* ~~ @base_args);
+                                    || !  $args_are_equal
+                                    ||   (   $item->{flags} =~ /T/
+                                          != $base_thread_context);
                  my $ret_length = length $item->{ret_type};
                  $longest_ret = $ret_length if $ret_length > $longest_ret;
                  my $name_length = length $item->{name};
@@ -1219,9 +1395,8 @@ sub docout ($$$) { # output the docs for one function
              my $name_indent = $indent + $longest_ret;
              $name_indent += $ret_name_sep_length if $longest_ret;
  
-            # 80 column terminal - 1 for pager adding a column; -7 for nroff
-            # indent;
-            my $max_length = 80 - 1 - 7 - $description_indent - $usage_indent;
+            my $this_max_width =
+                               $max_width - $description_indent - $usage_indent;
  
              for my $item (@items) {
                  my $ret_type = $item->{ret_type};
@@ -1276,7 +1451,7 @@ sub docout ($$$) { # output the docs for one function
                          #                                    short2,
                          #                                    very_long_argument,
                          #                                    short3)
-                        if ($total_length > $max_length) {
+                        if ($total_length > $this_max_width) {
  
                              # If this is the first continuation line,
                              # calculate the longest argument; this will be the
@@ -1305,9 +1480,10 @@ sub docout ($$$) { # output the docs for one function
                                  }
  
                                  # Calculate the new indent if necessary.
-                                $arg_indent = $max_length - $longest_arg_length
+                                $arg_indent =
+                                        $this_max_width - $longest_arg_length
                                          if $arg_indent + $longest_arg_length
-                                                                > $max_length;
+                                                            > $this_max_width;
                              }
  
                              print $fh "\n", (" " x $arg_indent);
@@ -1325,7 +1501,7 @@ sub docout ($$$) { # output the docs for one function
                      print $fh ")";
                  }
  
-                print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
+                print $fh ";" if $item_flags =~ /;/; # semicolon: "dTHR;"
                  print $fh "\n";
  
                  # Only the first entry is normally displayed
@@ -1341,66 +1517,20 @@ sub docout ($$$) { # output the docs for one function
  }
  
  sub construct_missings_section {
-    my ($pod_name, $missings_ref) = @_;
+    my ($missings_hdr, $missings_ref) = @_;
      my $text = "";
  
-    return $text unless $missings_ref->@*;
-
-    $text .= <<~EOT;
-
-        =head1 Undocumented functions
-
-        EOT
-    if ($pod_name eq 'perlapi') {
-        $text .= <<~'EOT';
-            The following functions have been flagged as part of the public
-            API, but are currently undocumented.  Use them at your own risk,
-            as the interfaces are subject to change.  Functions that are not
-            listed in this document are not intended for public use, and
-            should NOT be used under any circumstances.
-
-            If you feel you need to use one of these functions, first send
-            email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
-            It may be that there is a good reason for the function not being
-            documented, and it should be removed from this list; or it may
-            just be that no one has gotten around to documenting it.  In the
-            latter case, you will be asked to submit a patch to document the
-            function.  Once your patch is accepted, it will indicate that the
-            interface is stable (unless it is explicitly marked otherwise) and
-            usable by you.
-            EOT
-    }
-    else {
-        $text .= <<~'EOT';
-            The following functions are currently undocumented.  If you use
-            one of them, you may wish to consider creating and submitting
-            documentation for it.
-            EOT
-    }
+    $text .= "$missings_hdr\n" . format_pod_indexes($missings_ref);
  
-    $text .= "\n";
+    if ($missings_ref->@* == 0) {
+        return $text . "\nThere are currently no items of this type\n";
+    }
  
      # Sort the elements.
      my @missings = sort dictionary_order $missings_ref->@*;
  
-    # Output the X<> references to the names, packed since they don't get
-    # displayed, but not too many per line so that when someone is editing the
-    # file, it doesn't run on
-    my $line_length = 0;
-    for my $missing (sort dictionary_order @missings) {
-        my $entry = "X<$missing>";
-        my $entry_length = length $entry;
  
-        # Don't loop forever if we have a verrry long name, and don't go too
-        # far to the right.
-        if ($line_length > 0 && $line_length + $entry_length > $max_width) {
-            $text .= "\n";
-            $line_length = 0;
-        }
-
-        $text .= $entry;
-        $line_length += $entry_length;
-    }
+    $text .= "\n";
  
      use integer;
  
@@ -1416,13 +1546,17 @@ sub construct_missings_section {
      # can accommodate all the data.  This algorithm doesn't require the
      # resulting columns to all have the same width.  This can allow for
      # as tight of packing as the data will possibly allow.
-    for ($columns = 7; $columns > 1; $columns--) {
+    for ($columns = 7; $columns >= 1; $columns--) {
  
          # For this many columns, we will need this many rows (final row might
          # not be completely filled)
          $rows = (@missings + $columns - 1) / $columns;
  
-        my $row_width = 0;
+        # We only need to execute this final iteration to calculate the number
+        # of rows, as we can't get fewer than a single column.
+        last if $columns == 1;
+
+        my $row_width = 1;  # For 1 space indent
          my $i = 0;  # Which missing element
  
          # For each column ...
@@ -1472,7 +1606,7 @@ sub construct_missings_section {
      # required to list the elements.  @col_widths contains the width of each
      # column.
  
-    $text .= "\n\n=over $description_indent\n\n";
+    $text .= "\n";
  
      # Assemble the output
      for my $row (0 .. $rows - 1) {
@@ -1495,25 +1629,78 @@ sub construct_missings_section {
          $text .= "\n";  # End of row
      }
  
-    $text .= "\n=back\n";
-
      return $text;
  }
  
  sub dictionary_order {
-    # Do a case-insensitive dictionary sort, with only alphabetics
-    # significant, falling back to using everything for determinancy
-    return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
-           || uc($a) cmp uc($b)
-           || $a cmp $b;
+    # Do a case-insensitive dictionary sort, falling back in stages to using
+    # everything for determinancy.  The initial comparison ignores
+    # all non-word characters and non-trailing underscores and digits, with
+    # trailing ones collating to after any other characters.  This collation
+    # order continues in case tie breakers are needed; sequences of digits
+    # that do get looked at always compare numerically.  The first tie
+    # breaker takes all digits and underscores into account.  The next tie
+    # breaker uses a caseless character-by-character comparison of everything
+    # (including non-word characters).  Finally is a cased comparison.
+    #
+    # This gives intuitive results, but obviously could be tweaked.
+
+    no warnings 'non_unicode';
+
+    local $a = $a;
+    local $b = $b;
+
+    # Convert all digit sequences to same length with leading zeros, so for
+    # example, 8 will compare less than 16 (using a fill length value that
+    # should be longer than any sequence in the input).
+    $a =~ s/(\d+)/sprintf "%06d", $1/ge;
+    $b =~ s/(\d+)/sprintf "%06d", $1/ge;
+
+    # Translate any underscores and digits so they compare after all Unicode
+    # characters
+    $a =~ tr[_0-9]/\x{110000}-\x{11000A}/;
+    $b =~ tr[_0-9]/\x{110000}-\x{11000A}/;
+
+    use feature 'state';
+    # Modify \w, \W to reflect the changes.
+    state $ud = '\x{110000}-\x{11000A}';    # xlated underscore, digits
+    state $w = "\\w$ud";                    # new \w string
+    state $mod_w = qr/[$w]/;
+    state $mod_W = qr/[^$w]/;
+
+    # Only \w for initial comparison
+    my $a_only_word = uc($a =~ s/$mod_W//gr);
+    my $b_only_word = uc($b =~ s/$mod_W//gr);
+
+    # And not initial nor interior underscores nor digits (by squeezing them
+    # out)
+    my $a_stripped = $a_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
+    my $b_stripped = $b_only_word =~ s/ (*atomic:[$ud]+) (*pla: $mod_w ) //grxx;
+
+    # If the stripped versions differ, use that as the comparison.
+    my $cmp = $a_stripped cmp $b_stripped;
+    return $cmp if $cmp;
+
+    # For the first tie breaker, repeat, but consider initial and interior
+    # underscores and digits, again having those compare after all Unicode
+    # characters
+    $cmp = $a_only_word cmp $b_only_word;
+    return $cmp if $cmp;
+
+    # Next tie breaker is just a caseless comparison
+    $cmp = uc($a) cmp uc($b);
+    return $cmp if $cmp;
+
+    # Finally a straight comparison
+    return $a cmp $b;
  }
  
  sub output {
-    my ($podname, $header, $dochash, $missings_ref, $footer) = @_;
+    my ($podname, $header, $dochash, $footer, @missings_refs) = @_;
      #
      # strip leading '|' from each line which had been used to hide
      # pod from pod checkers.
-    s/^\|//gm for $header, $footer;
+    s/^\|//gm for $header, $footer, @missings_refs;
  
      my $fh = open_new("pod/$podname.pod", undef,
                        {by => "$0 extracting documentation",
@@ -1532,6 +1719,12 @@ sub output {
  
          print $fh "\n=head1 $section_name\n";
  
+        if ($section_info->{X_tags}) {
+            print $fh "X<$_>" for sort keys $section_info->{X_tags}->%*;
+            print $fh "\n";
+            delete $section_info->{X_tags};
+        }
+
          if ($podname eq 'perlapi') {
              print $fh "\n", $valid_sections{$section_name}{header}, "\n"
                                  if defined $valid_sections{$section_name}{header};
@@ -1544,14 +1737,15 @@ sub output {
              }
          }
  
-
-        if ($section_info) {
+        if ($section_info && keys $section_info->%*) {
              for my $function_name (sort dictionary_order keys %$section_info) {
                  docout($fh, $function_name, $section_info->{$function_name});
              }
          }
          else {
-            print $fh "\nThere are only public API items currently in $section_name\n";
+            my $pod_type = ($podname eq 'api') ? "public" : "internal";
+            print $fh "\nThere are currently no $pod_type API items in ",
+                      $section_name, "\n";
          }
  
          print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
@@ -1559,7 +1753,23 @@ sub output {
                              && defined $valid_sections{$section_name}{footer};
      }
  
-    print $fh construct_missings_section($podname, $missings_ref);
+
+    my $first_time = 1;
+    while (1) {
+        my $missings_hdr = shift @missings_refs or last;
+        my $missings_ref = shift @missings_refs or die "Foo";
+
+        if ($first_time) {
+            $first_time = 0;
+            print $fh <<~EOT;
+
+                =head1 $undocumented_scn
+
+                EOT
+        }
+
+        print $fh construct_missings_section($missings_hdr, $missings_ref);
+    }
  
      print $fh "\n$footer\n=cut\n";
  
@@ -1567,14 +1777,16 @@ sub output {
  }
  
  foreach (@{(setup_embed())[0]}) {
-    next if @$_ < 2;
-    my ($flags, $ret_type, $func, @args) = @$_;
-    s/\b(?:NN|NULLOK)\b\s+//g for @args;
+    my $embed= $_->{embed}
+        or next;
+    my ($flags, $ret_type, $func, $args) = @{$embed}{qw(flags return_type name args)};
+    my @munged_args= @$args;
+    s/\b(?:NN|NULLOK)\b\s+//g for @munged_args;
  
      $funcflags{$func} = {
                           flags => $flags,
                           ret_type => $ret_type,
-                         args => \@args,
+                         args => \@munged_args,
                          };
  }
  
@@ -1585,9 +1797,9 @@ open my $fh, '<', 'MANIFEST'
  while (my $line = <$fh>) {
      next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
  
-    # Don't pick up pods from these.  (We may pick up generated stuff from
-    # /lib though)
-    next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
+    # Don't pick up pods from these.
+    next if $file =~ m! ^ ( cpan | dist | ext ) / !x
+         && ! defined $extra_input_pods{$file};
  
      open F, '<', $file or die "Cannot open $file for docs: $!\n";
      autodoc(\*F,$file);
@@ -1609,13 +1821,32 @@ foreach (sort keys %missing) {
  
  # List of funcs in the public API that aren't also marked as core-only,
  # experimental nor deprecated.
-my @missing_api = grep $funcflags{$_}{flags} =~ /A/
-                    && $funcflags{$_}{flags} !~ /[xD]/
-                    && !$docs{api}{$_}, keys %funcflags;
+
+my @undocumented_api =    grep {        $funcflags{$_}{flags} =~ /A/
+                                   && ! $docs{api}{$_}
+                               } keys %funcflags;
+my @undocumented_intern = grep {        $funcflags{$_}{flags} !~ /[AS]/
+                                   && ! $docs{intern}{$_}
+                               } keys %funcflags;
+my @undocumented_deprecated_api    = grep { $funcflags{$_}{flags} =~ /D/ }
+                                                            @undocumented_api;
+my @undocumented_deprecated_intern = grep { $funcflags{$_}{flags} =~ /D/ }
+                                                           @undocumented_intern;
+my @undocumented_experimental_api    =  grep { $funcflags{$_}{flags} =~ /x/ }
+                                                            @undocumented_api;
+my @undocumented_experimental_intern =  grep { $funcflags{$_}{flags} =~ /x/ }
+                                                           @undocumented_intern;
+my @missing_api = grep { $funcflags{$_}{flags} !~ /[xD]/ } @undocumented_api;
  push @missing_api, keys %missing_macros;
  
-my $other_places = join ", ", map { "L<$_>" } sort dictionary_order qw( perlclib perlxs),
-                                                               keys %described_elsewhere;
+my @missing_intern = grep { $funcflags{$_}{flags} !~ /[xD]/ }
+                                                           @undocumented_intern;
+
+my @other_places = ( qw(perlclib ), keys %described_elsewhere );
+my $places_other_than_intern = join ", ",
+            map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
+my $places_other_than_api = join ", ",
+            map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
  
  # The S< > makes things less densely packed, hence more readable
  my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
@@ -1626,9 +1857,13 @@ $valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
  my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
  $valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
  
-my $section_list = join "\n\n", map { "=item L</$_>" } sort dictionary_order keys %valid_sections;
+my $section_list = join "\n\n", map { "=item L</$_>" }
+                                sort(dictionary_order keys %valid_sections),
+                                $undocumented_scn;  # Keep last
  
-output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
+# Leading '|' is to hide these lines from pod checkers.  khw is unsure if this
+# is still needed.
+my $api_hdr = <<"_EOB_";
  |=encoding UTF-8
  |
  |=head1 NAME
@@ -1644,7 +1879,7 @@ output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
  |L<perlintern> and F<config.h>, some items are listed here as being actually
  |documented in another pod.
  |
-|L<At the end|/Undocumented functions> is a list of functions which have yet
+|L<At the end|/$undocumented_scn> is a list of functions which have yet
  |to be documented.  Patches welcome!  The interfaces of these are subject to
  |change without notice.
  |
@@ -1669,7 +1904,7 @@ output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
  |
  |Note that all Perl API global variables must be referenced with the C<PL_>
  |prefix.  Again, those not listed here are not to be used by extension writers,
-|and can be changed or removed without notice; same with macros.
+|and may be changed or removed without notice; same with macros.
  |Some macros are provided for compatibility with the older,
  |unadorned names, but this support may be disabled in a future release.
  |
@@ -1705,7 +1940,7 @@ output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
  |
  |The sections in this document currently are
  |
-|=over
+|=over $standard_indent
  
  |$section_list
  |
@@ -1713,6 +1948,8 @@ output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
  |
  |The listing below is alphabetical, case insensitive.
  _EOB_
+
+my $api_footer = <<"_EOE_";
  |=head1 AUTHORS
  |
  |Until May 1997, this document was maintained by Jeff Okamoto
@@ -1729,14 +1966,48 @@ _EOB_
  |
  |=head1 SEE ALSO
  |
-|F<config.h>, L<perlintern>, $other_places
+|F<config.h>, $places_other_than_api
  _EOE_
  
-# List of non-static internal functions
-my @missing_guts =
- grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
+my $api_missings_hdr = <<'_EOT_';
+|The following functions have been flagged as part of the public
+|API, but are currently undocumented.  Use them at your own risk,
+|as the interfaces are subject to change.  Functions that are not
+|listed in this document are not intended for public use, and
+|should NOT be used under any circumstances.
+|
+|If you feel you need to use one of these functions, first send
+|email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
+|It may be that there is a good reason for the function not being
+|documented, and it should be removed from this list; or it may
+|just be that no one has gotten around to documenting it.  In the
+|latter case, you will be asked to submit a patch to document the
+|function.  Once your patch is accepted, it will indicate that the
+|interface is stable (unless it is explicitly marked otherwise) and
+|usable by you.
+_EOT_
+
+my $api_experimental_hdr = <<"_EOT_";
+|
+|Next are the API-flagged elements that are considered experimental.  Using one
+|of these is even more risky than plain undocumented ones.  They are listed
+|here because they should be listed somewhere (so their existence doesn't get
+|lost) and this is the best place for them.
+_EOT_
+
+my $api_deprecated_hdr = <<"_EOT_";
+|
+|Finally are deprecated undocumented API elements.
+|Do not use any for new code; remove all occurrences of all of these from
+|existing code.
+_EOT_
+
+output('perlapi', $api_hdr, $docs{api}, $api_footer,
+       $api_missings_hdr, \@missing_api,
+       $api_experimental_hdr, \@undocumented_experimental_api,
+       $api_deprecated_hdr, \@undocumented_deprecated_api);
  
-output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
+my $intern_hdr = <<"_EOB_";
  |=head1 NAME
  |
  |perlintern - autogenerated documentation of purely B<internal>
@@ -1753,6 +2024,8 @@ output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
  |It has the same sections as L<perlapi>, though some may be empty.
  |
  _EOB_
+
+my $intern_footer = <<"_EOE_";
  |
  |=head1 AUTHORS
  |
@@ -1762,5 +2035,35 @@ _EOB_
  |
  |=head1 SEE ALSO
  |
-|F<config.h>, L<perlapi>, $other_places
+|F<config.h>, $places_other_than_intern
  _EOE_
+
+my $intern_missings_hdr = <<"_EOT_";
+|
+|This section lists the elements that are otherwise undocumented.  If you use
+|any of them, please consider creating and submitting documentation for it.
+|
+|Experimental and deprecated undocumented elements are listed separately at the
+|end.
+|
+_EOT_
+
+my $intern_experimental_hdr = <<"_EOT_";
+|
+|Next are the experimental undocumented elements
+|
+_EOT_
+
+my $intern_deprecated_hdr = <<"_EOT_";
+|
+|Finally are the deprecated undocumented elements.
+|Do not use any for new code; remove all occurrences of all of these from
+|existing code.
+|
+_EOT_
+
+output('perlintern', $intern_hdr, $docs{intern}, $intern_footer,
+       $intern_missings_hdr, \@missing_intern,
+       $intern_experimental_hdr, \@undocumented_experimental_intern,
+       $intern_deprecated_hdr, \@undocumented_deprecated_intern
+      );