5 # Unconditionally regenerate:
10 # from information stored in
13 # plus all the core .c, .h, and .pod files listed in MANIFEST
15 # Has an optional arg, which is the directory to chdir to before reading
16 # MANIFEST and the files
18 # This script is invoked as part of 'make all'
20 # The generated pod consists of sections of related elements, functions,
21 # macros, and variables. The keys of %valid_sections give the current legal
22 # ones. Just add a new key to add a section.
24 # Throughout the files read by this script are lines like
26 # =for apidoc_section Section Name
28 # "Section Name" (after having been stripped of leading space) must be one of
29 # the legal section names, or an error is thrown. All API elements defined
30 # between this line and the next 'apidoc_section' line will go into the
31 # section "Section Name", sorted by dictionary order within it. perlintern
32 # and perlapi are parallel documents, each potentially with a section "Section
33 # Name". Each element is marked as to which document it goes into. If there
34 # are none for a particular section in perlapi, that section is
37 # Also, in .[ch] files, there may be
41 # lines in comments. These are also used by this program to switch to section
42 # "Section Name". The difference is that if there are any lines after the
43 # =head1, inside the same comment, and before any =for apidoc-ish lines, they
44 # are used as a heading for section "Section Name" (in both perlintern and
45 # perlapi). This includes any =head[2-5]. If more than one '=head1 Section
46 # Name' line has content, they appear in the generated pod in an undefined
49 # The next =head1, =for apidoc_section, or file end terminates what goes into
52 # The %valid_sections hash below also can have header content, which will
53 # appear before any =head1 content. The hash can also have footer content
54 # content, which will appear at the end of the section, after all the
57 # The lines that define the actual functions, etc are documented in embed.fnc,
58 # because they have flags which must be kept in sync with that file.
66 or die "Couldn't chdir to '$workdir': $!";
68 require './regen/regen_lib.pl';
69 require './regen/embed_lib.pl';
71 my %described_elsewhere;
74 # See database of global and static function prototypes in embed.fnc
75 # This is used to generate prototype headers under various configurations,
76 # export symbols lists for different platforms, and macros to provide an
77 # implicit interpreter context argument.
86 my $link_text = "Described in";
88 my $description_indent = 4;
89 my $usage_indent = 3; # + initial blank yields 4 total
91 my %valid_sections = (
93 'Callback Functions' => {},
95 'Character case changing' => {},
96 'Character classification' => {},
97 'Compile-time scope hooks' => {},
98 'Compiler and Preprocessor information' => {},
99 'Compiler directives' => {},
101 'COP Hint Hashes' => {},
103 'Custom Operators' => {},
104 'Display and Dump functions' => {},
105 'Embedding and Interpreter Cloning' => {},
107 'Exception Handling (simple) Macros' => {},
108 'Filesystem configuration values' => {
110 Also see L</List of capability HAS_foo symbols>.
113 'Floating point configuration values' => {
115 Also L</List of capability HAS_foo symbols> lists capabilities
116 that arent in this section. For example C<HAS_ASINH>, for the
117 hyperbolic sine function.
122 These are used for formatting the corresponding type For example,
125 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
129 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
131 This keeps you from having to know if, say an IV, needs to be
132 printed as C<%d>, C<%ld>, or something else.
135 'General Configuration' => {
137 This section contains configuration information not otherwise
138 found in the more specialized sections of this document. At the
139 end is a list of C<#defines> whose name should be enough to tell
140 you what they do, and a list of #defines which tell you if you
141 need to C<#include> files to get the corresponding functionality.
146 =head2 List of capability C<HAS_I<foo>> symbols
148 This is a list of those symbols that dont appear elsewhere in ths
149 document that indicate if the current platform has a certain
150 capability. Their names all begin with C<HAS_>. Only those
151 symbols whose capability is directly derived from the name are
152 listed here. All others have their meaning expanded out elsewhere
153 in this document. This (relatively) compact list is because we
154 think that the expansion would add little or no value and take up
155 a lot of space (because there are so many). If you think certain
156 ones should be expanded, send email to
157 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
159 Each symbol here will be C<#define>d if and only if the platform
160 has the capability. If you need more detail, see the
161 corresponding entry in F<config.h>. For convenience, the list is
162 split so that the ones that indicate there is a reentrant version
163 of a capability are listed separately
167 And, the reentrant capabilities:
178 use an alternative implementation
183 =head2 List of C<#include> needed symbols
185 This list contains symbols that indicate if certain C<#include>
186 files are present on the platform. If your code accesses the
187 functionality that one of these is for, you will need to
188 C<#include> it if the symbol on this list is C<#define>d. For
189 more detail, see the corresponding entry in F<config.h>.
204 'Global Variables' => {},
206 'Hook manipulation' => {},
208 'Input/Output' => {},
209 'Integer configuration values' => {},
210 'Lexer interface' => {},
213 'Memory Management' => {},
215 'Multicall Functions' => {},
216 'Numeric Functions' => {},
217 'Optree construction' => {},
218 'Optree Manipulation Functions' => {},
219 'Pack and Unpack' => {},
220 'Pad Data Structures' => {},
221 'Password and Group access' => {},
222 'Paths to system commands' => {},
223 'Per-Interpreter Variables' => {},
224 'Prototype information' => {},
225 'REGEXP Functions' => {},
227 'Site configuration' => {
229 These variables give details as to where various libraries,
230 installation destinations, I<etc.>, go, as well as what various
231 installation options were selected
234 'Sockets configuration values' => {},
235 'Source Filters' => {},
236 'Stack Manipulation Macros' => {},
237 'String Handling' => {
239 See also C<L</Unicode Support>>.
245 'Typedef names' => {},
246 'Unicode Support' => {},
247 'Utility Functions' => {},
249 'Warning and Dieing' => {},
253 # Somewhat loose match for an apidoc line so we can catch minor typos.
254 # Parentheses are used to capture portions so that below we verify
255 # that things are the actual correct syntax.
256 my $apidoc_re = qr/ ^ (\s*) # $1
264 # Only certain flags, dealing with display, are acceptable for apidoc_item
265 my $display_flags = "fFnDopsT";
267 sub check_api_doc_line ($$) {
268 my ($file, $in) = @_;
270 return unless $in =~ $apidoc_re;
272 my $is_item = defined $5;
273 my $is_in_proper_form = length $1 == 0
279 my $proto_in_file = $7;
280 my $proto = $proto_in_file;
281 $proto = "||$proto" if $proto !~ /\|/;
282 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
284 $name && $is_in_proper_form or die <<EOS;
285 Bad apidoc at $file line $.:
288 =for apidoc flags|returntype|name|arg|arg|...
289 =for apidoc flags|returntype|name
294 die "Only [$display_flags] allowed in apidoc_item"
295 if $is_item && $flags =~ /[^$display_flags]/;
297 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
300 sub embed_override($) {
301 my ($element_name) = shift;
303 # If the entry is also in embed.fnc, it should be defined
304 # completely there, but not here
305 my $embed_docref = delete $funcflags{$element_name};
307 return unless $embed_docref and %$embed_docref;
309 my $flags = $embed_docref->{'flags'};
310 warn "embed.fnc entry '$element_name' missing 'd' flag"
311 unless $flags =~ /d/;
313 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
316 sub autodoc ($$) { # parse a file and extract documentation info
318 my($in, $line_num, $header, $section);
320 my $file_is_C = $file =~ / \. [ch] $ /x;
323 my $get_next_line = sub { $line_num++; return <$fh> };
326 while ($in = $get_next_line->()) {
327 last unless defined $in;
329 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
330 # =head1 lines only have effect in C files
331 || ($file_is_C && $in =~ /^=head1/));
333 # Here, the line introduces a portion of the input that we care about.
334 # Either it is for an API element, or heading text which we expect
335 # will be used for elements later in the file
337 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
340 # If the line starts a new section ...
341 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
344 die "Unknown section name '$section' in $file near line $.\n"
345 unless defined $valid_sections{$section};
348 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
350 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
351 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
353 else { # Plain apidoc
355 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
356 = check_api_doc_line($file, $in);
357 # Override this line with any info in embed.fnc
358 my ($embed_flags, $embed_ret_type, @embed_args)
359 = embed_override($element_name);
360 if ($embed_ret_type) {
361 warn "embed.fnc entry overrides redundant information in"
362 . " '$proto_in_file' in $file"
363 if $flags || $ret_type || @args;
364 $flags = $embed_flags;
365 $ret_type = $embed_ret_type;
368 elsif ($flags !~ /m/) { # Not in embed.fnc, is missing if not a macro
369 $missing{$element_name} = $file;
372 die "flag $1 is not legal (for function $element_name (from $file))"
373 if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXx] ) /x;
375 die "'u' flag must also have 'm' flag' for $element_name"
376 if $flags =~ /u/ && $flags !~ /m/;
377 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
378 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
380 if (exists $seen{$element_name} && $flags !~ /h/) {
381 die ("'$element_name' in $file was already documented in $seen{$element_name}");
384 $seen{$element_name} = $file;
388 # Here we have processed the initial line in the heading text or API
389 # element, and have saved the important information from it into the
390 # corresponding variables. Now accumulate the text that applies to it
391 # up to a terminating line, which is one of:
393 # 2) =head (in a C file only =head1)
394 # 3) an end comment line in a C file: m:^\s*\*/:
395 # 4) =for apidoc... (except apidoc_item lines)
397 my $head_ender_num = ($file_is_C) ? 1 : "";
398 while (defined($in = $get_next_line->())) {
400 last if $in =~ /^=cut/x;
401 last if $in =~ /^=head$head_ender_num/;
403 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
405 # End of comment line in C files is a fall-back terminator,
406 # but warn only if there actually is some accumulated text
407 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
411 if ($in !~ / ^ =for [ ]+ apidoc /x) {
416 # Here, the line is an apidoc line. All but apidoc_item terminate
417 # the text being accumulated.
418 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
420 my ($item_name, $item_flags, $item_ret_type, $is_item,
421 $item_proto, @item_args) = check_api_doc_line($file, $in);
422 last unless $is_item;
424 # Here, is an apidoc_item_line; They can only come within apidoc
426 die "Unexpected api_doc_item line '$item_proto'"
427 unless $element_name;
429 # We accept blank lines between these, but nothing else;
430 die "apidoc_item lines must immediately follow apidoc lines"
432 # Override this line with any info in embed.fnc
433 my ($embed_flags, $embed_ret_type, @embed_args)
434 = embed_override($item_name);
435 if ($embed_ret_type) {
436 warn "embed.fnc entry overrides redundant information in"
437 . " '$item_proto' in $file"
438 if $item_flags || $item_ret_type || @item_args;
440 $item_flags = $embed_flags;
441 $item_ret_type = $embed_ret_type;
442 @item_args = @embed_args;
445 # Use the base entry flags if none for this item; otherwise add in
446 # any non-display base entry flags.
448 $item_flags .= $flags =~ s/[$display_flags]//rg;
451 $item_flags = $flags;
453 $item_ret_type = $ret_type unless $item_ret_type;
454 @item_args = @args unless @item_args;
455 push @items, { name => $item_name,
456 ret_type => $item_ret_type,
457 flags => $item_flags,
458 args => [ @item_args ],
461 # This line shows that this element is documented.
462 delete $funcflags{$item_name};
465 # Here, are done accumulating the text for this item. Trim it
466 $text =~ s/ ^ \s* //x;
467 $text =~ s/ \s* $ //x;
468 $text .= "\n" if $text ne "";
470 # And treat all-spaces as nothing at all
471 undef $text unless $text =~ /\S/;
475 # Here, we have accumulated into $text, the pod for $element_name
476 my $where = $flags =~ /A/ ? 'api' : 'guts';
478 $section = "Functions in file $file" unless defined $section;
479 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
480 unless defined $section;
481 if (exists $docs{$where}{$section}{$element_name}) {
482 warn "$0: duplicate API entry for '$element_name' in"
483 . " $where/$section\n";
487 # Override the text with just a link if the flags call for that
488 my $is_link_only = ($flags =~ /h/);
491 die "Can't currently handle link with items to it" if @items;
492 redo; # Don't put anything if C source
495 # Here, is an 'h' flag in pod. We add a reference to the pod (and
496 # nothing else) to perlapi/intern. (It would be better to add a
497 # reference to the correct =item,=header, but something that makes
498 # it harder is that it that might be a duplicate, like '=item *';
499 # so that is a future enhancement XXX. Another complication is
500 # there might be more than one deserving candidates.)
501 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
502 $podname =~ s/\.pod//;
503 $text = "Described in L<$podname>.\n";
505 # Don't output a usage example for linked to documentation if
506 # it is trivial (has no arguments) and we aren't to add a
508 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
510 # Keep track of all the pod files that we refer to.
511 push $described_elsewhere{$podname}->@*, $podname;
514 $docs{$where}{$section}{$element_name}{flags} = $flags;
515 $docs{$where}{$section}{$element_name}{pod} = $text;
516 $docs{$where}{$section}{$element_name}{file} = $file;
517 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
518 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
519 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
522 $valid_sections{$section}{header} = "" unless
523 defined $valid_sections{$section}{header};
524 $valid_sections{$section}{header} .= "\n$text";
527 # We already have the first line of what's to come in $in
530 } # End of loop through input
535 my @has_r_defs; # Reentrant symbols
538 use re '/aa'; # Everthing is ASCII in this file
541 my $config_h = 'config.h';
542 $config_h = 'win32/config.h' unless -e $config_h;
543 die "Can't find $config_h" unless -e $config_h;
544 open my $fh, '<', $config_h or die "Can't open $config_h: $!";
547 # Look for lines like /* FOO_BAR:
548 # By convention all config.h descriptions begin like that
549 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
552 # Here we are starting the description for $name in config.h. We
553 # accumulate the entire description for it into @description.
554 # Flowing text from one input line to another is appended into the
555 # same array element to make a single flowing line element, but
556 # verbatim lines are kept as separate elements in @description.
557 # This will facilitate later doing pattern matching without regard
558 # to line boundaries on non-verbatim text.
560 die "Multiple config.h entries for '$name'"
561 if defined $configs{$name}{description};
563 # Get first line of description
566 # Each line in the description begins with blanks followed by '/*'
568 die "Unexpected config.h initial line for $name: '$_'"
569 unless s/ ^ ( \s* \* \s* ) //x;
570 my $initial_text = $1;
572 # Initialize the description with this first line (after having
573 # stripped the prefix text)
574 my @description = $_;
576 # The first line is used as a template for how much indentation
577 # each normal succeeding line has. Lines indented further
578 # will be considered as intended to be verbatim. But, empty lines
579 # likely won't have trailing blanks, so just strip the whole thing
581 my $strip_initial_qr = qr! \s* \* \s* $
584 $configs{$name}{verbatim} = 0;
586 # Read in the remainder of the description
588 last if s| ^ \s* \* / ||x; # A '*/' ends it
590 die "Unexpected config.h description line for $name: '$_'"
591 unless s/$strip_initial_qr//;
593 # Fix up the few flawed lines in config.h wherein a new
594 # sentence begins with a tab (and maybe a space after that).
595 # Although none of them currently do, let it recognize
598 # "... text"). The next sentence ...
600 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg;
602 # If this line has extra indentation or looks to have columns,
603 # it should be treated as verbatim. Columns are indicated by
604 # use of interior: tabs, 3 spaces in a row, or even 2 spaces
605 # not preceded by punctuation.
609 | (*nlb:[[:punct:]]) \s{2}
613 # But here, is not a verbatim line. Add an empty line if
614 # this is the first non-verbatim after a run of verbatims
615 if ($description[-1] =~ /^\s/) {
616 push @description, "\n", $_;
618 else { # Otherwise, append this flowing line to the
619 # current flowing line
620 $description[-1] .= $_;
624 $configs{$name}{verbatim} = 1;
626 # The first verbatim line in a run of them is separated by an
627 # empty line from the flowing lines above it
628 push @description, "\n" if $description[-1] =~ /^\S/;
630 $_ = Text::Tabs::expand($_);
632 # Only a single space so less likely to wrap
635 push @description, $_;
639 push $configs{$name}{description}->@*, @description
641 } # Not a description; see if it is a macro definition.
643 (?: / \* )? # Optional commented-out
645 \# \s* define \s+ ( \w+ ) # $1 is the name
646 ( \s* ) # $2 indicates if args or not
647 ( .*? ) # $3 is any definition
648 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/
654 # There can be multiple definitions for a name. We want to know
655 # if any of them has arguments, and if any has a body.
656 $configs{$name}{has_args} //= $2 eq "";
657 $configs{$name}{has_args} ||= $2 eq "";
658 $configs{$name}{has_defn} //= $3 ne "";
659 $configs{$name}{has_defn} ||= $3 ne "";
663 # We now have stored the description and information about every #define
664 # in the file. The description is in a form convenient to operate on to
665 # convert to pod. Do that now.
666 foreach my $name (keys %configs) {
667 next unless defined $configs{$name}{description};
669 # All adjacent non-verbatim lines of the description are appended
670 # together in a single element in the array. This allows the patterns
671 # to work across input line boundaries.
674 while (defined ($_ = shift $configs{$name}{description}->@*)) {
677 if (/ ^ \S /x) { # Don't edit verbatim lines
679 # Enclose known file/path names not already so enclosed
680 # with <...>. (Some entries in config.h are already
682 my $file_name_qr = qr! [ \w / ]+ \.
683 (?: c | h | xs | p [lm] | pmc | PL
686 my $path_name_qr = qr! (?: / \w+ )+ !x;
687 for my $re ($file_name_qr, $path_name_qr) {
688 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
691 # Enclose <... file/path names with F<...> (but no double
693 for my $re ($file_name_qr, $path_name_qr) {
694 s! < ( $re ) > !F<$1>!gxx;
697 # Explain metaconfig units
698 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
700 # Convert "See foo" to "See C<L</foo>>" if foo is described in
701 # this file. Also create a link to the known file INSTALL.
702 # And, to be more general, handle "See also foo and bar", and
703 # "See also foo, bar, and baz"
704 while (m/ \b [Ss]ee \s+
705 (?: also \s+ )? ( \w+ )
707 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
709 push @links, $2 if defined $2;
710 push @links, $3 if defined $3;
711 foreach my $link (@links) {
712 if ($link eq 'INSTALL') {
713 s/ \b INSTALL \b /C<L<INSTALL>>/xg;
715 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
716 s| \b $link \b |C<L</$link>>|xg;
717 $configs{$link}{linked} = 1;
718 $configs{$name}{linked} = 1;
723 # Enclose what we think are symbols with C<...>.
724 no warnings 'experimental::vlb';
727 # Any word followed immediately with parens or
729 \b \w+ (?: \( [^)]* \) # parameter list
730 | \[ [^]]* \] # or array reference
732 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols.
733 | \b (?: struct | union ) \s \w+
735 # Words that contain underscores (which are
736 # definitely not text) or three uppercase letters in
737 # a row. Length two ones, like IV, aren't enclosed,
738 # because they often don't look as nice.
739 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
744 # These include foo when the name is HAS_foo. This is a
745 # heuristic which works in most cases.
746 if ($name =~ / ^ HAS_ (.*) /x) {
749 # Don't include path components, nor things already in
750 # <>, or with trailing '(', '['
751 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
757 delete $configs{$name}{description};
759 $configs{$name}{pod} = $pod;
762 # Now have converted the description to pod. We also now have enough
763 # information that we can do cross checking to find definitions without
764 # corresponding pod, and see if they are mentioned in some description;
765 # otherwise they aren't documented.
767 foreach my $name (keys %configs) {
769 # A definition without pod
770 if (! defined $configs{$name}{pod}) {
772 # Leading/trailing underscore means internal to config.h, e.g.,
774 next if $name =~ / ^ _ /x;
775 next if $name =~ / _ $ /x;
777 # MiXeD case names are internal to config.h; the first 4
778 # characters are sufficient to determine this
779 next if $name =~ / ^ [[:upper:]] [[:lower:]]
780 [[:upper:]] [[:lower:]]
783 # Here, not internal to config.h. Look to see if this symbol is
784 # mentioned in the pod of some other. If so, assume it is
786 foreach my $check_name (keys %configs) {
787 my $this_element = $configs{$check_name};
788 my $this_pod = $this_element->{pod};
789 if (defined $this_pod) {
790 next NAME if $this_pod =~ / \b $name \b /x;
794 warn "$name has no documentation\n";
795 $missing_macros{$name} = 'config.h';
800 my $has_defn = $configs{$name}{has_defn};
801 my $has_args = $configs{$name}{has_args};
803 # Check if any section already has an entry for this element.
804 # If so, it better be a placeholder, in which case we replace it
806 foreach my $section (keys $docs{'api'}->%*) {
807 if (exists $docs{'api'}{$section}{$name}) {
808 my $was = $docs{'api'}{$section}{$name}->{pod};
809 $was = "" unless $was;
811 if ($was ne "" && $was !~ m/$link_text/) {
812 die "Multiple descriptions for $name\n"
813 . "$section contained '$was'";
815 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
816 $configs{$name}{section} = $section;
821 my $handled = 0; # Haven't handled this yet
823 if (defined $configs{$name}{'section'}) {
824 # This has been taken care of elsewhere.
828 if ($has_defn && ! $has_args) {
829 $configs{$name}{args} = 1;
832 # Symbols of the form I_FOO are for #include files. They have
833 # special usage information
834 if ($name =~ / ^ I_ ( .* ) /x) {
835 my $file = lc $1 . '.h';
836 $configs{$name}{usage} = <<~"EOT";
843 # Compute what section this variable should go into. This
844 # heuristic was determined by manually inspecting the current
845 # things in config.h, and should be adjusted as necessary as
846 # deficiencies are found.
848 # This is the default section for macros with a definiton but
849 # no arguments, meaning it is replaced unconditionally
851 my $sb = qr/ _ | \b /x; # segment boundary
852 my $dash_or_spaces = qr/ - | \s+ /x;
853 my $pod = $configs{$name}{pod};
854 if ($name =~ / ^ USE_ /x) {
855 $configs{$name}{'section'} = 'Site configuration';
857 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
859 $configs{$name}{'section'} = 'Time';
861 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
865 | $sb (?: IS )? FINITE
868 $configs{$name}{'section'} =
869 'Floating point configuration values';
871 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
872 $configs{$name}{'section'} = 'Filesystem configuration values';
874 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
875 || $configs{$name}{pod} =~ m/ \b align /x)
877 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
879 elsif ($name =~ / ^ [IU] [ \d V ]
880 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
882 $configs{$name}{'section'} = 'Integer configuration values';
884 elsif ($name =~ / $sb t $sb /x) {
885 $configs{$name}{'section'} = 'Typedef names';
887 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
888 && $configs{$name}{pod} =~ m/ \b format \b /ix)
890 $configs{$name}{'section'} = 'Formats';
892 elsif ($name =~ / BACKTRACE /x) {
893 $configs{$name}{'section'} = 'Display and Dump functions';
895 elsif ($name =~ / ALLOC $sb /x) {
896 $configs{$name}{'section'} = 'Memory Management';
898 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH
910 || $configs{$name}{pod} =~ m! I/O | stdio
911 | file \s+ descriptor
916 $configs{$name}{'section'} = 'Filesystem configuration values';
918 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
919 $configs{$name}{'section'} = 'Signals';
921 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
922 $configs{$name}{'section'} = 'Prototype information';
924 elsif ( $name =~ / ^ LOC_ /x
925 || $configs{$name}{pod} =~ /full path/i)
927 $configs{$name}{'section'} = 'Paths to system commands';
929 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
930 $configs{$name}{'section'} = 'Locales';
932 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) {
933 $configs{$name}{'section'} = 'Compiler and Preprocessor information';
935 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
937 $configs{$name}{'section'} = 'Password and Group access';
939 elsif ($name =~ / SOCKET | $sb SOCK /x) {
940 $configs{$name}{'section'} = 'Sockets configuration values';
942 elsif ( $name =~ / THREAD | MULTIPLICITY /x
943 || $configs{$name}{pod} =~ m/ \b pthread /ix)
945 $configs{$name}{'section'} = 'Concurrency';
947 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN
951 $configs{$name}{'section'} = 'Site configuration';
953 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix
954 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
955 || $pod =~ / \b doubles \b /ix
956 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
958 $configs{$name}{'section'} =
959 'Floating point configuration values';
962 # Above are the specific sections. The rest go into a
963 # grab-bag of general configuration values. However, we put
964 # two classes of them into lists of their names, without their
965 # descriptions, when we think that the description doesn't add
966 # any real value. One list contains the #include variables:
967 # the description is basically boiler plate for each of these.
968 # The other list contains the very many things that are of the
969 # form HAS_foo, and \bfoo\b is contained in its description,
970 # and there is no verbatim text in the pod or links to/from it
971 # (which would add value). That means that it is likely the
972 # intent of the variable can be gleaned from just its name,
973 # and unlikely the description adds signficant value, so just
974 # listing them suffices. Giving their descriptions would
975 # expand this pod significantly with little added value.
977 && ! $configs{$name}{verbatim}
978 && ! $configs{$name}{linked})
980 if ($name =~ / ^ I_ ( .* ) /x) {
981 push @include_defs, $name;
984 elsif ($name =~ / ^ HAS_ ( .* ) /x) {
985 my $canonical_name = $1;
986 $canonical_name =~ s/_//g;
988 my $canonical_pod = $configs{$name}{pod};
989 $canonical_pod =~ s/_//g;
991 if ($canonical_pod =~ / \b $canonical_name \b /xi) {
992 if ($name =~ / $sb R $sb /x) {
993 push @has_r_defs, $name;
996 push @has_defs, $name;
1003 $configs{$name}{'section'} = 'General Configuration';
1006 my $section = $configs{$name}{'section'};
1007 die "Internal error: '$section' not in \%valid_sections"
1008 unless grep { $_ eq $section } keys %valid_sections;
1009 my $flags = 'AdmnT';
1010 $flags .= 'U' unless defined $configs{$name}{usage};
1011 $docs{'api'}{$section}{$name}{flags} = $flags;
1012 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1013 $docs{'api'}{$section}{$name}{ret_type} = "";
1014 $docs{'api'}{$section}{$name}{file} = 'config.h';
1015 $docs{'api'}{$section}{$name}{usage}
1016 = $configs{$name}{usage} if defined $configs{$name}{usage};
1017 push $docs{'api'}{$section}{$name}{args}->@*, ();
1018 push $docs{'api'}{$section}{$name}{items}->@*, ();
1023 sub docout ($$$) { # output the docs for one function
1024 my($fh, $element_name, $docref) = @_;
1026 # Trim trailing space
1027 $element_name =~ s/\s*$//;
1029 my $flags = $docref->{flags};
1030 my $pod = $docref->{pod} // "";
1031 my $file = $docref->{file};
1033 my @items = $docref->{items}->@*;
1035 # Make the main element the first of the items. This allows uniform
1037 unshift @items, { name => $element_name,
1039 ret_type => $docref->{ret_type},
1040 args => [ $docref->{args}->@* ],
1043 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1045 print $fh "\n=over $description_indent\n";
1046 print $fh "\n=item C<$_->{name}>\n" for @items;
1048 # If we're printing only a link to an element, this isn't the major entry,
1050 if ($flags !~ /h/) {
1051 print $fh "X<$_->{name}>" for @items;
1055 for my $item (@items) {
1056 if ($item->{flags} =~ /D/) {
1059 C<B<DEPRECATED!>> It is planned to remove C<$item->{name}> from a
1060 future release of Perl. Do not use it for new code; remove it from
1064 elsif ($item->{flags} =~ /x/) {
1067 NOTE: C<$item->{name}> is B<experimental> and may change or be
1068 removed without notice.
1073 chomp $pod; # Make sure prints pod with a single trailing \n
1074 print $fh "\n", $pod, "\n";
1076 for my $item (@items) {
1077 my $item_flags = $item->{flags};
1078 my $item_name = $item->{name};
1080 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1081 if $item_flags =~ /O/;
1082 # Is Perl_, but no #define foo # Perl_foo
1083 if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1085 # Can't handle threaded varargs
1086 || ($item_flags =~ /f/ && $item_flags !~ /T/))
1088 $item->{name} = "Perl_$item_name";
1091 NOTE: C<$item_name> must be explicitly called as
1094 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1099 if ($flags =~ /U/) { # no usage
1100 warn("U and s flags are incompatible") if $flags =~ /s/;
1104 print $fh "\n=over $usage_indent\n";
1106 if (defined $docref->{usage}) { # An override of the usage section
1107 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1111 # Add the thread context formal parameter on expanded-out names
1112 for my $item (@items) {
1113 unshift $item->{args}->@*, (($item->{args}->@*)
1116 if $item->{flags} !~ /T/
1117 && $item->{name} =~ /^Perl_/;
1120 # Look through all the items in this entry. If all have the same
1121 # return type and arguments, only the main entry is displayed.
1122 # Also, find the longest return type and longest name so that if
1123 # multiple ones are shown, they can be vertically aligned nicely
1124 my $need_individual_usage = 0;
1125 my $longest_name_length = length $items[0]->{name};
1126 my $base_ret_type = $items[0]->{ret_type};
1127 my $longest_ret = length $base_ret_type;
1128 my @base_args = $items[0]->{args}->@*;
1129 for (my $i = 1; $i < @items; $i++) {
1130 no warnings 'experimental::smartmatch';
1131 my $item = $items[$i];
1132 $need_individual_usage = 1
1133 if $item->{ret_type} ne $base_ret_type
1134 || ! ($item->{args}->@* ~~ @base_args);
1135 my $ret_length = length $item->{ret_type};
1136 $longest_ret = $ret_length if $ret_length > $longest_ret;
1137 my $name_length = length $item->{name};
1138 $longest_name_length = $name_length
1139 if $name_length > $longest_name_length;
1142 # If we're only showing one entry, only its length matters.
1143 $longest_name_length = length($items[0]->{name})
1144 unless $need_individual_usage;
1147 my $indent = 1; # 1 is sufficient for verbatim; =over is used
1149 my $ret_name_sep_length = 2; # spaces between return type and name
1150 my $name_indent = $indent + $longest_ret;
1151 $name_indent += $ret_name_sep_length if $longest_ret;
1153 # 80 column terminal - 1 for pager adding a column; -7 for nroff
1155 my $max_length = 80 - 1 - 7 - $description_indent - $usage_indent;
1157 for my $item (@items) {
1158 my $ret_type = $item->{ret_type};
1159 my @args = $item->{args}->@*;
1160 my $name = $item->{name};
1161 my $item_flags = $item->{flags};
1164 print $fh (" " x $indent), $ret_type;
1166 print $fh " " x ( $ret_name_sep_length
1167 + $longest_ret - length $ret_type);
1170 if ($item_flags =~ /n/) { # no args
1171 warn("$file: $element_name: n flag without m")
1172 unless $item_flags =~ /m/;
1173 warn("$file: $name: n flag but apparently has args")
1178 my $arg_indent = $name_indent + $longest_name_length + 1;
1180 # Align the argument lists of the items
1181 print $fh " " x ($longest_name_length - length($name));
1184 # Display as many of the arguments on the same line as
1186 my $total_length = $arg_indent;
1188 for (my $i = 0; $i < @args; $i++) {
1189 my $arg = $args[$i];
1190 my $arg_length = length($arg);
1192 # All but the first arg are preceded by a blank
1193 my $use_blank = $i > 0;
1195 # +1 here and below because either the argument has a
1196 # trailing comma or trailing ')'
1197 $total_length += $arg_length + $use_blank + 1;
1199 # We want none of the arguments to be positioned so
1200 # they extend too far to the right. Ideally, they
1201 # should all start in the same column as the arguments
1202 # on the first line of the function display do. But, if
1203 # necessary, outdent them so that they all start in
1204 # another column, with the longest ending at the right
1206 # void function_name(pTHX_ short1,
1208 # very_long_argument,
1210 if ($total_length > $max_length) {
1212 # If this is the first continuation line,
1213 # calculate the longest argument; this will be the
1214 # one we may have to outdent for.
1218 # We will need at least as much as the current
1220 my $longest_arg_length = $arg_length
1223 # Look through the rest of the args to see if
1224 # any are longer than this one.
1225 for (my $j = $i + 1; $j < @args; $j++) {
1227 # Include the trailing ',' or ')' in the
1228 # length. No need to concern ourselves
1229 # with a leading blank, as the argument
1230 # would be positioned first on the next
1232 my $peek_arg_length = length ($args[$j])
1234 $longest_arg_length = $peek_arg_length
1235 if $peek_arg_length > $longest_arg_length;
1238 # Calculate the new indent if necessary.
1239 $arg_indent = $max_length - $longest_arg_length
1240 if $arg_indent + $longest_arg_length
1244 print $fh "\n", (" " x $arg_indent);
1245 $total_length = $arg_indent + $arg_length + 1;
1249 # Display this argument
1250 print $fh " " if $use_blank;
1252 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1254 } # End of loop through args
1259 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1262 # Only the first entry is normally displayed
1263 last unless $need_individual_usage;
1267 print $fh "\n=back\n";
1270 print $fh "\n=back\n";
1271 print $fh "\n=for hackers\nFound in file $file\n";
1275 # Do a case-insensitive dictionary sort, with only alphabetics
1276 # significant, falling back to using everything for determinancy
1277 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
1278 || uc($a) cmp uc($b)
1283 my ($podname, $header, $dochash, $missing, $footer) = @_;
1285 # strip leading '|' from each line which had been used to hide
1286 # pod from pod checkers.
1287 s/^\|//gm for $header, $footer;
1289 my $fh = open_new("pod/$podname.pod", undef,
1290 {by => "$0 extracting documentation",
1291 from => 'the C source files'}, 1);
1293 print $fh $header, "\n";
1295 for my $section_name (sort sort_helper keys %valid_sections) {
1296 my $section_info = $dochash->{$section_name};
1298 # We allow empty sections in perlintern.
1299 if (! $section_info && $podname eq 'perlapi') {
1300 warn "Empty section '$section_name'; skipped";
1304 print $fh "\n=head1 $section_name\n";
1306 if ($podname eq 'perlapi') {
1307 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1308 if defined $valid_sections{$section_name}{header};
1310 # Output any heading-level documentation and delete so won't get in
1312 if (exists $section_info->{""}) {
1313 print $fh "\n", $section_info->{""}, "\n";
1314 delete $section_info->{""};
1319 if ($section_info) {
1320 for my $function_name (sort sort_helper keys %$section_info) {
1321 docout($fh, $function_name, $section_info->{$function_name});
1325 print $fh "\nThere are only public API items currently in $section_name\n";
1328 print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1329 if $podname eq 'perlapi'
1330 && defined $valid_sections{$section_name}{footer};
1334 print $fh "\n=head1 Undocumented functions\n";
1335 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
1337 The following functions have been flagged as part of the public API,
1338 but are currently undocumented. Use them at your own risk, as the
1339 interfaces are subject to change. Functions that are not listed in this
1340 document are not intended for public use, and should NOT be used under any
1343 If you feel you need to use one of these functions, first send email to
1344 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
1345 that there is a good reason for the function not being documented, and it
1346 should be removed from this list; or it may just be that no one has gotten
1347 around to documenting it. In the latter case, you will be asked to submit a
1348 patch to document the function. Once your patch is accepted, it will indicate
1349 that the interface is stable (unless it is explicitly marked otherwise) and
1353 The following functions are currently undocumented. If you use one of
1354 them, you may wish to consider creating and submitting documentation for
1358 print $fh "\n=over $description_indent\n";
1360 for my $missing (sort sort_helper @$missing) {
1361 print $fh "\n=item C<$missing>\nX<$missing>\n";
1363 print $fh "\n=back\n";
1366 print $fh "\n$footer\n=cut\n";
1368 read_only_bottom_close_and_rename($fh);
1371 foreach (@{(setup_embed())[0]}) {
1373 my ($flags, $ret_type, $func, @args) = @$_;
1374 s/\b(?:NN|NULLOK)\b\s+//g for @args;
1376 $funcflags{$func} = {
1378 ret_type => $ret_type,
1383 # glob() picks up docs from extra .c or .h files that may be in unclean
1384 # development trees.
1385 open my $fh, '<', 'MANIFEST'
1386 or die "Can't open MANIFEST: $!";
1387 while (my $line = <$fh>) {
1388 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1390 # Don't pick up pods from these. (We may pick up generated stuff from
1392 next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
1394 open F, '<', $file or die "Cannot open $file for docs: $!\n";
1396 close F or die "Error closing $file: $!\n";
1398 close $fh or die "Error whilst reading MANIFEST: $!";
1402 for (sort keys %funcflags) {
1403 next unless $funcflags{$_}{flags} =~ /d/;
1404 next if $funcflags{$_}{flags} =~ /h/;
1405 warn "no docs for $_\n";
1408 foreach (sort keys %missing) {
1409 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1412 # List of funcs in the public API that aren't also marked as core-only,
1413 # experimental nor deprecated.
1414 my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1415 && $funcflags{$_}{flags} !~ /[xD]/
1416 && !$docs{api}{$_}, keys %funcflags;
1417 push @missing_api, keys %missing_macros;
1419 my $other_places = join ", ", map { "L<$_>" } sort sort_helper qw( perlclib perlxs),
1420 keys %described_elsewhere;
1422 # The S< > makes things less densely packed, hence more readable
1423 my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_defs;
1424 my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @has_r_defs;
1425 $valid_sections{'General Configuration'}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1426 $valid_sections{'General Configuration'}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1428 my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort sort_helper @include_defs;
1429 $valid_sections{'General Configuration'}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1431 my $section_list = join "\n\n", map { "=item L</$_>" } sort sort_helper keys %valid_sections;
1433 output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
1438 |perlapi - autogenerated documentation for the perl public API
1441 |X<Perl API> X<API> X<api>
1443 |This file contains most of the documentation of the perl public API, as
1444 |generated by F<embed.pl>. Specifically, it is a listing of functions,
1445 |macros, flags, and variables that may be used by extension writers. Besides
1446 |L<perlintern> and F<config.h>, some items are listed here as being actually
1447 |documented in another pod.
1449 |L<At the end|/Undocumented functions> is a list of functions which have yet
1450 |to be documented. Patches welcome! The interfaces of these are subject to
1451 |change without notice.
1453 |Some of the functions documented here are consolidated so that a single entry
1454 |serves for multiple functions which all do basically the same thing, but have
1455 |some slight differences. For example, one form might process magic, while
1456 |another doesn't. The name of each variation is listed at the top of the
1457 |single entry. But if all have the same signature (arguments and return type)
1458 |except for their names, only the usage for the base form is shown. If any
1459 |one of the forms has a different signature (such as returning C<const> or
1460 |not) every function's signature is explicitly displayed.
1462 |Anything not listed here or in the other mentioned pods is not part of the
1463 |public API, and should not be used by extension writers at all. For these
1464 |reasons, blindly using functions listed in F<proto.h> is to be avoided when
1465 |writing extensions.
1467 |In Perl, unlike C, a string of characters may generally contain embedded
1468 |C<NUL> characters. Sometimes in the documentation a Perl string is referred
1469 |to as a "buffer" to distinguish it from a C string, but sometimes they are
1470 |both just referred to as strings.
1472 |Note that all Perl API global variables must be referenced with the C<PL_>
1473 |prefix. Again, those not listed here are not to be used by extension writers,
1474 |and can be changed or removed without notice; same with macros.
1475 |Some macros are provided for compatibility with the older,
1476 |unadorned names, but this support may be disabled in a future release.
1478 |Perl was originally written to handle US-ASCII only (that is characters
1479 |whose ordinal numbers are in the range 0 - 127).
1480 |And documentation and comments may still use the term ASCII, when
1481 |sometimes in fact the entire range from 0 - 255 is meant.
1483 |The non-ASCII characters below 256 can have various meanings, depending on
1484 |various things. (See, most notably, L<perllocale>.) But usually the whole
1485 |range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
1486 |"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
1487 |"Latin1" as referring just to the characters in the range 128 through 255, or
1488 |sometimes from 160 through 255.
1489 |This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1491 |Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1492 |L<perlebcdic>). Most of the documentation (and even comments in the code)
1493 |ignore the EBCDIC possibility.
1494 |For almost all purposes the differences are transparent.
1495 |As an example, under EBCDIC,
1496 |instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1497 |whenever this documentation refers to C<utf8>
1498 |(and variants of that name, including in function names),
1499 |it also (essentially transparently) means C<UTF-EBCDIC>.
1500 |But the ordinals of characters differ between ASCII, EBCDIC, and
1501 |the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1502 |number of bytes than in UTF-8.
1504 |The organization of this document is tentative and subject to change.
1505 |Suggestions and patches welcome
1506 |L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1508 |The sections in this document currently are
1516 |The listing below is alphabetical, case insensitive.
1520 |Until May 1997, this document was maintained by Jeff Okamoto
1521 |<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
1523 |With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1524 |Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1525 |Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1526 |Stephen McCamant, and Gurusamy Sarathy.
1528 |API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1530 |Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1534 |F<config.h>, L<perlintern>, $other_places
1537 # List of non-static internal functions
1539 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
1541 output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
1544 |perlintern - autogenerated documentation of purely B<internal>
1548 |X<internal Perl functions> X<interpreter functions>
1550 |This file is the autogenerated documentation of functions in the
1551 |Perl interpreter that are documented using Perl's internal documentation
1552 |format but are not marked as part of the Perl API. In other words,
1553 |B<they are not for use in extensions>!
1555 |It has the same sections as L<perlapi>, though some may be empty.
1561 |The autodocumentation system was originally added to the Perl core by
1562 |Benjamin Stuhl. Documentation is by whoever was kind enough to
1563 |document their functions.
1567 |F<config.h>, L<perlapi>, $other_places