3 # Unconditionally regenerate:
8 # from information stored in
11 # plus all the .c and .h files listed in MANIFEST
13 # Has an optional arg, which is the directory to chdir to before reading
14 # MANIFEST and *.[ch].
16 # This script is invoked as part of 'make all'
18 # '=head1' are the only headings looked for. If the first non-blank line after
19 # the heading begins with a word character, it is considered to be the first
20 # line of documentation that applies to the heading itself. That is, it is
21 # output immediately after the heading, before the first function, and not
22 # indented. The next input line that is a pod directive terminates this
23 # heading-level documentation.
25 # The meanings of the flags fields in embed.fnc and the source code are
26 # documented at the top of embed.fnc.
34 or die "Couldn't chdir to '$workdir': $!";
36 require './regen/regen_lib.pl';
37 require './regen/embed_lib.pl';
39 my %described_elsewhere;
42 # See database of global and static function prototypes in embed.fnc
43 # This is used to generate prototype headers under various configurations,
44 # export symbols lists for different platforms, and macros to provide an
45 # implicit interpreter context argument.
54 # Somewhat loose match for an apidoc line so we can catch minor typos.
55 # Parentheses are used to capture portions so that below we verify
56 # that things are the actual correct syntax.
57 my $apidoc_re = qr/ ^ (\s*) # $1
65 # Only certain flags, dealing with display, are acceptable for apidoc_item
66 my $display_flags = "fFnDopsT";
68 sub check_api_doc_line ($$) {
71 return unless $in =~ $apidoc_re;
73 my $is_item = defined $5;
74 my $is_in_proper_form = length $1 == 0
80 my $proto_in_file = $7;
81 my $proto = $proto_in_file;
82 $proto = "||$proto" if $proto !~ /\|/;
83 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
85 $name && $is_in_proper_form or die <<EOS;
86 Bad apidoc at $file line $.:
89 =for apidoc flags|returntype|name|arg|arg|...
90 =for apidoc flags|returntype|name
95 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
98 sub autodoc ($$) { # parse a file and extract documentation info
100 my($in, $line_num, $header, $section);
102 my $file_is_C = $file =~ / \. [ch] $ /x;
105 my $get_next_line = sub { $line_num++; return <$fh> };
108 while ($in = $get_next_line->()) {
109 last unless defined $in;
111 next unless ( $in =~ / ^ =for [ ]+ apidoc /x
112 # =head1 lines only have effect in C files
113 || ($file_is_C && $in =~ /^=head1/));
115 # Here, the line introduces a portion of the input that we care about.
116 # Either it is for an API element, or heading text which we expect
117 # will be used for elements later in the file
119 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
122 # If the line starts a new section ...
123 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
127 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a
129 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
130 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
132 else { # Plain apidoc
134 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
135 = check_api_doc_line($file, $in);
137 # If the entry is also in embed.fnc, it should be defined
138 # completely there, but not here
139 my $embed_docref = delete $funcflags{$element_name};
140 if ($embed_docref and %$embed_docref) {
141 warn "embed.fnc entry overrides redundant information in"
142 . " '$proto_in_file' in $file" if $flags || $ret_type || @args;
143 $flags = $embed_docref->{'flags'};
144 warn "embed.fnc entry '$element_name' missing 'd' flag"
145 unless $flags =~ /d/;
146 $ret_type = $embed_docref->{'ret_type'};
147 @args = @{$embed_docref->{args}};
148 } elsif ($flags !~ /m/) { # Not in embed.fnc, is missing if not a
150 $missing{$element_name} = $file;
153 die "flag $1 is not legal (for function $element_name (from $file))"
154 if $flags =~ / ( [^AabCDdEeFfhiMmNnTOoPpRrSsUuWXx] ) /x;
156 die "'u' flag must also have 'm' flag' for $element_name"
157 if $flags =~ /u/ && $flags !~ /m/;
158 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
159 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
161 if (exists $seen{$element_name} && $flags !~ /h/) {
162 die ("'$element_name' in $file was already documented in $seen{$element_name}");
165 $seen{$element_name} = $file;
169 # Here we have processed the initial line in the heading text or API
170 # element, and have saved the important information from it into the
171 # corresponding variables. Now accumulate the text that applies to it
172 # up to a terminating line, which is one of:
174 # 2) =head (in a C file only =head1)
175 # 3) an end comment line in a C file: m:^\s*\*/:
176 # 4) =for apidoc... (except apidoc_item lines)
178 my $head_ender_num = ($file_is_C) ? 1 : "";
179 while (defined($in = $get_next_line->())) {
181 last if $in =~ /^=cut/x;
182 last if $in =~ /^=head$head_ender_num/;
184 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
186 # End of comment line in C files is a fall-back terminator,
187 # but warn only if there actually is some accumulated text
188 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
192 if ($in !~ / ^ =for [ ]+ apidoc /x) {
197 # Here, the line is an apidoc line. All but apidoc_item terminate
198 # the text being accumulated.
199 last if $in =~ / ^ =for [ ]+ apidoc_section /x;
201 my ($item_name, $item_flags, $item_ret_type, $is_item,
202 undef, @item_args) = check_api_doc_line($file, $in);
203 last unless $is_item;
205 # Here, is an apidoc_item_line; They can only come within apidoc
207 die "Unexpected api_doc_item line '$in'" unless $element_name;
209 # We accept blank lines between these, but nothing else;
210 die "apidoc_item lines must immediately follow apidoc lines"
213 # Use the base entry flags if none for this item; otherwise add in
214 # any non-display base entry flags.
216 $item_flags .= $flags =~ s/[$display_flags]//rg;
219 $item_flags = $flags;
221 $item_ret_type = $ret_type unless $item_ret_type;
222 @item_args = @args unless @item_args;
223 push @items, { name => $item_name,
224 ret_type => $item_ret_type,
225 flags => $item_flags,
226 args => [ @item_args ],
229 # This line shows that this element is documented.
230 delete $funcflags{$item_name};
233 # Here, are done accumulating the text for this item. Trim it
234 $text =~ s/ ^ \s* //x;
235 $text =~ s/ \s* $ //x;
236 $text .= "\n" if $text ne "";
238 # And treat all-spaces as nothing at all
239 undef $text unless $text =~ /\S/;
243 # Here, we have accumulated into $text, the pod for $element_name
244 my $where = $flags =~ /A/ ? 'api' : 'guts';
246 $section = "Functions in file $file" unless defined $section;
247 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
248 unless defined $section;
249 if (exists $docs{$where}{$section}{$element_name}) {
250 warn "$0: duplicate API entry for '$element_name' in"
251 . " $where/$section\n";
255 # Override the text with just a link if the flags call for that
256 my $is_link_only = ($flags =~ /h/);
259 die "Can't currently handle link with items to it" if @items;
260 redo; # Don't put anything if C source
263 # Here, is an 'h' flag in pod. We add a reference to the pod (and
264 # nothing else) to perlapi/intern. (It would be better to add a
265 # reference to the correct =item,=header, but something that makes
266 # it harder is that it might be a duplicate, like '=item *';
267 # so that is a future enhancement XXX. Another complication is
268 # there might be more than one deserving candidate.)
269 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s)
270 $podname =~ s/\.pod//;
271 $text = "Described in L<$podname>.\n";
273 # Don't output a usage example for linked to documentation if
274 # it is trivial (has no arguments) and we aren't to add a
276 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
278 # Keep track of all the pod files that we refer to.
279 push $described_elsewhere{$podname}->@*, $podname;
282 $docs{$where}{$section}{$element_name}{flags} = $flags;
283 $docs{$where}{$section}{$element_name}{pod} = $text;
284 $docs{$where}{$section}{$element_name}{file} = $file;
285 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
286 push $docs{$where}{$section}{$element_name}{args}->@*, @args;
287 push $docs{$where}{$section}{$element_name}{items}->@*, @items;
290 $valid_sections{$section}{header} = "" unless
291 defined $valid_sections{$section}{header};
292 $valid_sections{$section}{header} .= "\n$text";
295 # We already have the first line of what's to come in $in
298 } # End of loop through input
301 sub docout ($$$) { # output the docs for one function
302 my($fh, $element_name, $docref) = @_;
304 my $flags = $docref->{flags};
305 my $pod = $docref->{pod} // "";
306 my $ret_type = $docref->{ret_type};
307 my $file = $docref->{file};
308 my @args = $docref->{args}->@*;
309 my @items = $docref->{items}->@*;
311 $element_name =~ s/\s*$//;
313 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
316 my $function = $flags =~ /n/ ? 'definition' : 'function';
318 C<B<DEPRECATED!>> It is planned to remove this $function from a
319 future release of Perl. Do not use it for new code; remove it from
325 elsif ($flags =~ /x/) {
327 NOTE: this function is B<experimental> and may change or be
328 removed without notice.
334 # Is Perl_, but no #define foo # Perl_foo
335 my $p = (($flags =~ /p/ && $flags =~ /o/ && $flags !~ /M/)
336 || ($flags =~ /f/ && $flags !~ /T/)); # Can't handle threaded varargs
338 $pod .= "\nNOTE: the C<perl_> form of this function is B<deprecated>.\n"
341 $pod .= "\nNOTE: this function must be explicitly called as C<Perl_$element_name>\n";
342 $pod .= "with an C<aTHX_> parameter.\n" if $flags !~ /T/;
345 for my $item ($element_name, @items) {
346 print $fh "\n=item $item\n";
348 # If we're printing only a link to an element, this isn't the major entry,
350 print $fh "X<$element_name>\n" unless $flags =~ /h/;
353 chomp $pod; # Make sure prints pod with a single trailing \n
354 print $fh "\n$pod\n";
356 if ($flags =~ /U/) { # no usage
357 warn("U and s flags are incompatible") if $flags =~ /s/;
360 if ($flags =~ /n/) { # no args
361 warn("$file: $element_name: n flag without m") unless $flags =~ /m/;
362 warn("$file: $element_name: n flag but apparently has args") if @args;
363 print $fh "\n\t$ret_type\t$element_name";
364 } else { # full usage
365 my $n = "Perl_"x$p . $element_name;
366 my $large_ret = length $ret_type > 7;
367 my $indent_size = 7+8 # nroff: 7 under =head + 8 under =item
368 +8+($large_ret ? 1 + length $ret_type : 8)
371 print $fh "\n\t$ret_type" . ($large_ret ? ' ' : "\t") . "$n(";
374 if ($indent_size + 2 + length > 79) {
376 $indent_size -= length($n) - 3;
381 if ($flags !~ /T/ && ($p || ($flags =~ /m/ && $element_name =~ /^Perl_/))) {
382 $args = @args ? "pTHX_ " : "pTHX";
383 if ($long_args) { print $fh $args; $args = '' }
385 $long_args and print $fh "\n";
386 my $first = !$long_args;
390 && $indent_size + 3 + length($args[0]) + length $args > 79
395 "\t".($large_ret ? " " x (1+length $ret_type) : "\t")
396 ." "x($long_args ? 4 : 1 + length $n)
398 $args, (","x($args ne 'pTHX_ ') . "\n")x!!@args;
402 $args .= ", "x!!(length $args && $args ne 'pTHX_ ')
405 if ($long_args) { print $fh "\n", substr $indent, 0, -4 }
408 print $fh ";" if $flags =~ /s/; # semicolon "dTHR;"
411 print $fh "\n=for hackers\nFound in file $file\n";
415 # Do a case-insensitive dictionary sort, with only alphabetics
416 # significant, falling back to using everything for determinacy
417 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
423 my ($podname, $header, $dochash, $missing, $footer) = @_;
425 # strip leading '|' from each line which had been used to hide
426 # pod from pod checkers.
427 s/^\|//gm for $header, $footer;
429 my $fh = open_new("pod/$podname.pod", undef,
430 {by => "$0 extracting documentation",
431 from => 'the C source files'}, 1);
433 print $fh $header, "\n";
435 for my $section_name (sort sort_helper keys %$dochash) {
436 my $section_info = $dochash->{$section_name};
437 next unless keys %$section_info; # Skip empty
438 print $fh "\n=head1 $section_name\n";
440 print $fh "\n", $valid_sections{$section_name}{header}, "\n"
441 if $podname eq 'perlapi'
442 && defined $valid_sections{$section_name}{header};
444 # Output any heading-level documentation and delete so won't get in
446 if (exists $section_info->{""}) {
447 print $fh "\n", $section_info->{""}, "\n";
448 delete $section_info->{""};
450 next unless keys %$section_info; # Skip empty
451 print $fh "\n=over 8\n";
453 for my $function_name (sort sort_helper keys %$section_info) {
454 docout($fh, $function_name, $section_info->{$function_name});
456 print $fh "\n=back\n";
460 print $fh "\n=head1 Undocumented functions\n";
461 print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
463 The following functions have been flagged as part of the public API,
464 but are currently undocumented. Use them at your own risk, as the
465 interfaces are subject to change. Functions that are not listed in this
466 document are not intended for public use, and should NOT be used under any
469 If you feel you need to use one of these functions, first send email to
470 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. It may be
471 that there is a good reason for the function not being documented, and it
472 should be removed from this list; or it may just be that no one has gotten
473 around to documenting it. In the latter case, you will be asked to submit a
474 patch to document the function. Once your patch is accepted, it will indicate
475 that the interface is stable (unless it is explicitly marked otherwise) and
479 The following functions are currently undocumented. If you use one of
480 them, you may wish to consider creating and submitting documentation for
484 print $fh "\n=over\n";
486 for my $missing (sort sort_helper @$missing) {
487 print $fh "\n=item C<$missing>\nX<$missing>\n";
489 print $fh "\n=back\n";
492 print $fh "\n$footer\n=cut\n";
494 read_only_bottom_close_and_rename($fh);
497 foreach (@{(setup_embed())[0]}) {
499 my ($flags, $ret_type, $func, @args) = @$_;
500 s/\b(?:NN|NULLOK)\b\s+//g for @args;
502 $funcflags{$func} = {
504 ret_type => $ret_type,
509 # glob() picks up docs from extra .c or .h files that may be in unclean
511 open my $fh, '<', 'MANIFEST'
512 or die "Can't open MANIFEST: $!";
513 while (my $line = <$fh>) {
514 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
516 # Don't pick up pods from these. (We may pick up generated stuff from
518 next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
520 open F, '<', $file or die "Cannot open $file for docs: $!\n";
522 close F or die "Error closing $file: $!\n";
524 close $fh or die "Error whilst reading MANIFEST: $!";
526 for (sort keys %funcflags) {
527 next unless $funcflags{$_}{flags} =~ /d/;
528 next if $funcflags{$_}{flags} =~ /h/;
529 warn "no docs for $_\n"
532 foreach (sort keys %missing) {
533 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
536 # walk table providing an array of components in each line to
537 # subroutine, printing the result
539 # List of funcs in the public API that aren't also marked as core-only,
540 # experimental nor deprecated.
541 my @missing_api = grep $funcflags{$_}{flags} =~ /A/
542 && $funcflags{$_}{flags} !~ /[xD]/
543 && !$docs{api}{$_}, keys %funcflags;
545 my $other_places = join ", ", map { "L<$_>" } sort sort_helper qw( perlclib perlxs),
546 keys %described_elsewhere;
548 output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
553 |perlapi - autogenerated documentation for the perl public API
556 |X<Perl API> X<API> X<api>
558 |This file contains most of the documentation of the perl public API, as
559 |generated by F<embed.pl>. Specifically, it is a listing of functions,
560 |macros, flags, and variables that may be used by extension writers. Besides
561 |L<perlintern> and F<config.h>, some items are listed here as being actually
562 |documented in another pod.
564 |L<At the end|/Undocumented functions> is a list of functions which have yet
565 |to be documented. Patches welcome! The interfaces of these are subject to
566 |change without notice.
568 |Anything not listed here or in the other mentioned pods is not part of the
569 |public API, and should not be used by extension writers at all. For these
570 |reasons, blindly using functions listed in F<proto.h> is to be avoided when
573 |In Perl, unlike C, a string of characters may generally contain embedded
574 |C<NUL> characters. Sometimes in the documentation a Perl string is referred
575 |to as a "buffer" to distinguish it from a C string, but sometimes they are
576 |both just referred to as strings.
578 |Note that all Perl API global variables must be referenced with the C<PL_>
579 |prefix. Again, those not listed here are not to be used by extension writers,
580 |and can be changed or removed without notice; same with macros.
581 |Some macros are provided for compatibility with the older,
582 |unadorned names, but this support may be disabled in a future release.
584 |Perl was originally written to handle US-ASCII only (that is characters
585 |whose ordinal numbers are in the range 0 - 127).
586 |And documentation and comments may still use the term ASCII, when
587 |sometimes in fact the entire range from 0 - 255 is meant.
589 |The non-ASCII characters below 256 can have various meanings, depending on
590 |various things. (See, most notably, L<perllocale>.) But usually the whole
591 |range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or
592 |"Latin1") is used as an equivalent for ISO-8859-1. But some people treat
593 |"Latin1" as referring just to the characters in the range 128 through 255, or
594 |sometimes from 160 through 255.
595 |This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
597 |Note that Perl can be compiled and run under either ASCII or EBCDIC (See
598 |L<perlebcdic>). Most of the documentation (and even comments in the code)
599 |ignore the EBCDIC possibility.
600 |For almost all purposes the differences are transparent.
601 |As an example, under EBCDIC,
602 |instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
603 |whenever this documentation refers to C<utf8>
604 |(and variants of that name, including in function names),
605 |it also (essentially transparently) means C<UTF-EBCDIC>.
606 |But the ordinals of characters differ between ASCII, EBCDIC, and
607 |the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
608 |number of bytes than in UTF-8.
610 |The listing below is alphabetical, case insensitive.
614 |Until May 1997, this document was maintained by Jeff Okamoto
615 |<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself.
617 |With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
618 |Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
619 |Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
620 |Stephen McCamant, and Gurusamy Sarathy.
622 |API Listing originally by Dean Roehrich <roehrich\@cray.com>.
624 |Updated to be autogenerated from comments in the source by Benjamin Stuhl.
628 |F<config.h>, L<perlintern>, $other_places
631 # List of non-static internal functions
633 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
635 output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
638 |perlintern - autogenerated documentation of purely B<internal>
642 |X<internal Perl functions> X<interpreter functions>
644 |This file is the autogenerated documentation of functions in the
645 |Perl interpreter that are documented using Perl's internal documentation
646 |format but are not marked as part of the Perl API. In other words,
647 |B<they are not for use in extensions>!
653 |The autodocumentation system was originally added to the Perl core by
654 |Benjamin Stuhl. Documentation is by whoever was kind enough to
655 |document their functions.
659 |F<config.h>, L<perlapi>, $other_places