This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
makedef.pl typos
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5
GS
4
5use vars qw($VERSION @ISA @EXPORT);
2a28b791 6$VERSION = 1.03;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
54310121
PP
9
10use Carp;
0e4548d5
GS
11use Config;
12use Cwd;
13use File::Spec::Unix;
14use Getopt::Long;
15use Pod::Functions;
54310121 16
3ec07288
FK
17use locale; # make \w work right in non-ASCII lands
18
54310121
PP
19=head1 NAME
20
7b8d334a 21Pod::Html - module to convert pod files to HTML
54310121
PP
22
23=head1 SYNOPSIS
24
7b8d334a 25 use Pod::Html;
54310121
PP
26 pod2html([options]);
27
28=head1 DESCRIPTION
29
30Converts files from pod format (see L<perlpod>) to HTML format. It
31can automatically generate indexes and cross-references, and it keeps
32a cache of things it knows how to cross-reference.
33
34=head1 ARGUMENTS
35
36Pod::Html takes the following arguments:
37
38=over 4
39
0e4548d5
GS
40=item backlink
41
42 --backlink="Back to Top"
43
44Adds "Back to Top" links in front of every HEAD1 heading (except for
45the first). By default, no backlink are being generated.
46
47=item css
48
49 --css=stylesheet
50
51Specify the URL of a cascading style sheet.
52
53=item flush
54
55 --flush
56
57Flushes the item and directory caches.
58
59=item header
60
61 --header
62 --noheader
63
64Creates header and footer blocks containing the text of the NAME
65section. By default, no headers are being generated.
66
54310121
PP
67=item help
68
69 --help
70
71Displays the usage message.
72
5a039dd3
BS
73=item htmldir
74
75 --htmldir=name
76
77Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
78is used to generate relative links to other files. Not passing this
79causes all links to be absolute, since this is the value that tells
80Pod::Html the root of the documentation tree.
5a039dd3 81
54310121
PP
82=item htmlroot
83
84 --htmlroot=name
85
86Sets the base URL for the HTML files. When cross-references are made,
87the HTML root is prepended to the URL.
88
0e4548d5
GS
89=item index
90
91 --index
92 --noindex
93
94Generate an index at the top of the HTML file. This is the default
95behaviour.
96
54310121
PP
97=item infile
98
99 --infile=name
100
101Specify the pod file to convert. Input is taken from STDIN if no
102infile is specified.
103
54310121
PP
104=item libpods
105
106 --libpods=name:...:name
107
108List of page names (eg, "perlfunc") which contain linkable C<=item>s.
109
110=item netscape
111
112 --netscape
0e4548d5 113 --nonetscape
54310121 114
0e4548d5
GS
115Use Netscape HTML directives when applicable. By default, they will
116B<not> be used.
54310121 117
0e4548d5 118=item outfile
54310121 119
0e4548d5 120 --outfile=name
54310121 121
0e4548d5
GS
122Specify the HTML file to create. Output goes to STDOUT if no outfile
123is specified.
54310121 124
0e4548d5 125=item podpath
54310121 126
0e4548d5 127 --podpath=name:...:name
54310121 128
0e4548d5
GS
129Specify which subdirectories of the podroot contain pod files whose
130HTML converted forms can be linked-to in cross-references.
54310121 131
0e4548d5 132=item podroot
54310121 133
0e4548d5 134 --podroot=name
54310121 135
0e4548d5 136Specify the base directory for finding library pods.
54310121 137
0e4548d5 138=item quiet
54310121 139
0e4548d5
GS
140 --quiet
141 --noquiet
54310121 142
0e4548d5
GS
143Don't display I<mostly harmless> warning messages. These messages
144will be displayed by default. But this is not the same as C<verbose>
145mode.
54310121 146
0e4548d5 147=item recurse
54310121 148
0e4548d5 149 --recurse
54310121
PP
150 --norecurse
151
0e4548d5 152Recurse into subdirectories specified in podpath (default behaviour).
54310121
PP
153
154=item title
155
156 --title=title
157
158Specify the title of the resulting HTML file.
159
160=item verbose
161
162 --verbose
0e4548d5 163 --noverbose
54310121 164
0e4548d5 165Display progress messages. By default, they won't be displayed.
34db337b 166
54310121
PP
167=back
168
169=head1 EXAMPLE
170
171 pod2html("pod2html",
172 "--podpath=lib:ext:pod:vms",
173 "--podroot=/usr/src/perl",
174 "--htmlroot=/perl/nmanual",
175 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
176 "--recurse",
177 "--infile=foo.pod",
178 "--outfile=/perl/nmanual/foo.html");
179
34db337b
JD
180=head1 ENVIRONMENT
181
182Uses $Config{pod2html} to setup default options.
183
54310121
PP
184=head1 AUTHOR
185
186Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
187
54310121
PP
188=head1 SEE ALSO
189
190L<perlpod>
191
192=head1 COPYRIGHT
193
194This program is distributed under the Artistic License.
195
196=cut
197
1f763251
GS
198my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
199my $dircache = "pod2htmd$cache_ext";
200my $itemcache = "pod2htmi$cache_ext";
54310121
PP
201
202my @begin_stack = (); # begin/end stack
203
29f227c9
BS
204my @libpods = (); # files to search for links from C<> directives
205my $htmlroot = "/"; # http-server base directory from which all
54310121 206 # relative paths in $podpath stem.
5a039dd3
BS
207my $htmldir = ""; # The directory to which the html pages
208 # will (eventually) be written.
54310121 209my $htmlfile = ""; # write to stdout by default
29f227c9 210my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
211 # refer to this file. This is only used
212 # to make relative urls that point to
213 # other files.
54310121
PP
214my $podfile = ""; # read from stdin by default
215my @podpath = (); # list of directories containing library pods.
216my $podroot = "."; # filesystem base directory from which all
217 # relative paths in $podpath stem.
34db337b 218my $css = ''; # Cascading style sheet
54310121 219my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 220my $quiet = 0; # not quiet by default
54310121
PP
221my $verbose = 0; # not verbose by default
222my $doindex = 1; # non-zero if we should generate an index
0e4548d5 223my $backlink = ''; # text for "back to top" links
54310121 224my $listlevel = 0; # current list depth
2a28b791
GS
225my @listend = (); # the text to use to end the list.
226my $after_lpar = 0; # set to true after a par in an =item
54310121
PP
227my $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
229 # directive.
230
231my %items_named = (); # for the multiples of the same item in perlfunc
232my @items_seen = ();
233my $netscape = 0; # whether or not to use netscape directives.
234my $title; # title to give the pod(s)
34db337b 235my $header = 0; # produce block header/footer
54310121
PP
236my $top = 1; # true if we are at the top of the doc. used
237 # to prevent the first <HR> directive.
238my $paragraph; # which paragraph we're processing (used
239 # for error messages)
2a28b791 240my $ptQuote = 0; # status of double-quote conversion
54310121
PP
241my %pages = (); # associative array used to find the location
242 # of pages referenced by L<> links.
243my %sections = (); # sections within this page
244my %items = (); # associative array used to find the location
245 # of =item directives referenced by C<> links
2a28b791 246my %local_items = (); # local items - avoid destruction of %items
39e571d4
ML
247my $Is83; # is dos with short filenames (8.3)
248
54310121 249sub init_globals {
04f720db
JD
250$dircache = "pod2htmd$cache_ext";
251$itemcache = "pod2htmi$cache_ext";
54310121
PP
252
253@begin_stack = (); # begin/end stack
254
255@libpods = (); # files to search for links from C<> directives
256$htmlroot = "/"; # http-server base directory from which all
257 # relative paths in $podpath stem.
67398a75
GS
258$htmldir = ""; # The directory to which the html pages
259 # will (eventually) be written.
54310121
PP
260$htmlfile = ""; # write to stdout by default
261$podfile = ""; # read from stdin by default
262@podpath = (); # list of directories containing library pods.
263$podroot = "."; # filesystem base directory from which all
264 # relative paths in $podpath stem.
34db337b 265$css = ''; # Cascading style sheet
54310121 266$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 267$quiet = 0; # not quiet by default
54310121
PP
268$verbose = 0; # not verbose by default
269$doindex = 1; # non-zero if we should generate an index
0e4548d5 270$backlink = ''; # text for "back to top" links
54310121 271$listlevel = 0; # current list depth
2a28b791
GS
272@listend = (); # the text to use to end the list.
273$after_lpar = 0; # set to true after a par in an =item
54310121
PP
274$ignore = 1; # whether or not to format text. we don't
275 # format text until we hit our first pod
276 # directive.
277
278@items_seen = ();
279%items_named = ();
280$netscape = 0; # whether or not to use netscape directives.
34db337b 281$header = 0; # produce block header/footer
54310121
PP
282$title = ''; # title to give the pod(s)
283$top = 1; # true if we are at the top of the doc. used
284 # to prevent the first <HR> directive.
285$paragraph = ''; # which paragraph we're processing (used
286 # for error messages)
54310121 287%sections = (); # sections within this page
3e3baf6d
TB
288
289# These are not reinitialised here but are kept as a cache.
290# See get_cache and related cache management code.
291#%pages = (); # associative array used to find the location
292 # of pages referenced by L<> links.
293#%items = (); # associative array used to find the location
54310121 294 # of =item directives referenced by C<> links
2a28b791 295%local_items = ();
39e571d4 296$Is83=$^O eq 'dos';
54310121
PP
297}
298
2a28b791
GS
299#
300# clean_data: global clean-up of pod data
301#
302sub clean_data($){
303 my( $dataref ) = @_;
304 my $i;
305 for( $i = 0; $i <= $#$dataref; $i++ ){
306 ${$dataref}[$i] =~ s/\s+\Z//;
307
308 # have a look for all-space lines
309 if( ${$dataref}[$i] =~ /^\s+$/m ){
310 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
311 splice( @$dataref, $i, 1, @chunks );
312 }
313 }
314}
315
316
54310121
PP
317sub pod2html {
318 local(@ARGV) = @_;
319 local($/);
320 local $_;
321
322 init_globals();
323
39e571d4
ML
324 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
325
54310121 326 # cache of %pages and %items from last time we ran pod2html
54310121
PP
327
328 #undef $opt_help if defined $opt_help;
329
330 # parse the command-line parameters
331 parse_command_line();
332
333 # set some variables to their default values if necessary
334 local *POD;
335 unless (@ARGV && $ARGV[0]) {
336 $podfile = "-" unless $podfile; # stdin
337 open(POD, "<$podfile")
338 || die "$0: cannot open $podfile file for input: $!\n";
339 } else {
340 $podfile = $ARGV[0]; # XXX: might be more filenames
341 *POD = *ARGV;
342 }
343 $htmlfile = "-" unless $htmlfile; # stdout
344 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 345 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9
BS
346 if ( $htmlroot eq ''
347 && defined( $htmldir )
348 && $htmldir ne ''
349 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
350 )
5a039dd3 351 {
29f227c9
BS
352 # Set the 'base' url for this file, so that we can use it
353 # as the location from which to calculate relative links
354 # to other files. If this is '', then absolute links will
355 # be used throughout.
356 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 357 }
54310121
PP
358
359 # read the pod a paragraph at a time
360 warn "Scanning for sections in input file(s)\n" if $verbose;
361 $/ = "";
362 my @poddata = <POD>;
363 close(POD);
2a28b791 364 clean_data( \@poddata );
54310121
PP
365
366 # scan the pod for =head[1-6] directives and build an index
367 my $index = scan_headings(\%sections, @poddata);
368
3e3baf6d 369 unless($index) {
31e56455 370 warn "No headings in $podfile\n" if $verbose;
3e3baf6d
TB
371 }
372
54310121
PP
373 # open the output file
374 open(HTML, ">$htmlfile")
375 || die "$0: cannot open $htmlfile file for output: $!\n";
376
d011ffae
GS
377 # put a title in the HTML file if one wasn't specified
378 if ($title eq '') {
379 TITLE_SEARCH: {
380 for (my $i = 0; $i < @poddata; $i++) {
381 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
382 for my $para ( @poddata[$i, $i+1] ) {
383 last TITLE_SEARCH
384 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
385 }
386 }
54310121 387
d011ffae
GS
388 }
389 }
390 }
fe6f1558 391 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d
TB
392 # probably a split pod so take first =head[12] as title
393 for (my $i = 0; $i < @poddata; $i++) {
394 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
395 }
396 warn "adopted '$title' as title for $podfile\n"
397 if $verbose and $title;
398 }
7b8d334a
GS
399 if ($title) {
400 $title =~ s/\s*\(.*\)//;
401 } else {
34db337b 402 warn "$0: no title for $podfile" unless $quiet;
fe6f1558 403 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 404 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 405 warn "using $title" if $verbose;
54310121 406 }
34db337b
JD
407 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
408 $csslink =~ s,\\,/,g;
409 $csslink =~ s,(/.):,$1|,;
410
411 my $block = $header ? <<END_OF_BLOCK : '';
412<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
413<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
414<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
415</TD></TR>
416</TABLE>
417END_OF_BLOCK
418
54310121 419 print HTML <<END_OF_HEAD;
7b8d334a
GS
420<HTML>
421<HEAD>
34db337b 422<TITLE>$title</TITLE>$csslink
7b8d334a
GS
423<LINK REV="made" HREF="mailto:$Config{perladmin}">
424</HEAD>
54310121 425
7b8d334a 426<BODY>
34db337b 427$block
54310121
PP
428END_OF_HEAD
429
3e3baf6d
TB
430 # load/reload/validate/cache %pages and %items
431 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121
PP
432
433 # scan the pod for =item directives
2a28b791 434 scan_items( \%local_items, "", @poddata);
54310121
PP
435
436 # put an index at the top of the file. note, if $doindex is 0 we
437 # still generate an index, but surround it with an html comment.
438 # that way some other program can extract it if desired.
439 $index =~ s/--+/-/g;
2a28b791 440 print HTML "<A NAME=\"__index__\"></A>\n";
54310121
PP
441 print HTML "<!-- INDEX BEGIN -->\n";
442 print HTML "<!--\n" unless $doindex;
443 print HTML $index;
444 print HTML "-->\n" unless $doindex;
445 print HTML "<!-- INDEX END -->\n\n";
31e56455 446 print HTML "<HR>\n" if $doindex and $index;
54310121
PP
447
448 # now convert this file
2a28b791
GS
449 my $after_item; # set to true after an =item
450 warn "Converting input file $podfile\n" if $verbose;
451 foreach my $i (0..$#poddata){
452 $ptQuote = 0; # status of quote conversion
453
54310121
PP
454 $_ = $poddata[$i];
455 $paragraph = $i+1;
456 if (/^(=.*)/s) { # is it a pod directive?
457 $ignore = 0;
2a28b791 458 $after_item = 0;
54310121
PP
459 $_ = $1;
460 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
461 process_begin($1, $2);
462 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
463 process_end($1, $2);
464 } elsif (/^=cut/) { # =cut
465 process_cut();
466 } elsif (/^=pod/) { # =pod
467 process_pod();
468 } else {
469 next if @begin_stack && $begin_stack[-1] ne 'html';
470
7b8d334a 471 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791
GS
472 process_head( $1, $2, $doindex && $index );
473 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
474 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 475 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791
GS
476 process_item( $1 );
477 $after_item = 1;
54310121
PP
478 } elsif (/^=over\s*(.*)/) { # =over N
479 process_over();
480 } elsif (/^=back/) { # =back
481 process_back();
a45bd81d 482 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121
PP
483 process_for($1,$2);
484 } else {
485 /^=(\S*)\s*/;
486 warn "$0: $podfile: unknown pod directive '$1' in "
487 . "paragraph $paragraph. ignoring.\n";
488 }
489 }
490 $top = 0;
491 }
492 else {
493 next if $ignore;
494 next if @begin_stack && $begin_stack[-1] ne 'html';
495 my $text = $_;
2a28b791
GS
496 if( $text =~ /\A\s+/ ){
497 process_pre( \$text );
498 print HTML "<PRE>\n$text</PRE>\n";
499
500 } else {
501 process_text( \$text );
502
503 # experimental: check for a paragraph where all lines
504 # have some ...\t...\t...\n pattern
505 if( $text =~ /\t/ ){
506 my @lines = split( "\n", $text );
507 if( @lines > 1 ){
508 my $all = 2;
509 foreach my $line ( @lines ){
510 if( $line =~ /\S/ && $line !~ /\t/ ){
511 $all--;
512 last if $all == 0;
513 }
514 }
515 if( $all > 0 ){
516 $text =~ s/\t+/<TD>/g;
517 $text =~ s/^/<TR><TD>/gm;
518 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
519 $text . '</TABLE>';
520 }
521 }
522 }
523 ## end of experimental
524
525 if( $after_item ){
526 print HTML "$text\n";
527 $after_lpar = 1;
528 } else {
529 print HTML "<P>$text</P>\n";
530 }
531 }
532 $after_item = 0;
54310121
PP
533 }
534 }
535
536 # finish off any pending directives
537 finish_list();
2a28b791
GS
538
539 # link to page index
0e4548d5
GS
540 print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
541 if $doindex and $index and $backlink;
2a28b791 542
54310121 543 print HTML <<END_OF_TAIL;
34db337b 544$block
7b8d334a 545</BODY>
54310121 546
7b8d334a 547</HTML>
54310121
PP
548END_OF_TAIL
549
550 # close the html file
551 close(HTML);
552
553 warn "Finished\n" if $verbose;
554}
555
556##############################################################################
557
558my $usage; # see below
559sub usage {
560 my $podfile = shift;
561 warn "$0: $podfile: @_\n" if @_;
562 die $usage;
563}
564
565$usage =<<END_OF_USAGE;
566Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
567 --podpath=<name>:...:<name> --podroot=<name>
568 --libpods=<name>:...:<name> --recurse --verbose --index
569 --netscape --norecurse --noindex
570
0e4548d5
GS
571 --backlink - set text for "back to top" links (default: none).
572 --css - stylesheet URL
573 --flush - flushes the item and directory caches.
574 --[no]header - produce block header/footer (default is no headers).
575 --help - prints this message.
576 --htmldir - directory for resulting HTML files.
577 --htmlroot - http-server base directory from which all relative paths
578 in podpath stem (default is /).
579 --[no]index - generate an index at the top of the resulting html
580 (default behaviour).
581 --infile - filename for the pod to convert (input taken from stdin
582 by default).
583 --libpods - colon-separated list of pages to search for =item pod
584 directives in as targets of C<> and implicit links (empty
585 by default). note, these are not filenames, but rather
586 page names like those that appear in L<> links.
587 --[no]netscape - will use netscape html directives when applicable.
588 (default is not to use them).
589 --outfile - filename for the resulting html file (output sent to
590 stdout by default).
591 --podpath - colon-separated list of directories containing library
592 pods (empty by default).
593 --podroot - filesystem base directory from which all relative paths
594 in podpath stem (default is .).
595 --[no]quiet - supress some benign warning messages (default is off).
596 --[no]recurse - recurse on those subdirectories listed in podpath
597 (default behaviour).
598 --title - title that will appear in resulting html file.
599 --[no]verbose - self-explanatory (off by default).
54310121
PP
600
601END_OF_USAGE
602
603sub parse_command_line {
0e4548d5
GS
604 my ($opt_backlink,$opt_css,$opt_flush,$opt_header,$opt_help,$opt_htmldir,
605 $opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,
606 $opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,$opt_recurse,
607 $opt_title,$opt_verbose);
608
34db337b 609 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 610 my $result = GetOptions(
0e4548d5
GS
611 'backlink=s' => \$opt_backlink,
612 'css=s' => \$opt_css,
29f227c9 613 'flush' => \$opt_flush,
0e4548d5 614 'header!' => \$opt_header,
29f227c9
BS
615 'help' => \$opt_help,
616 'htmldir=s' => \$opt_htmldir,
54310121 617 'htmlroot=s' => \$opt_htmlroot,
29f227c9 618 'index!' => \$opt_index,
54310121
PP
619 'infile=s' => \$opt_infile,
620 'libpods=s' => \$opt_libpods,
621 'netscape!' => \$opt_netscape,
622 'outfile=s' => \$opt_outfile,
623 'podpath=s' => \$opt_podpath,
624 'podroot=s' => \$opt_podroot,
0e4548d5 625 'quiet!' => \$opt_quiet,
54310121
PP
626 'recurse!' => \$opt_recurse,
627 'title=s' => \$opt_title,
0e4548d5 628 'verbose!' => \$opt_verbose,
54310121
PP
629 );
630 usage("-", "invalid parameters") if not $result;
631
632 usage("-") if defined $opt_help; # see if the user asked for help
633 $opt_help = ""; # just to make -w shut-up.
634
54310121
PP
635 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
636 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
637
0e4548d5
GS
638 $backlink = $opt_backlink if defined $opt_backlink;
639 $css = $opt_css if defined $opt_css;
640 $header = $opt_header if defined $opt_header;
641 $htmldir = $opt_htmldir if defined $opt_htmldir;
642 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
643 $doindex = $opt_index if defined $opt_index;
644 $podfile = $opt_infile if defined $opt_infile;
645 $netscape = $opt_netscape if defined $opt_netscape;
646 $htmlfile = $opt_outfile if defined $opt_outfile;
647 $podroot = $opt_podroot if defined $opt_podroot;
648 $quiet = $opt_quiet if defined $opt_quiet;
649 $recurse = $opt_recurse if defined $opt_recurse;
650 $title = $opt_title if defined $opt_title;
651 $verbose = $opt_verbose if defined $opt_verbose;
652
54310121
PP
653 warn "Flushing item and directory caches\n"
654 if $opt_verbose && defined $opt_flush;
655 unlink($dircache, $itemcache) if defined $opt_flush;
54310121
PP
656}
657
3e3baf6d
TB
658
659my $saved_cache_key;
660
661sub get_cache {
662 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
663 my @cache_key_args = @_;
664
665 # A first-level cache:
666 # Don't bother reading the cache files if they still apply
667 # and haven't changed since we last read them.
668
669 my $this_cache_key = cache_key(@cache_key_args);
670
671 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
672
673 # load the cache of %pages and %items if possible. $tests will be
674 # non-zero if successful.
675 my $tests = 0;
676 if (-f $dircache && -f $itemcache) {
677 warn "scanning for item cache\n" if $verbose;
678 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
679 }
680
681 # if we didn't succeed in loading the cache then we must (re)build
682 # %pages and %items.
683 if (!$tests) {
684 warn "scanning directories in pod-path\n" if $verbose;
685 scan_podpath($podroot, $recurse, 0);
686 }
687 $saved_cache_key = cache_key(@cache_key_args);
688}
689
690sub cache_key {
691 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
692 return join('!', $dircache, $itemcache, $recurse,
29f227c9 693 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
694}
695
54310121 696#
3e3baf6d 697# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121
PP
698# are valid caches of %pages and %items. if they are valid then it loads
699# them and returns a non-zero value.
700#
3e3baf6d 701sub load_cache {
54310121
PP
702 my($dircache, $itemcache, $podpath, $podroot) = @_;
703 my($tests);
704 local $_;
705
706 $tests = 0;
707
708 open(CACHE, "<$itemcache") ||
709 die "$0: error opening $itemcache for reading: $!\n";
710 $/ = "\n";
711
712 # is it the same podpath?
713 $_ = <CACHE>;
714 chomp($_);
3e3baf6d 715 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
716
717 # is it the same podroot?
718 $_ = <CACHE>;
719 chomp($_);
720 $tests++ if ($podroot eq $_);
721
722 # load the cache if its good
723 if ($tests != 2) {
724 close(CACHE);
54310121
PP
725 return 0;
726 }
727
728 warn "loading item cache\n" if $verbose;
729 while (<CACHE>) {
730 /(.*?) (.*)$/;
731 $items{$1} = $2;
732 }
733 close(CACHE);
734
735 warn "scanning for directory cache\n" if $verbose;
736 open(CACHE, "<$dircache") ||
737 die "$0: error opening $dircache for reading: $!\n";
738 $/ = "\n";
739 $tests = 0;
740
741 # is it the same podpath?
742 $_ = <CACHE>;
743 chomp($_);
3e3baf6d 744 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
745
746 # is it the same podroot?
747 $_ = <CACHE>;
748 chomp($_);
749 $tests++ if ($podroot eq $_);
750
751 # load the cache if its good
752 if ($tests != 2) {
753 close(CACHE);
54310121
PP
754 return 0;
755 }
756
757 warn "loading directory cache\n" if $verbose;
758 while (<CACHE>) {
759 /(.*?) (.*)$/;
760 $pages{$1} = $2;
761 }
762
763 close(CACHE);
764
765 return 1;
766}
767
768#
769# scan_podpath - scans the directories specified in @podpath for directories,
770# .pod files, and .pm files. it also scans the pod files specified in
771# @libpods for =item directives.
772#
773sub scan_podpath {
3e3baf6d 774 my($podroot, $recurse, $append) = @_;
54310121
PP
775 my($pwd, $dir);
776 my($libpod, $dirname, $pod, @files, @poddata);
777
3e3baf6d
TB
778 unless($append) {
779 %items = ();
780 %pages = ();
781 }
782
54310121
PP
783 # scan each directory listed in @podpath
784 $pwd = getcwd();
785 chdir($podroot)
786 || die "$0: error changing to directory $podroot: $!\n";
787 foreach $dir (@podpath) {
788 scan_dir($dir, $recurse);
789 }
790
791 # scan the pods listed in @libpods for =item directives
792 foreach $libpod (@libpods) {
793 # if the page isn't defined then we won't know where to find it
794 # on the system.
795 next unless defined $pages{$libpod} && $pages{$libpod};
796
797 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
798 # NOTE: Only finds the first so-named directory in the tree.
799# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
800 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
801 # find all the .pod and .pm files within the directory
802 $dirname = $1;
803 opendir(DIR, $dirname) ||
804 die "$0: error opening directory $dirname: $!\n";
fe6f1558 805 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121
PP
806 closedir(DIR);
807
808 # scan each .pod and .pm file for =item directives
809 foreach $pod (@files) {
810 open(POD, "<$dirname/$pod") ||
811 die "$0: error opening $dirname/$pod for input: $!\n";
812 @poddata = <POD>;
813 close(POD);
2a28b791 814 clean_data( \@poddata );
54310121 815
2a28b791 816 scan_items( \%items, "$dirname/$pod", @poddata);
54310121
PP
817 }
818
819 # use the names of files as =item directives too.
2a28b791
GS
820### Don't think this should be done this way - confuses issues.(WL)
821### foreach $pod (@files) {
822### $pod =~ /^(.*)(\.pod|\.pm)$/;
823### $items{$1} = "$dirname/$1.html" if $1;
824### }
54310121
PP
825 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
826 $pages{$libpod} =~ /([^:]*\.pm):/) {
827 # scan the .pod or .pm file for =item directives
828 $pod = $1;
829 open(POD, "<$pod") ||
830 die "$0: error opening $pod for input: $!\n";
831 @poddata = <POD>;
832 close(POD);
2a28b791 833 clean_data( \@poddata );
54310121 834
2a28b791 835 scan_items( \%items, "$pod", @poddata);
54310121
PP
836 } else {
837 warn "$0: shouldn't be here (line ".__LINE__."\n";
838 }
839 }
840 @poddata = (); # clean-up a bit
841
842 chdir($pwd)
843 || die "$0: error changing to directory $pwd: $!\n";
844
845 # cache the item list for later use
846 warn "caching items for later use\n" if $verbose;
847 open(CACHE, ">$itemcache") ||
848 die "$0: error open $itemcache for writing: $!\n";
849
850 print CACHE join(":", @podpath) . "\n$podroot\n";
851 foreach my $key (keys %items) {
852 print CACHE "$key $items{$key}\n";
853 }
854
855 close(CACHE);
856
857 # cache the directory list for later use
858 warn "caching directories for later use\n" if $verbose;
859 open(CACHE, ">$dircache") ||
860 die "$0: error open $dircache for writing: $!\n";
861
862 print CACHE join(":", @podpath) . "\n$podroot\n";
863 foreach my $key (keys %pages) {
864 print CACHE "$key $pages{$key}\n";
865 }
866
867 close(CACHE);
868}
869
870#
871# scan_dir - scans the directory specified in $dir for subdirectories, .pod
872# files, and .pm files. notes those that it finds. this information will
873# be used later in order to figure out where the pages specified in L<>
874# links are on the filesystem.
875#
876sub scan_dir {
877 my($dir, $recurse) = @_;
878 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
879 local $_;
880
881 @subdirs = ();
882 @pods = ();
883
884 opendir(DIR, $dir) ||
885 die "$0: error opening directory $dir: $!\n";
886 while (defined($_ = readdir(DIR))) {
887 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
888 $pages{$_} = "" unless defined $pages{$_};
889 $pages{$_} .= "$dir/$_:";
890 push(@subdirs, $_);
fe6f1558
GS
891 } elsif (/\.pod\z/) { # .pod
892 s/\.pod\z//;
54310121
PP
893 $pages{$_} = "" unless defined $pages{$_};
894 $pages{$_} .= "$dir/$_.pod:";
895 push(@pods, "$dir/$_.pod");
fe6f1558
GS
896 } elsif (/\.pm\z/) { # .pm
897 s/\.pm\z//;
54310121
PP
898 $pages{$_} = "" unless defined $pages{$_};
899 $pages{$_} .= "$dir/$_.pm:";
900 push(@pods, "$dir/$_.pm");
901 }
902 }
903 closedir(DIR);
904
905 # recurse on the subdirectories if necessary
906 if ($recurse) {
907 foreach my $subdir (@subdirs) {
908 scan_dir("$dir/$subdir", $recurse);
909 }
910 }
911}
912
913#
914# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
915# build an index.
916#
917sub scan_headings {
918 my($sections, @data) = @_;
2a28b791 919 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 920
be173d55
UC
921 # here we need local $ignore = 0;
922 # unfortunately, we can't have it, because $ignore is lexical
923 $ignore = 0;
924
54310121
PP
925 $listdepth = 0;
926 $index = "";
927
928 # scan for =head directives, note their name, and build an index
929 # pointing to each of them.
930 foreach my $line (@data) {
bb9460ed 931 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791
GS
932 ($tag, $which_head, $otitle) = ($1,$2,$3);
933
934 my $title = depod( $otitle );
935 my $name = htmlify( $title );
936 $$sections{$name} = 1;
937 $title = process_text( \$otitle );
54310121 938
102c538a
DS
939 while ($which_head != $listdepth) {
940 if ($which_head > $listdepth) {
941 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
942 $listdepth++;
943 } elsif ($which_head < $listdepth) {
944 $listdepth--;
945 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
946 }
54310121 947 }
54310121
PP
948
949 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791
GS
950 "<A HREF=\"#" . $name . "\">" .
951 $title . "</A></LI>";
54310121
PP
952 }
953 }
954
955 # finish off the lists
956 while ($listdepth--) {
957 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
958 }
959
960 # get rid of bogus lists
961 $index =~ s,\t*<UL>\s*</UL>\n,,g;
962
bb9460ed 963 $ignore = 1; # restore old value;
be173d55 964
54310121
PP
965 return $index;
966}
967
968#
969# scan_items - scans the pod specified by $pod for =item directives. we
970# will use this information later on in resolving C<> links.
971#
972sub scan_items {
2a28b791 973 my( $itemref, $pod, @poddata ) = @_;
54310121
PP
974 my($i, $item);
975 local $_;
976
fe6f1558 977 $pod =~ s/\.pod\z//;
54310121
PP
978 $pod .= ".html" if $pod;
979
980 foreach $i (0..$#poddata) {
2a28b791
GS
981 my $txt = depod( $poddata[$i] );
982
983 # figure out what kind of item it is.
984 # Build string for referencing this item.
985 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
986 next unless $1;
987 $item = $1;
988 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
989 $item = $1;
990 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
991 $item = $1;
992 } else {
993 next;
54310121 994 }
2a28b791
GS
995 my $fid = fragment_id( $item );
996 $$itemref{$fid} = "$pod" if $fid;
54310121
PP
997 }
998}
999
1000#
1001# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1002#
1003sub process_head {
2a28b791 1004 my($tag, $heading, $hasindex) = @_;
54310121
PP
1005
1006 # figure out the level of the =head
1007 $tag =~ /head([1-6])/;
1008 my $level = $1;
1009
2a28b791
GS
1010 if( $listlevel ){
1011 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1012 while( $listlevel ){
1013 process_back();
1014 }
1015 }
1016
1017 print HTML "<P>\n";
1018 if( $level == 1 && ! $top ){
0e4548d5
GS
1019 print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
1020 if $hasindex and $backlink;
2a28b791
GS
1021 print HTML "<HR>\n"
1022 }
1023
1024 my $name = htmlify( depod( $heading ) );
1025 my $convert = process_text( \$heading );
1026 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121
PP
1027}
1028
2a28b791 1029
54310121 1030#
2a28b791
GS
1031# emit_item_tag - print an =item's text
1032# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1033#
2a28b791
GS
1034my $EmittedItem;
1035
1036sub emit_item_tag($$$){
1037 my( $otext, $text, $compact ) = @_;
1038 my $item = fragment_id( $text );
54310121 1039
2a28b791
GS
1040 $EmittedItem = $item;
1041 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1042
2a28b791
GS
1043 print HTML '<STRONG>';
1044 if ($items_named{$item}++) {
1045 print HTML process_text( \$otext );
1046 } else {
1047 my $name = 'item_' . $item;
1048 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1049 }
1050 print HTML "</STRONG><BR>\n";
1051 undef( $EmittedItem );
1052}
1053
1054sub emit_li {
1055 my( $tag ) = @_;
1056 if( $items_seen[$listlevel]++ == 0 ){
1057 push( @listend, "</$tag>" );
1058 print HTML "<$tag>\n";
1059 }
1060 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1061}
1062
1063#
1064# process_item - convert a pod item tag and convert it to HTML format.
1065#
1066sub process_item {
1067 my( $otext ) = @_;
54310121
PP
1068
1069 # lots of documents start a list without doing an =over. this is
1070 # bad! but, the proper thing to do seems to be to just assume
1071 # they did do an =over. so warn them once and then continue.
2a28b791
GS
1072 if( $listlevel == 0 ){
1073 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1074 process_over();
1075 }
54310121 1076
2a28b791
GS
1077 # formatting: insert a paragraph if preceding item has >1 paragraph
1078 if( $after_lpar ){
1079 print HTML "<P></P>\n";
1080 $after_lpar = 0;
1081 }
54310121
PP
1082
1083 # remove formatting instructions from the text
2a28b791
GS
1084 my $text = depod( $otext );
1085
1086 # all the list variants:
1087 if( $text =~ /\A\*/ ){ # bullet
1088 emit_li( 'UL' );
1089 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1090 my $tag = $1;
1091 $otext =~ s/\A\*\s+//;
1092 emit_item_tag( $otext, $tag, 1 );
54310121
PP
1093 }
1094
2a28b791
GS
1095 } elsif( $text =~ /\A\d+/ ){ # numbered list
1096 emit_li( 'OL' );
1097 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1098 my $tag = $1;
1099 $otext =~ s/\A\d+\.?\s*//;
1100 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1101 }
54310121 1102
2a28b791
GS
1103 } else { # definition list
1104 emit_li( 'DL' );
1105 if ($text =~ /\A(.+)\Z/s ){ # should have text
1106 emit_item_tag( $otext, $text, 1 );
7b8d334a 1107 }
54310121
PP
1108 print HTML '<DD>';
1109 }
54310121
PP
1110 print HTML "\n";
1111}
1112
1113#
2a28b791 1114# process_over - process a pod over tag and start a corresponding HTML list.
54310121
PP
1115#
1116sub process_over {
1117 # start a new list
1118 $listlevel++;
2a28b791
GS
1119 push( @items_seen, 0 );
1120 $after_lpar = 0;
54310121
PP
1121}
1122
1123#
1124# process_back - process a pod back tag and convert it to HTML format.
1125#
1126sub process_back {
2a28b791
GS
1127 if( $listlevel == 0 ){
1128 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1129 return;
1130 }
54310121
PP
1131
1132 # close off the list. note, I check to see if $listend[$listlevel] is
1133 # defined because an =item directive may have never appeared and thus
1134 # $listend[$listlevel] may have never been initialized.
1135 $listlevel--;
2a28b791
GS
1136 if( defined $listend[$listlevel] ){
1137 print HTML '<P></P>' if $after_lpar;
1138 print HTML $listend[$listlevel];
1139 print HTML "\n";
1140 pop( @listend );
1141 }
1142 $after_lpar = 0;
54310121 1143
2a28b791
GS
1144 # clean up item count
1145 pop( @items_seen );
54310121
PP
1146}
1147
1148#
2a28b791 1149# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121
PP
1150#
1151sub process_cut {
1152 $ignore = 1;
1153}
1154
1155#
2a28b791
GS
1156# process_pod - process a pod pod tag, thus stop ignoring pod directives
1157# until we see a corresponding cut.
54310121
PP
1158#
1159sub process_pod {
1160 # no need to set $ignore to 0 cause the main loop did it
1161}
1162
1163#
2a28b791 1164# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1165# it out verbatim, if illustration, center it, otherwise ignore it.
54310121
PP
1166#
1167sub process_for {
1168 my($whom, $text) = @_;
1169 if ( $whom =~ /^(pod2)?html$/i) {
1170 print HTML $text;
c4d9b39d
IZ
1171 } elsif ($whom =~ /^illustration$/i) {
1172 1 while chomp $text;
1173 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1174 $text .= $ext, last if -r "$text$ext";
1175 }
1176 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1177 }
54310121
PP
1178}
1179
1180#
1181# process_begin - process a =begin pod tag. this pushes
1182# whom we're beginning on the begin stack. if there's a
1183# begin stack, we only print if it us.
1184#
1185sub process_begin {
1186 my($whom, $text) = @_;
1187 $whom = lc($whom);
1188 push (@begin_stack, $whom);
1189 if ( $whom =~ /^(pod2)?html$/) {
1190 print HTML $text if $text;
1191 }
1192}
1193
1194#
1195# process_end - process a =end pod tag. pop the
1196# begin stack. die if we're mismatched.
1197#
1198sub process_end {
1199 my($whom, $text) = @_;
1200 $whom = lc($whom);
1201 if ($begin_stack[-1] ne $whom ) {
1202 die "Unmatched begin/end at chunk $paragraph\n"
1203 }
2a28b791 1204 pop( @begin_stack );
54310121
PP
1205}
1206
1207#
2a28b791 1208# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1209#
2a28b791
GS
1210sub process_pre {
1211 my( $text ) = @_;
1212 my( $rest );
54310121
PP
1213 return if $ignore;
1214
54310121
PP
1215 $rest = $$text;
1216
2a28b791
GS
1217 # insert spaces in place of tabs
1218 $rest =~ s#.*#
be173d55
UC
1219 my $line = $&;
1220 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1221 $line;
1222 #eg;
54310121 1223
2a28b791
GS
1224 # convert some special chars to HTML escapes
1225 $rest =~ s/&/&amp;/g;
1226 $rest =~ s/</&lt;/g;
1227 $rest =~ s/>/&gt;/g;
1228 $rest =~ s/"/&quot;/g;
1229
1230 # try and create links for all occurrences of perl.* within
1231 # the preformatted text.
1232 $rest =~ s{
1233 (\s*)(perl\w+)
1234 }{
1235 if ( defined $pages{$2} ){ # is a link
1236 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1237 } elsif (defined $pages{dosify($2)}) { # is a link
1238 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1239 } else {
1240 "$1$2";
1241 }
1242 }xeg;
1243 $rest =~ s{
1244 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1245 }{
1246 my $url ;
1247 if ( $htmlfileurl ne '' ){
1248 # Here, we take advantage of the knowledge
1249 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1250 # Since $htmlroot eq '', we need to prepend $htmldir
1251 # on the fron of the link to get the absolute path
1252 # of the link's target. We check for a leading '/'
1253 # to avoid corrupting links that are #, file:, etc.
1254 my $old_url = $3 ;
1255 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1256 $url = relativize_url( "$old_url.html", $htmlfileurl );
1257 } else {
1258 $url = "$3.html" ;
1259 }
1260 "$1$url" ;
1261 }xeg;
1262
1263 # Look for embedded URLs and make them into links. We don't
1264 # relativize them since they are best left as the author intended.
1265
1266 my $urls = '(' . join ('|', qw{
54310121
PP
1267 http
1268 telnet
1269 mailto
1270 news
1271 gopher
1272 file
1273 wais
1274 ftp
1275 } )
1276 . ')';
1277
2a28b791
GS
1278 my $ltrs = '\w';
1279 my $gunk = '/#~:.?+=&%@!\-';
1280 my $punc = '.:?\-';
1281 my $any = "${ltrs}${gunk}${punc}";
54310121 1282
2a28b791 1283 $rest =~ s{
54310121
PP
1284 \b # start at word boundary
1285 ( # begin $1 {
29f227c9
BS
1286 $urls : # need resource and a colon
1287 (?!:) # Ignore File::, among others.
54310121
PP
1288 [$any] +? # followed by on or more
1289 # of any valid character, but
1290 # be conservative and take only
1291 # what you need to....
1292 ) # end $1 }
1293 (?= # look-ahead non-consumptive assertion
1294 [$punc]* # either 0 or more puntuation
1295 [^$any] # followed by a non-url char
1296 | # or else
1297 $ # then end of the string
1298 )
1299 }{<A HREF="$1">$1</A>}igox;
1300
2a28b791
GS
1301 # text should be as it is (verbatim)
1302 $$text = $rest;
1303}
54310121 1304
54310121 1305
2a28b791
GS
1306#
1307# pure text processing
1308#
1309# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1310# we don't want this to happen within IS
1311#
1312sub pure_text($){
1313 my $text = shift();
1314 process_puretext( $text, \$ptQuote, 1 );
54310121
PP
1315}
1316
2a28b791
GS
1317sub inIS_text($){
1318 my $text = shift();
1319 process_puretext( $text, \$ptQuote, 0 );
1320}
54310121
PP
1321
1322#
1323# process_puretext - process pure text (without pod-escapes) converting
1324# double-quotes and handling implicit C<> links.
1325#
1326sub process_puretext {
2a28b791 1327 my($text, $quote, $notinIS) = @_;
54310121 1328
2a28b791
GS
1329 ## Guessing at func() or [$@%&]*var references in plain text is destined
1330 ## to produce some strange looking ref's. uncomment to disable:
1331 ## $notinIS = 0;
1332
1333 my(@words, $lead, $trail);
54310121 1334
2a28b791
GS
1335 # convert double-quotes to single-quotes
1336 if( $$quote && $text =~ s/"/''/s ){
1337 $$quote = 0;
1338 }
1339 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1340 $$quote = 1 if $text =~ s/"/``/s;
54310121
PP
1341
1342 # keep track of leading and trailing white-space
2a28b791
GS
1343 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1344 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1345
2a28b791
GS
1346 # split at space/non-space boundaries
1347 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121
PP
1348
1349 # process each word individually
1350 foreach my $word (@words) {
2a28b791
GS
1351 # skip space runs
1352 next if $word =~ /^\s*$/;
54310121 1353 # see if we can infer a link
02369fa5 1354 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1355 # has parenthesis so should have been a C<> ref
2a28b791 1356 ## try for a pagename (perlXXX(1))?
02369fa5
GS
1357 my( $func, $args ) = ( $1, $2 );
1358 if( $args =~ /^\d+$/ ){
2a28b791
GS
1359 my $url = page_sect( $word, '' );
1360 if( defined $url ){
1361 $word = "<A HREF=\"$url\">the $word manpage</A>";
1362 next;
1363 }
1364 }
02369fa5
GS
1365 ## try function name for a link, append tt'ed argument list
1366 $word = emit_C( $func, '', "($args)");
2a28b791
GS
1367
1368#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1369## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1370## # perl variables, should be a C<> ref
1371## $word = emit_C( $word );
1372
54310121
PP
1373 } elsif ($word =~ m,^\w+://\w,) {
1374 # looks like a URL
5a039dd3 1375 # Don't relativize it: leave it as the author intended
54310121 1376 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1377 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1378 # looks like an e-mail address
7b8d334a
GS
1379 my ($w1, $w2, $w3) = ("", $word, "");
1380 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1381 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1382 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1383 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1384 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1385 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1386 } else {
7b8d334a 1387 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1388 }
1389 }
1390
2a28b791
GS
1391 # put everything back together
1392 return $lead . join( '', @words ) . $trail;
1393}
1394
54310121 1395
2a28b791
GS
1396#
1397# process_text - handles plaintext that appears in the input pod file.
1398# there may be pod commands embedded within the text so those must be
1399# converted to html commands.
1400#
7ba65c74 1401
02369fa5 1402sub process_text1($$;$);
7ba65c74 1403
2a28b791
GS
1404sub process_text {
1405 return if $ignore;
1406 my( $tref ) = @_;
1407 my $res = process_text1( 0, $tref );
1408 $$tref = $res;
1409}
1410
02369fa5
GS
1411sub process_text1($$;$){
1412 my( $lev, $rstr, $func ) = @_;
2a28b791
GS
1413 $lev++ unless defined $func;
1414 my $res = '';
1415
228a48a5 1416 $func ||= '';
2a28b791
GS
1417 if( $func eq 'B' ){
1418 # B<text> - boldface
1419 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1420
1421 } elsif( $func eq 'C' ){
1422 # C<code> - can be a ref or <CODE></CODE>
1423 # need to extract text
02369fa5 1424 my $par = go_ahead( $rstr, 'C' );
2a28b791
GS
1425
1426 ## clean-up of the link target
1427 my $text = depod( $par );
1428
1429 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1430 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1431
1432 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1433
1434 } elsif( $func eq 'E' ){
1435 # E<x> - convert to character
1436 $$rstr =~ s/^(\w+)>//;
1437 $res = "&$1;";
1438
1439 } elsif( $func eq 'F' ){
1440 # F<filename> - italizice
1441 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1442
1443 } elsif( $func eq 'I' ){
1444 # I<text> - italizice
1445 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1446
1447 } elsif( $func eq 'L' ){
1448 # L<link> - link
1449 ## L<text|cross-ref> => produce text, use cross-ref for linking
1450 ## L<cross-ref> => make text from cross-ref
1451 ## need to extract text
02369fa5 1452 my $par = go_ahead( $rstr, 'L' );
2a28b791
GS
1453
1454 # some L<>'s that shouldn't be:
1455 # a) full-blown URL's are emitted as-is
1456 if( $par =~ m{^\w+://}s ){
1457 return make_URL_href( $par );
1458 }
1459 # b) C<...> is stripped and treated as C<>
1460 if( $par =~ /^C<(.*)>$/ ){
1461 my $text = depod( $1 );
1462 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1463 }
1464
1465 # analyze the contents
1466 $par =~ s/\n/ /g; # undo word-wrapped tags
1467 my $opar = $par;
1468 my $linktext;
1469 if( $par =~ s{^([^|]+)\|}{} ){
1470 $linktext = $1;
1471 }
1472
1473 # make sure sections start with a /
1474 $par =~ s{^"}{/"};
1475
1476 my( $page, $section, $ident );
1477
1478 # check for link patterns
1479 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1480 # we've got a name/ident (no quotes)
1481 ( $page, $ident ) = ( $1, $2 );
1482 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1483
1484 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1485 # even though this should be a "section", we go for ident first
1486 ( $page, $ident ) = ( $1, $2 );
1487 ### print STDERR "--> L<$par> to page $page, section $section\n";
1488
1489 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1490 ( $page, $section ) = ( '', $par );
1491 ### print STDERR "--> L<$par> to void page, section $section\n";
1492
1493 } else {
1494 ( $page, $section ) = ( $par, '' );
1495 ### print STDERR "--> L<$par> to page $par, void section\n";
1496 }
1497
1498 # now, either $section or $ident is defined. the convoluted logic
1499 # below tries to resolve L<> according to what the user specified.
1500 # failing this, we try to find the next best thing...
1501 my( $url, $ltext, $fid );
1502
1503 RESOLVE: {
1504 if( defined $ident ){
1505 ## try to resolve $ident as an item
1506 ( $url, $fid ) = coderef( $page, $ident );
1507 if( $url ){
1508 if( ! defined( $linktext ) ){
1509 $linktext = $ident;
1510 $linktext .= " in " if $ident && $page;
1511 $linktext .= "the $page manpage" if $page;
1512 }
1513 ### print STDERR "got coderef url=$url\n";
1514 last RESOLVE;
1515 }
1516 ## no luck: go for a section (auto-quoting!)
1517 $section = $ident;
1518 }
1519 ## now go for a section
1520 my $htmlsection = htmlify( $section );
1521 $url = page_sect( $page, $htmlsection );
1522 if( $url ){
1523 if( ! defined( $linktext ) ){
1524 $linktext = $section;
1525 $linktext .= " in " if $section && $page;
1526 $linktext .= "the $page manpage" if $page;
1527 }
1528 ### print STDERR "got page/section url=$url\n";
1529 last RESOLVE;
1530 }
1531 ## no luck: go for an ident
1532 if( $section ){
1533 $ident = $section;
1534 } else {
1535 $ident = $page;
1536 $page = undef();
1537 }
1538 ( $url, $fid ) = coderef( $page, $ident );
1539 if( $url ){
1540 if( ! defined( $linktext ) ){
1541 $linktext = $ident;
1542 $linktext .= " in " if $ident && $page;
1543 $linktext .= "the $page manpage" if $page;
1544 }
1545 ### print STDERR "got section=>coderef url=$url\n";
1546 last RESOLVE;
1547 }
1548
1549 # warning; show some text.
1550 $linktext = $opar unless defined $linktext;
1551 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1552 }
1553
1554 # now we have an URL or just plain code
1555 $$rstr = $linktext . '>' . $$rstr;
1556 if( defined( $url ) ){
1557 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1558 } else {
1559 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1560 }
1561
1562 } elsif( $func eq 'S' ){
1563 # S<text> - non-breaking spaces
1564 $res = process_text1( $lev, $rstr );
1565 $res =~ s/ /&nbsp;/g;
1566
1567 } elsif( $func eq 'X' ){
1568 # X<> - ignore
1569 $$rstr =~ s/^[^>]*>//;
1570
1571 } elsif( $func eq 'Z' ){
1572 # Z<> - empty
1573 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1574 unless $$rstr =~ s/^>//;
1575
1576 } else {
02369fa5 1577 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
2a28b791 1578 # all others: either recurse into new function or
02369fa5 1579 # terminate at closing angle bracket
2a28b791 1580 my $pt = $1;
02369fa5 1581 $pt .= '>' if $2 eq '>' && $lev == 1;
2a28b791 1582 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
02369fa5
GS
1583 return $res if $2 eq '>' && $lev > 1;
1584 if( $2 ne '>' ){
1585 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
2a28b791
GS
1586 }
1587
1588 }
1589 if( $lev == 1 ){
1590 $res .= pure_text( $$rstr );
54310121 1591 } else {
2a28b791 1592 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121
PP
1593 }
1594 }
2a28b791
GS
1595 return $res;
1596}
54310121 1597
2a28b791
GS
1598#
1599# go_ahead: extract text of an IS (can be nested)
1600#
02369fa5
GS
1601sub go_ahead($$){
1602 my( $rstr, $func ) = @_;
2a28b791 1603 my $res = '';
02369fa5
GS
1604 my $level = 1;
1605 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
2a28b791 1606 $res .= $1;
02369fa5
GS
1607 if( $2 eq '>' ){
1608 return $res if --$level == 0;
2a28b791 1609 } else {
02369fa5 1610 ++$level;
2a28b791
GS
1611 }
1612 $res .= $2;
1613 }
1614 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1615 return $res;
54310121
PP
1616}
1617
1618#
2a28b791
GS
1619# emit_C - output result of C<text>
1620# $text is the depod-ed text
54310121 1621#
02369fa5
GS
1622sub emit_C($;$$){
1623 my( $text, $nocode, $args ) = @_;
1624 $args ||= '';
2a28b791
GS
1625 my $res;
1626 my( $url, $fid ) = coderef( undef(), $text );
1627
1628 # need HTML-safe text
02369fa5 1629 my $linktext = html_escape( "$text$args" );
2a28b791
GS
1630
1631 if( defined( $url ) &&
1632 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1633 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1634 } elsif( 0 && $nocode ){
1635 $res = $linktext;
1636 } else {
1637 $res = "<CODE>$linktext</CODE>";
1638 }
1639 return $res;
54310121
PP
1640}
1641
1642#
2a28b791
GS
1643# html_escape: make text safe for HTML
1644#
1645sub html_escape {
1646 my $rest = $_[0];
1647 $rest =~ s/&/&amp;/g;
1648 $rest =~ s/</&lt;/g;
1649 $rest =~ s/>/&gt;/g;
1650 $rest =~ s/"/&quot;/g;
1651 return $rest;
1652}
1653
1654
1655#
39e571d4
ML
1656# dosify - convert filenames to 8.3
1657#
1658sub dosify {
1659 my($str) = @_;
fe4c6be1 1660 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4
ML
1661 if ($Is83) {
1662 $str = lc $str;
1663 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1664 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1665 }
1666 return $str;
1667}
1668
1669#
2a28b791 1670# page_sect - make an URL from the text of a L<>
54310121 1671#
2a28b791
GS
1672sub page_sect($$) {
1673 my( $page, $section ) = @_;
1674 my( $linktext, $page83, $link); # work strings
1675
1676 # check if we know that this is a section in this page
1677 if (!defined $pages{$page} && defined $sections{$page}) {
1678 $section = $page;
1679 $page = "";
1680 ### print STDERR "reset page='', section=$section\n";
54310121
PP
1681 }
1682
39e571d4
ML
1683 $page83=dosify($page);
1684 $page=$page83 if (defined $pages{$page83});
54310121 1685 if ($page eq "") {
2a28b791 1686 $link = "#" . htmlify( $section );
350ccacd 1687 } elsif ( $page =~ /::/ ) {
350ccacd 1688 $page =~ s,::,/,g;
29f227c9
BS
1689 # Search page cache for an entry keyed under the html page name,
1690 # then look to see what directory that page might be in. NOTE:
1691 # this will only find one page. A better solution might be to produce
1692 # an intermediate page that is an index to all such pages.
1693 my $page_name = $page ;
fe6f1558 1694 $page_name =~ s,^.*/,,s ;
29f227c9
BS
1695 if ( defined( $pages{ $page_name } ) &&
1696 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1697 ) {
1698 $page = $1 ;
1699 }
1700 else {
1701 # NOTE: This branch assumes that all A::B pages are located in
1702 # $htmlroot/A/B.html . This is often incorrect, since they are
1703 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1704 # analyze the contents of %pages and figure out where any
1705 # cousins of A::B are, then assume that. So, if A::B isn't found,
1706 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1707 # lib/A/B.pm. This is also limited, but it's an improvement.
1708 # Maybe a hints file so that the links point to the correct places
2a28b791
GS
1709 # nonetheless?
1710
29f227c9 1711 }
350ccacd 1712 $link = "$htmlroot/$page.html";
2a28b791 1713 $link .= "#" . htmlify( $section ) if ($section);
54310121 1714 } elsif (!defined $pages{$page}) {
54310121 1715 $link = "";
54310121 1716 } else {
2a28b791
GS
1717 $section = htmlify( $section ) if $section ne "";
1718 ### print STDERR "...section=$section\n";
54310121
PP
1719
1720 # if there is a directory by the name of the page, then assume that an
1721 # appropriate section will exist in the subdirectory
29f227c9
BS
1722# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1723 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1724 $link = "$htmlroot/$1/$section.html";
2a28b791 1725 ### print STDERR "...link=$link\n";
54310121
PP
1726
1727 # since there is no directory by the name of the page, the section will
1728 # have to exist within a .html of the same name. thus, make sure there
1729 # is a .pod or .pm that might become that .html
1730 } else {
2a28b791
GS
1731 $section = "#$section" if $section;
1732 ### print STDERR "...section=$section\n";
1733
54310121
PP
1734 # check if there is a .pod with the page name
1735 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1736 $link = "$htmlroot/$1.html$section";
1737 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1738 $link = "$htmlroot/$1.html$section";
1739 } else {
54310121 1740 $link = "";
54310121
PP
1741 }
1742 }
1743 }
1744
54310121 1745 if ($link) {
29f227c9
BS
1746 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1747 # implies $htmlroot eq ''. This means that the link in question
1748 # needs a prefix of $htmldir if it begins with '/'. The test for
1749 # the initial '/' is done to avoid '#'-only links, and to allow
1750 # for other kinds of links, like file:, ftp:, etc.
1751 my $url ;
1752 if ( $htmlfileurl ne '' ) {
fe6f1558 1753 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791
GS
1754 $url = relativize_url( $link, $htmlfileurl );
1755# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9
BS
1756 }
1757 else {
1758 $url = $link ;
1759 }
2a28b791 1760 return $url;
29f227c9 1761
54310121 1762 } else {
2a28b791 1763 return undef();
54310121 1764 }
54310121
PP
1765}
1766
1767#
29f227c9
BS
1768# relativize_url - convert an absolute URL to one relative to a base URL.
1769# Assumes both end in a filename.
1770#
1771sub relativize_url {
1772 my ($dest,$source) = @_ ;
1773
1774 my ($dest_volume,$dest_directory,$dest_file) =
1775 File::Spec::Unix->splitpath( $dest ) ;
1776 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1777
1778 my ($source_volume,$source_directory,$source_file) =
1779 File::Spec::Unix->splitpath( $source ) ;
1780 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1781
1782 my $rel_path = '' ;
1783 if ( $dest ne '' ) {
1784 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1785 }
1786
1787 if ( $rel_path ne '' &&
1788 substr( $rel_path, -1 ) ne '/' &&
1789 substr( $dest_file, 0, 1 ) ne '#'
1790 ) {
1791 $rel_path .= "/$dest_file" ;
1792 }
1793 else {
1794 $rel_path .= "$dest_file" ;
1795 }
1796
1797 return $rel_path ;
1798}
1799
54310121
PP
1800
1801#
2a28b791 1802# coderef - make URL from the text of a C<>
54310121 1803#
2a28b791
GS
1804sub coderef($$){
1805 my( $page, $item ) = @_;
1806 my( $url );
1807
1808 my $fid = fragment_id( $item );
2a28b791
GS
1809 if( defined( $page ) ){
1810 # we have been given a $page...
1811 $page =~ s{::}{/}g;
1812
1813 # Do we take it? Item could be a section!
228a48a5 1814 my $base = $items{$fid} || "";
2a28b791
GS
1815 $base =~ s{[^/]*/}{};
1816 if( $base ne "$page.html" ){
1817 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1818 $page = undef();
1819 }
54310121 1820
2a28b791
GS
1821 } else {
1822 # no page - local items precede cached items
67398a75
GS
1823 if( defined( $fid ) ){
1824 if( exists $local_items{$fid} ){
1825 $page = $local_items{$fid};
1826 } else {
1827 $page = $items{$fid};
1828 }
2a28b791
GS
1829 }
1830 }
54310121
PP
1831
1832 # if there was a pod file that we found earlier with an appropriate
1833 # =item directive, then create a link to that page.
2a28b791
GS
1834 if( defined $page ){
1835 if( $page ){
228a48a5 1836 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1837 $page = $1 . '.html';
29f227c9 1838 }
2a28b791 1839 my $link = "$htmlroot/$page#item_$fid";
54310121 1840
2a28b791
GS
1841 # Here, we take advantage of the knowledge that $htmlfileurl
1842 # ne '' implies $htmlroot eq ''.
1843 if ( $htmlfileurl ne '' ) {
1844 $link = "$htmldir$link" ;
1845 $url = relativize_url( $link, $htmlfileurl ) ;
1846 } else {
1847 $url = $link ;
1848 }
1849 } else {
1850 $url = "#item_" . $fid;
1851 }
54310121 1852
2a28b791
GS
1853 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1854 }
1855 return( $url, $fid );
54310121
PP
1856}
1857
54310121
PP
1858
1859
1860#
29f227c9
BS
1861# Adapted from Nick Ing-Simmons' PodToHtml package.
1862sub relative_url {
1863 my $source_file = shift ;
1864 my $destination_file = shift;
1865
1866 my $source = URI::file->new_abs($source_file);
1867 my $uo = URI::file->new($destination_file,$source)->abs;
1868 return $uo->rel->as_string;
1869}
1870
1871
1872#
54310121
PP
1873# finish_list - finish off any pending HTML lists. this should be called
1874# after the entire pod file has been read and converted.
1875#
1876sub finish_list {
7b8d334a 1877 while ($listlevel > 0) {
54310121
PP
1878 print HTML "</DL>\n";
1879 $listlevel--;
1880 }
1881}
1882
1883#
1884# htmlify - converts a pod section specification to a suitable section
2a28b791
GS
1885# specification for HTML. Note that we keep spaces and special characters
1886# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121
PP
1887#
1888sub htmlify {
2a28b791
GS
1889 my( $heading) = @_;
1890 $heading =~ s/(\s+)/ /g;
1891 $heading =~ s/\s+\Z//;
1892 $heading =~ s/\A\s+//;
1893 # The hyphen is a disgrace to the English language.
1894 $heading =~ s/[-"?]//g;
1895 $heading = lc( $heading );
1896 return $heading;
1897}
54310121 1898
2a28b791
GS
1899#
1900# depod - convert text by eliminating all interior sequences
1901# Note: can be called with copy or modify semantics
1902#
1903my %E2c;
67398a75
GS
1904$E2c{lt} = '<';
1905$E2c{gt} = '>';
1906$E2c{sol} = '/';
2a28b791 1907$E2c{verbar} = '|';
67398a75 1908$E2c{amp} = '&'; # in Tk's pods
2a28b791 1909
02369fa5 1910sub depod1($;$);
7ba65c74 1911
2a28b791
GS
1912sub depod($){
1913 my $string;
1914 if( ref( $_[0] ) ){
1915 $string = ${$_[0]};
1916 ${$_[0]} = depod1( \$string );
1917 } else {
1918 $string = $_[0];
1919 depod1( \$string );
1920 }
1921}
54310121 1922
02369fa5
GS
1923sub depod1($;$){
1924 my( $rstr, $func ) = @_;
2a28b791 1925 my $res = '';
228a48a5 1926 return $res unless defined $$rstr;
2a28b791
GS
1927 if( ! defined( $func ) ){
1928 # skip to next begin of an interior sequence
02369fa5 1929 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
2a28b791 1930 # recurse into its text
02369fa5 1931 $res .= $1 . depod1( $rstr, $2 );
2a28b791
GS
1932 }
1933 $res .= $$rstr;
1934 } elsif( $func eq 'E' ){
1935 # E<x> - convert to character
1936 $$rstr =~ s/^(\w+)>//;
228a48a5 1937 $res .= $E2c{$1} || "";
2a28b791
GS
1938 } elsif( $func eq 'X' ){
1939 # X<> - ignore
1940 $$rstr =~ s/^[^>]*>//;
1941 } elsif( $func eq 'Z' ){
1942 # Z<> - empty
1943 $$rstr =~ s/^>//;
1944 } else {
1945 # all others: either recurse into new function or
1946 # terminate at closing angle bracket
02369fa5 1947 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
2a28b791 1948 $res .= $1;
02369fa5
GS
1949 last if $2 eq '>';
1950 $res .= depod1( $rstr, substr($2,0,1) );
2a28b791
GS
1951 }
1952 ## If we're here and $2 ne '>': undelimited interior sequence.
1953 ## Ignored, as this is called without proper indication of where we are.
1954 ## Rely on process_text to produce diagnostics.
1955 }
1956 return $res;
1957}
54310121 1958
2a28b791
GS
1959#
1960# fragment_id - construct a fragment identifier from:
1961# a) =item text
1962# b) contents of C<...>
1963#
1964my @hc;
1965sub fragment_id {
1966 my $text = shift();
1967 $text =~ s/\s+\Z//s;
1968 if( $text ){
1969 # a method or function?
1970 return $1 if $text =~ /(\w+)\s*\(/;
1971 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1972
1973 # a variable name?
1974 return $1 if $text =~ /^([$@%*]\S+)/;
1975
1976 # some pattern matching operator?
1977 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1978
1979 # fancy stuff... like "do { }"
1980 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1981
1982 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1983 # and some funnies with ... Module ...
1984 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1985 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1986
1987 # text? normalize!
1988 $text =~ s/\s+/_/sg;
1989 $text =~ s{(\W)}{
1990 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1991 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1992 $text = substr( $text, 0, 50 );
1993 } else {
1994 return undef();
1995 }
54310121
PP
1996}
1997
2a28b791
GS
1998#
1999# make_URL_href - generate HTML href from URL
2000# Special treatment for CGI queries.
2001#
2002sub make_URL_href($){
2003 my( $url ) = @_;
2004 if( $url !~
228a48a5 2005 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791
GS
2006 $url = "<A HREF=\"$url\">$url</A>";
2007 }
2008 return $url;
54310121
PP
2009}
2010
20111;