This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
backout change#2811 and add newer version based on File::Spec
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121
PP
1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
29f227c9 8$VERSION = 1.02;
54310121
PP
9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288
FK
15use locale; # make \w work right in non-ASCII lands
16
54310121
PP
17use strict;
18
7b8d334a
GS
19use Config;
20
54310121
PP
21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121
PP
24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121
PP
28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3
BS
48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121
PP
57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
140=item verbose
141
142 --verbose
143
144Display progress messages.
145
146=back
147
148=head1 EXAMPLE
149
150 pod2html("pod2html",
151 "--podpath=lib:ext:pod:vms",
152 "--podroot=/usr/src/perl",
153 "--htmlroot=/perl/nmanual",
154 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
155 "--recurse",
156 "--infile=foo.pod",
157 "--outfile=/perl/nmanual/foo.html");
158
159=head1 AUTHOR
160
161Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
162
163=head1 BUGS
164
165Has trouble with C<> etc in = commands.
166
167=head1 SEE ALSO
168
169L<perlpod>
170
171=head1 COPYRIGHT
172
173This program is distributed under the Artistic License.
174
175=cut
176
177my $dircache = "pod2html-dircache";
178my $itemcache = "pod2html-itemcache";
179
180my @begin_stack = (); # begin/end stack
181
29f227c9
BS
182my @libpods = (); # files to search for links from C<> directives
183my $htmlroot = "/"; # http-server base directory from which all
54310121 184 # relative paths in $podpath stem.
5a039dd3
BS
185my $htmldir = ""; # The directory to which the html pages
186 # will (eventually) be written.
54310121 187my $htmlfile = ""; # write to stdout by default
29f227c9 188my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
189 # refer to this file. This is only used
190 # to make relative urls that point to
191 # other files.
54310121
PP
192my $podfile = ""; # read from stdin by default
193my @podpath = (); # list of directories containing library pods.
194my $podroot = "."; # filesystem base directory from which all
195 # relative paths in $podpath stem.
196my $recurse = 1; # recurse on subdirectories in $podpath.
197my $verbose = 0; # not verbose by default
198my $doindex = 1; # non-zero if we should generate an index
199my $listlevel = 0; # current list depth
200my @listitem = (); # stack of HTML commands to use when a =item is
201 # encountered. the top of the stack is the
202 # current list.
203my @listdata = (); # similar to @listitem, but for the text after
204 # an =item
205my @listend = (); # similar to @listitem, but the text to use to
206 # end the list.
207my $ignore = 1; # whether or not to format text. we don't
208 # format text until we hit our first pod
209 # directive.
210
211my %items_named = (); # for the multiples of the same item in perlfunc
212my @items_seen = ();
213my $netscape = 0; # whether or not to use netscape directives.
214my $title; # title to give the pod(s)
215my $top = 1; # true if we are at the top of the doc. used
216 # to prevent the first <HR> directive.
217my $paragraph; # which paragraph we're processing (used
218 # for error messages)
219my %pages = (); # associative array used to find the location
220 # of pages referenced by L<> links.
221my %sections = (); # sections within this page
222my %items = (); # associative array used to find the location
223 # of =item directives referenced by C<> links
39e571d4
ML
224my $Is83; # is dos with short filenames (8.3)
225
54310121
PP
226sub init_globals {
227$dircache = "pod2html-dircache";
228$itemcache = "pod2html-itemcache";
229
230@begin_stack = (); # begin/end stack
231
232@libpods = (); # files to search for links from C<> directives
233$htmlroot = "/"; # http-server base directory from which all
234 # relative paths in $podpath stem.
235$htmlfile = ""; # write to stdout by default
236$podfile = ""; # read from stdin by default
237@podpath = (); # list of directories containing library pods.
238$podroot = "."; # filesystem base directory from which all
239 # relative paths in $podpath stem.
240$recurse = 1; # recurse on subdirectories in $podpath.
241$verbose = 0; # not verbose by default
242$doindex = 1; # non-zero if we should generate an index
243$listlevel = 0; # current list depth
244@listitem = (); # stack of HTML commands to use when a =item is
245 # encountered. the top of the stack is the
246 # current list.
247@listdata = (); # similar to @listitem, but for the text after
248 # an =item
249@listend = (); # similar to @listitem, but the text to use to
250 # end the list.
251$ignore = 1; # whether or not to format text. we don't
252 # format text until we hit our first pod
253 # directive.
254
255@items_seen = ();
256%items_named = ();
257$netscape = 0; # whether or not to use netscape directives.
258$title = ''; # title to give the pod(s)
259$top = 1; # true if we are at the top of the doc. used
260 # to prevent the first <HR> directive.
261$paragraph = ''; # which paragraph we're processing (used
262 # for error messages)
54310121 263%sections = (); # sections within this page
3e3baf6d
TB
264
265# These are not reinitialised here but are kept as a cache.
266# See get_cache and related cache management code.
267#%pages = (); # associative array used to find the location
268 # of pages referenced by L<> links.
269#%items = (); # associative array used to find the location
54310121 270 # of =item directives referenced by C<> links
39e571d4 271$Is83=$^O eq 'dos';
54310121
PP
272}
273
274sub pod2html {
275 local(@ARGV) = @_;
276 local($/);
277 local $_;
278
279 init_globals();
280
39e571d4
ML
281 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
282
54310121 283 # cache of %pages and %items from last time we ran pod2html
54310121
PP
284
285 #undef $opt_help if defined $opt_help;
286
287 # parse the command-line parameters
288 parse_command_line();
289
290 # set some variables to their default values if necessary
291 local *POD;
292 unless (@ARGV && $ARGV[0]) {
293 $podfile = "-" unless $podfile; # stdin
294 open(POD, "<$podfile")
295 || die "$0: cannot open $podfile file for input: $!\n";
296 } else {
297 $podfile = $ARGV[0]; # XXX: might be more filenames
298 *POD = *ARGV;
299 }
300 $htmlfile = "-" unless $htmlfile; # stdout
301 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9
BS
302 $htmldir =~ s#/$## ; # so we don't get a //
303 if ( $htmlroot eq ''
304 && defined( $htmldir )
305 && $htmldir ne ''
306 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
307 )
5a039dd3 308 {
29f227c9
BS
309 # Set the 'base' url for this file, so that we can use it
310 # as the location from which to calculate relative links
311 # to other files. If this is '', then absolute links will
312 # be used throughout.
313 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 314 }
54310121
PP
315
316 # read the pod a paragraph at a time
317 warn "Scanning for sections in input file(s)\n" if $verbose;
318 $/ = "";
319 my @poddata = <POD>;
320 close(POD);
321
322 # scan the pod for =head[1-6] directives and build an index
323 my $index = scan_headings(\%sections, @poddata);
324
3e3baf6d
TB
325 unless($index) {
326 warn "No pod in $podfile\n" if $verbose;
327 return;
328 }
329
54310121
PP
330 # open the output file
331 open(HTML, ">$htmlfile")
332 || die "$0: cannot open $htmlfile file for output: $!\n";
333
d011ffae
GS
334 # put a title in the HTML file if one wasn't specified
335 if ($title eq '') {
336 TITLE_SEARCH: {
337 for (my $i = 0; $i < @poddata; $i++) {
338 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
339 for my $para ( @poddata[$i, $i+1] ) {
340 last TITLE_SEARCH
341 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
342 }
343 }
54310121 344
d011ffae
GS
345 }
346 }
347 }
3e3baf6d
TB
348 if (!$title and $podfile =~ /\.pod$/) {
349 # probably a split pod so take first =head[12] as title
350 for (my $i = 0; $i < @poddata; $i++) {
351 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
352 }
353 warn "adopted '$title' as title for $podfile\n"
354 if $verbose and $title;
355 }
7b8d334a
GS
356 if ($title) {
357 $title =~ s/\s*\(.*\)//;
358 } else {
3e3baf6d 359 warn "$0: no title for $podfile";
54310121
PP
360 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
361 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 362 warn "using $title" if $verbose;
54310121
PP
363 }
364 print HTML <<END_OF_HEAD;
7b8d334a
GS
365<HTML>
366<HEAD>
367<TITLE>$title</TITLE>
368<LINK REV="made" HREF="mailto:$Config{perladmin}">
369</HEAD>
54310121 370
7b8d334a 371<BODY>
54310121
PP
372
373END_OF_HEAD
374
3e3baf6d
TB
375 # load/reload/validate/cache %pages and %items
376 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121
PP
377
378 # scan the pod for =item directives
379 scan_items("", \%items, @poddata);
380
381 # put an index at the top of the file. note, if $doindex is 0 we
382 # still generate an index, but surround it with an html comment.
383 # that way some other program can extract it if desired.
384 $index =~ s/--+/-/g;
385 print HTML "<!-- INDEX BEGIN -->\n";
386 print HTML "<!--\n" unless $doindex;
387 print HTML $index;
388 print HTML "-->\n" unless $doindex;
389 print HTML "<!-- INDEX END -->\n\n";
390 print HTML "<HR>\n" if $doindex;
391
392 # now convert this file
393 warn "Converting input file\n" if $verbose;
394 foreach my $i (0..$#poddata) {
395 $_ = $poddata[$i];
396 $paragraph = $i+1;
397 if (/^(=.*)/s) { # is it a pod directive?
398 $ignore = 0;
399 $_ = $1;
400 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
401 process_begin($1, $2);
402 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
403 process_end($1, $2);
404 } elsif (/^=cut/) { # =cut
405 process_cut();
406 } elsif (/^=pod/) { # =pod
407 process_pod();
408 } else {
409 next if @begin_stack && $begin_stack[-1] ne 'html';
410
7b8d334a 411 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
54310121 412 process_head($1, $2);
7b8d334a 413 } elsif (/^=item\s*(.*\S)/sm) { # =item text
54310121
PP
414 process_item($1);
415 } elsif (/^=over\s*(.*)/) { # =over N
416 process_over();
417 } elsif (/^=back/) { # =back
418 process_back();
419 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
420 process_for($1,$2);
421 } else {
422 /^=(\S*)\s*/;
423 warn "$0: $podfile: unknown pod directive '$1' in "
424 . "paragraph $paragraph. ignoring.\n";
425 }
426 }
427 $top = 0;
428 }
429 else {
430 next if $ignore;
431 next if @begin_stack && $begin_stack[-1] ne 'html';
432 my $text = $_;
433 process_text(\$text, 1);
7b8d334a 434 print HTML "<P>\n$text";
54310121
PP
435 }
436 }
437
438 # finish off any pending directives
439 finish_list();
440 print HTML <<END_OF_TAIL;
7b8d334a 441</BODY>
54310121 442
7b8d334a 443</HTML>
54310121
PP
444END_OF_TAIL
445
446 # close the html file
447 close(HTML);
448
449 warn "Finished\n" if $verbose;
450}
451
452##############################################################################
453
454my $usage; # see below
455sub usage {
456 my $podfile = shift;
457 warn "$0: $podfile: @_\n" if @_;
458 die $usage;
459}
460
461$usage =<<END_OF_USAGE;
462Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
463 --podpath=<name>:...:<name> --podroot=<name>
464 --libpods=<name>:...:<name> --recurse --verbose --index
465 --netscape --norecurse --noindex
466
467 --flush - flushes the item and directory caches.
468 --help - prints this message.
469 --htmlroot - http-server base directory from which all relative paths
470 in podpath stem (default is /).
471 --index - generate an index at the top of the resulting html
472 (default).
473 --infile - filename for the pod to convert (input taken from stdin
474 by default).
475 --libpods - colon-separated list of pages to search for =item pod
476 directives in as targets of C<> and implicit links (empty
477 by default). note, these are not filenames, but rather
478 page names like those that appear in L<> links.
479 --netscape - will use netscape html directives when applicable.
480 --nonetscape - will not use netscape directives (default).
481 --outfile - filename for the resulting html file (output sent to
482 stdout by default).
483 --podpath - colon-separated list of directories containing library
484 pods. empty by default.
485 --podroot - filesystem base directory from which all relative paths
486 in podpath stem (default is .).
487 --noindex - don't generate an index at the top of the resulting html.
488 --norecurse - don't recurse on those subdirectories listed in podpath.
489 --recurse - recurse on those subdirectories listed in podpath
490 (default behavior).
491 --title - title that will appear in resulting html file.
492 --verbose - self-explanatory
493
494END_OF_USAGE
495
496sub parse_command_line {
29f227c9 497 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
54310121 498 my $result = GetOptions(
29f227c9
BS
499 'flush' => \$opt_flush,
500 'help' => \$opt_help,
501 'htmldir=s' => \$opt_htmldir,
54310121 502 'htmlroot=s' => \$opt_htmlroot,
29f227c9 503 'index!' => \$opt_index,
54310121
PP
504 'infile=s' => \$opt_infile,
505 'libpods=s' => \$opt_libpods,
506 'netscape!' => \$opt_netscape,
507 'outfile=s' => \$opt_outfile,
508 'podpath=s' => \$opt_podpath,
509 'podroot=s' => \$opt_podroot,
510 'norecurse' => \$opt_norecurse,
511 'recurse!' => \$opt_recurse,
512 'title=s' => \$opt_title,
513 'verbose' => \$opt_verbose,
514 );
515 usage("-", "invalid parameters") if not $result;
516
517 usage("-") if defined $opt_help; # see if the user asked for help
518 $opt_help = ""; # just to make -w shut-up.
519
520 $podfile = $opt_infile if defined $opt_infile;
521 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 522 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121
PP
523
524 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
525 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
526
527 warn "Flushing item and directory caches\n"
528 if $opt_verbose && defined $opt_flush;
529 unlink($dircache, $itemcache) if defined $opt_flush;
530
531 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
532 $podroot = $opt_podroot if defined $opt_podroot;
533
534 $doindex = $opt_index if defined $opt_index;
535 $recurse = $opt_recurse if defined $opt_recurse;
536 $title = $opt_title if defined $opt_title;
537 $verbose = defined $opt_verbose ? 1 : 0;
538 $netscape = $opt_netscape if defined $opt_netscape;
539}
540
3e3baf6d
TB
541
542my $saved_cache_key;
543
544sub get_cache {
545 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
546 my @cache_key_args = @_;
547
548 # A first-level cache:
549 # Don't bother reading the cache files if they still apply
550 # and haven't changed since we last read them.
551
552 my $this_cache_key = cache_key(@cache_key_args);
553
554 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
555
556 # load the cache of %pages and %items if possible. $tests will be
557 # non-zero if successful.
558 my $tests = 0;
559 if (-f $dircache && -f $itemcache) {
560 warn "scanning for item cache\n" if $verbose;
561 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
562 }
563
564 # if we didn't succeed in loading the cache then we must (re)build
565 # %pages and %items.
566 if (!$tests) {
567 warn "scanning directories in pod-path\n" if $verbose;
568 scan_podpath($podroot, $recurse, 0);
569 }
570 $saved_cache_key = cache_key(@cache_key_args);
571}
572
573sub cache_key {
574 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
575 return join('!', $dircache, $itemcache, $recurse,
29f227c9 576 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
577}
578
54310121 579#
3e3baf6d 580# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121
PP
581# are valid caches of %pages and %items. if they are valid then it loads
582# them and returns a non-zero value.
583#
3e3baf6d
TB
584
585sub load_cache {
54310121
PP
586 my($dircache, $itemcache, $podpath, $podroot) = @_;
587 my($tests);
588 local $_;
589
590 $tests = 0;
591
592 open(CACHE, "<$itemcache") ||
593 die "$0: error opening $itemcache for reading: $!\n";
594 $/ = "\n";
595
596 # is it the same podpath?
597 $_ = <CACHE>;
598 chomp($_);
3e3baf6d 599 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
600
601 # is it the same podroot?
602 $_ = <CACHE>;
603 chomp($_);
604 $tests++ if ($podroot eq $_);
605
606 # load the cache if its good
607 if ($tests != 2) {
608 close(CACHE);
54310121
PP
609 return 0;
610 }
611
612 warn "loading item cache\n" if $verbose;
613 while (<CACHE>) {
614 /(.*?) (.*)$/;
615 $items{$1} = $2;
616 }
617 close(CACHE);
618
619 warn "scanning for directory cache\n" if $verbose;
620 open(CACHE, "<$dircache") ||
621 die "$0: error opening $dircache for reading: $!\n";
622 $/ = "\n";
623 $tests = 0;
624
625 # is it the same podpath?
626 $_ = <CACHE>;
627 chomp($_);
3e3baf6d 628 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
629
630 # is it the same podroot?
631 $_ = <CACHE>;
632 chomp($_);
633 $tests++ if ($podroot eq $_);
634
635 # load the cache if its good
636 if ($tests != 2) {
637 close(CACHE);
54310121
PP
638 return 0;
639 }
640
641 warn "loading directory cache\n" if $verbose;
642 while (<CACHE>) {
643 /(.*?) (.*)$/;
644 $pages{$1} = $2;
645 }
646
647 close(CACHE);
648
649 return 1;
650}
651
652#
653# scan_podpath - scans the directories specified in @podpath for directories,
654# .pod files, and .pm files. it also scans the pod files specified in
655# @libpods for =item directives.
656#
657sub scan_podpath {
3e3baf6d 658 my($podroot, $recurse, $append) = @_;
54310121
PP
659 my($pwd, $dir);
660 my($libpod, $dirname, $pod, @files, @poddata);
661
3e3baf6d
TB
662 unless($append) {
663 %items = ();
664 %pages = ();
665 }
666
54310121
PP
667 # scan each directory listed in @podpath
668 $pwd = getcwd();
669 chdir($podroot)
670 || die "$0: error changing to directory $podroot: $!\n";
671 foreach $dir (@podpath) {
672 scan_dir($dir, $recurse);
673 }
674
675 # scan the pods listed in @libpods for =item directives
676 foreach $libpod (@libpods) {
677 # if the page isn't defined then we won't know where to find it
678 # on the system.
679 next unless defined $pages{$libpod} && $pages{$libpod};
680
681 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
682 # NOTE: Only finds the first so-named directory in the tree.
683# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
684 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
685 # find all the .pod and .pm files within the directory
686 $dirname = $1;
687 opendir(DIR, $dirname) ||
688 die "$0: error opening directory $dirname: $!\n";
689 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
690 closedir(DIR);
691
692 # scan each .pod and .pm file for =item directives
693 foreach $pod (@files) {
694 open(POD, "<$dirname/$pod") ||
695 die "$0: error opening $dirname/$pod for input: $!\n";
696 @poddata = <POD>;
697 close(POD);
698
699 scan_items("$dirname/$pod", @poddata);
700 }
701
702 # use the names of files as =item directives too.
703 foreach $pod (@files) {
704 $pod =~ /^(.*)(\.pod|\.pm)$/;
705 $items{$1} = "$dirname/$1.html" if $1;
706 }
707 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
708 $pages{$libpod} =~ /([^:]*\.pm):/) {
709 # scan the .pod or .pm file for =item directives
710 $pod = $1;
711 open(POD, "<$pod") ||
712 die "$0: error opening $pod for input: $!\n";
713 @poddata = <POD>;
714 close(POD);
715
716 scan_items("$pod", @poddata);
717 } else {
718 warn "$0: shouldn't be here (line ".__LINE__."\n";
719 }
720 }
721 @poddata = (); # clean-up a bit
722
723 chdir($pwd)
724 || die "$0: error changing to directory $pwd: $!\n";
725
726 # cache the item list for later use
727 warn "caching items for later use\n" if $verbose;
728 open(CACHE, ">$itemcache") ||
729 die "$0: error open $itemcache for writing: $!\n";
730
731 print CACHE join(":", @podpath) . "\n$podroot\n";
732 foreach my $key (keys %items) {
733 print CACHE "$key $items{$key}\n";
734 }
735
736 close(CACHE);
737
738 # cache the directory list for later use
739 warn "caching directories for later use\n" if $verbose;
740 open(CACHE, ">$dircache") ||
741 die "$0: error open $dircache for writing: $!\n";
742
743 print CACHE join(":", @podpath) . "\n$podroot\n";
744 foreach my $key (keys %pages) {
745 print CACHE "$key $pages{$key}\n";
746 }
747
748 close(CACHE);
749}
750
751#
752# scan_dir - scans the directory specified in $dir for subdirectories, .pod
753# files, and .pm files. notes those that it finds. this information will
754# be used later in order to figure out where the pages specified in L<>
755# links are on the filesystem.
756#
757sub scan_dir {
758 my($dir, $recurse) = @_;
759 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
760 local $_;
761
762 @subdirs = ();
763 @pods = ();
764
765 opendir(DIR, $dir) ||
766 die "$0: error opening directory $dir: $!\n";
767 while (defined($_ = readdir(DIR))) {
768 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
769 $pages{$_} = "" unless defined $pages{$_};
770 $pages{$_} .= "$dir/$_:";
771 push(@subdirs, $_);
772 } elsif (/\.pod$/) { # .pod
773 s/\.pod$//;
774 $pages{$_} = "" unless defined $pages{$_};
775 $pages{$_} .= "$dir/$_.pod:";
776 push(@pods, "$dir/$_.pod");
777 } elsif (/\.pm$/) { # .pm
778 s/\.pm$//;
779 $pages{$_} = "" unless defined $pages{$_};
780 $pages{$_} .= "$dir/$_.pm:";
781 push(@pods, "$dir/$_.pm");
782 }
783 }
784 closedir(DIR);
785
786 # recurse on the subdirectories if necessary
787 if ($recurse) {
788 foreach my $subdir (@subdirs) {
789 scan_dir("$dir/$subdir", $recurse);
790 }
791 }
792}
793
794#
795# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
796# build an index.
797#
798sub scan_headings {
799 my($sections, @data) = @_;
800 my($tag, $which_head, $title, $listdepth, $index);
801
be173d55
UC
802 # here we need local $ignore = 0;
803 # unfortunately, we can't have it, because $ignore is lexical
804 $ignore = 0;
805
54310121
PP
806 $listdepth = 0;
807 $index = "";
808
809 # scan for =head directives, note their name, and build an index
810 # pointing to each of them.
811 foreach my $line (@data) {
bb9460ed 812 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
54310121
PP
813 ($tag,$which_head, $title) = ($1,$2,$3);
814 chomp($title);
815 $$sections{htmlify(0,$title)} = 1;
816
102c538a
DS
817 while ($which_head != $listdepth) {
818 if ($which_head > $listdepth) {
819 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
820 $listdepth++;
821 } elsif ($which_head < $listdepth) {
822 $listdepth--;
823 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
824 }
54310121 825 }
54310121
PP
826
827 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
be173d55 828 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
7b8d334a 829 html_escape(process_text(\$title, 0)) . "</A>";
54310121
PP
830 }
831 }
832
833 # finish off the lists
834 while ($listdepth--) {
835 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
836 }
837
838 # get rid of bogus lists
839 $index =~ s,\t*<UL>\s*</UL>\n,,g;
840
bb9460ed 841 $ignore = 1; # restore old value;
be173d55 842
54310121
PP
843 return $index;
844}
845
846#
847# scan_items - scans the pod specified by $pod for =item directives. we
848# will use this information later on in resolving C<> links.
849#
850sub scan_items {
851 my($pod, @poddata) = @_;
852 my($i, $item);
853 local $_;
854
855 $pod =~ s/\.pod$//;
856 $pod .= ".html" if $pod;
857
858 foreach $i (0..$#poddata) {
859 $_ = $poddata[$i];
860
861 # remove any formatting instructions
862 s,[A-Z]<([^<>]*)>,$1,g;
863
864 # figure out what kind of item it is and get the first word of
865 # it's name.
866 if (/^=item\s+(\w*)\s*.*$/s) {
867 if ($1 eq "*") { # bullet list
868 /\A=item\s+\*\s*(.*?)\s*\Z/s;
869 $item = $1;
7b8d334a
GS
870 } elsif ($1 =~ /^\d+/) { # numbered list
871 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
54310121
PP
872 $item = $1;
873 } else {
874# /\A=item\s+(.*?)\s*\Z/s;
875 /\A=item\s+(\w*)/s;
876 $item = $1;
877 }
878
879 $items{$item} = "$pod" if $item;
880 }
881 }
882}
883
884#
885# process_head - convert a pod head[1-6] tag and convert it to HTML format.
886#
887sub process_head {
888 my($tag, $heading) = @_;
889 my $firstword;
890
891 # figure out the level of the =head
892 $tag =~ /head([1-6])/;
893 my $level = $1;
894
895 # can't have a heading full of spaces and speechmarks and so on
896 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
897
898 print HTML "<P>\n" unless $listlevel;
899 print HTML "<HR>\n" unless $listlevel || $top;
900 print HTML "<H$level>"; # unless $listlevel;
901 #print HTML "<H$level>" unless $listlevel;
be173d55 902 my $convert = $heading; process_text(\$convert, 0);
7b8d334a 903 $convert = html_escape($convert);
54310121
PP
904 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
905 print HTML "</H$level>"; # unless $listlevel;
906 print HTML "\n";
907}
908
909#
910# process_item - convert a pod item tag and convert it to HTML format.
911#
912sub process_item {
913 my $text = $_[0];
914 my($i, $quote, $name);
915
916 my $need_preamble = 0;
917 my $this_entry;
918
919
920 # lots of documents start a list without doing an =over. this is
921 # bad! but, the proper thing to do seems to be to just assume
922 # they did do an =over. so warn them once and then continue.
923 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
924 unless $listlevel;
925 process_over() unless $listlevel;
926
927 return unless $listlevel;
928
929 # remove formatting instructions from the text
930 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
931 pre_escape(\$text);
932
933 $need_preamble = $items_seen[$listlevel]++ == 0;
934
935 # check if this is the first =item after an =over
936 $i = $listlevel - 1;
937 my $need_new = $listlevel >= @listitem;
938
939 if ($text =~ /\A\*/) { # bullet
940
941 if ($need_preamble) {
942 push(@listend, "</UL>");
943 print HTML "<UL>\n";
944 }
945
7b8d334a
GS
946 print HTML '<LI>';
947 if ($text =~ /\A\*\s*(.+)\Z/s) {
948 print HTML '<STRONG>';
949 if ($items_named{$1}++) {
950 print HTML html_escape($1);
951 } else {
952 my $name = 'item_' . htmlify(1,$1);
953 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
954 }
955 print HTML '</STRONG>';
956 }
54310121 957
7b8d334a 958 } elsif ($text =~ /\A[\d#]+/) { # numbered list
54310121
PP
959
960 if ($need_preamble) {
961 push(@listend, "</OL>");
962 print HTML "<OL>\n";
963 }
964
7b8d334a
GS
965 print HTML '<LI>';
966 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
967 print HTML '<STRONG>';
968 if ($items_named{$1}++) {
969 print HTML html_escape($1);
970 } else {
971 my $name = 'item_' . htmlify(0,$1);
972 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
973 }
974 print HTML '</STRONG>';
975 }
54310121
PP
976
977 } else { # all others
978
979 if ($need_preamble) {
980 push(@listend, '</DL>');
981 print HTML "<DL>\n";
982 }
983
7b8d334a
GS
984 print HTML '<DT>';
985 if ($text =~ /(\S+)/) {
986 print HTML '<STRONG>';
987 if ($items_named{$1}++) {
988 print HTML html_escape($text);
989 } else {
990 my $name = 'item_' . htmlify(1,$text);
991 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
992 }
993 print HTML '</STRONG>';
994 }
54310121
PP
995 print HTML '<DD>';
996 }
997
998 print HTML "\n";
999}
1000
1001#
1002# process_over - process a pod over tag and start a corresponding HTML
1003# list.
1004#
1005sub process_over {
1006 # start a new list
1007 $listlevel++;
1008}
1009
1010#
1011# process_back - process a pod back tag and convert it to HTML format.
1012#
1013sub process_back {
2ceaccd7 1014 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
54310121
PP
1015 unless $listlevel;
1016 return unless $listlevel;
1017
1018 # close off the list. note, I check to see if $listend[$listlevel] is
1019 # defined because an =item directive may have never appeared and thus
1020 # $listend[$listlevel] may have never been initialized.
1021 $listlevel--;
1022 print HTML $listend[$listlevel] if defined $listend[$listlevel];
1023 print HTML "\n";
1024
1025 # don't need the corresponding perl code anymore
1026 pop(@listitem);
1027 pop(@listdata);
1028 pop(@listend);
1029
1030 pop(@items_seen);
1031}
1032
1033#
1034# process_cut - process a pod cut tag, thus stop ignoring pod directives.
1035#
1036sub process_cut {
1037 $ignore = 1;
1038}
1039
1040#
1041# process_pod - process a pod pod tag, thus ignore pod directives until we see a
1042# corresponding cut.
1043#
1044sub process_pod {
1045 # no need to set $ignore to 0 cause the main loop did it
1046}
1047
1048#
1049# process_for - process a =for pod tag. if it's for html, split
c4d9b39d 1050# it out verbatim, if illustration, center it, otherwise ignore it.
54310121
PP
1051#
1052sub process_for {
1053 my($whom, $text) = @_;
1054 if ( $whom =~ /^(pod2)?html$/i) {
1055 print HTML $text;
c4d9b39d
IZ
1056 } elsif ($whom =~ /^illustration$/i) {
1057 1 while chomp $text;
1058 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1059 $text .= $ext, last if -r "$text$ext";
1060 }
1061 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1062 }
54310121
PP
1063}
1064
1065#
1066# process_begin - process a =begin pod tag. this pushes
1067# whom we're beginning on the begin stack. if there's a
1068# begin stack, we only print if it us.
1069#
1070sub process_begin {
1071 my($whom, $text) = @_;
1072 $whom = lc($whom);
1073 push (@begin_stack, $whom);
1074 if ( $whom =~ /^(pod2)?html$/) {
1075 print HTML $text if $text;
1076 }
1077}
1078
1079#
1080# process_end - process a =end pod tag. pop the
1081# begin stack. die if we're mismatched.
1082#
1083sub process_end {
1084 my($whom, $text) = @_;
1085 $whom = lc($whom);
1086 if ($begin_stack[-1] ne $whom ) {
1087 die "Unmatched begin/end at chunk $paragraph\n"
1088 }
1089 pop @begin_stack;
1090}
1091
1092#
1093# process_text - handles plaintext that appears in the input pod file.
1094# there may be pod commands embedded within the text so those must be
1095# converted to html commands.
1096#
1097sub process_text {
1098 my($text, $escapeQuotes) = @_;
1099 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1100 my($podcommand, $params, $tag, $quote);
1101
1102 return if $ignore;
1103
1104 $quote = 0; # status of double-quote conversion
1105 $result = "";
1106 $rest = $$text;
1107
1108 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
be173d55
UC
1109 $rest =~ s/\n+\Z//;
1110 $rest =~ s#.*#
1111 my $line = $&;
1112 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1113 $line;
1114 #eg;
54310121
PP
1115
1116 $rest =~ s/&/&amp;/g;
1117 $rest =~ s/</&lt;/g;
1118 $rest =~ s/>/&gt;/g;
1119 $rest =~ s/"/&quot;/g;
1120
1121 # try and create links for all occurrences of perl.* within
1122 # the preformatted text.
1123 $rest =~ s{
1124 (\s*)(perl\w+)
1125 }{
1126 if (defined $pages{$2}) { # is a link
1127 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
39e571d4
ML
1128 } elsif (defined $pages{dosify($2)}) { # is a link
1129 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
54310121
PP
1130 } else {
1131 "$1$2";
1132 }
1133 }xeg;
5a039dd3
BS
1134# $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1135 $rest =~ s{
29f227c9
BS
1136 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1137 }{
1138 my $url ;
1139 if ( $htmlfileurl ne '' ) {
1140 # Here, we take advantage of the knowledge
1141 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1142 # Since $htmlroot eq '', we need to prepend $htmldir
1143 # on the fron of the link to get the absolute path
1144 # of the link's target. We check for a leading '/'
1145 # to avoid corrupting links that are #, file:, etc.
1146 my $old_url = $3 ;
1147 $old_url = "$htmldir$old_url"
1148 if ( $old_url =~ m{^\/} ) ;
1149 $url = relativize_url( "$old_url.html", $htmlfileurl );
1150# print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1151 }
1152 else {
1153 $url = "$3.html" ;
1154 }
5a039dd3
BS
1155 "$1$url" ;
1156 }xeg;
54310121 1157
5a039dd3
BS
1158 # Look for embedded URLs and make them in to links. We don't
1159 # relativize them since they are best left as the author intended.
54310121
PP
1160 my $urls = '(' . join ('|', qw{
1161 http
1162 telnet
1163 mailto
1164 news
1165 gopher
1166 file
1167 wais
1168 ftp
1169 } )
1170 . ')';
1171
1172 my $ltrs = '\w';
1173 my $gunk = '/#~:.?+=&%@!\-';
1174 my $punc = '.:?\-';
1175 my $any = "${ltrs}${gunk}${punc}";
1176
1177 $rest =~ s{
1178 \b # start at word boundary
1179 ( # begin $1 {
29f227c9
BS
1180 $urls : # need resource and a colon
1181 (?!:) # Ignore File::, among others.
54310121
PP
1182 [$any] +? # followed by on or more
1183 # of any valid character, but
1184 # be conservative and take only
1185 # what you need to....
1186 ) # end $1 }
1187 (?= # look-ahead non-consumptive assertion
1188 [$punc]* # either 0 or more puntuation
1189 [^$any] # followed by a non-url char
1190 | # or else
1191 $ # then end of the string
1192 )
1193 }{<A HREF="$1">$1</A>}igox;
1194
1195 $result = "<PRE>" # text should be as it is (verbatim)
1196 . "$rest\n"
1197 . "</PRE>\n";
1198 } else { # formatted text
1199 # parse through the string, stopping each time we find a
1200 # pod-escape. once the string has been throughly processed
1201 # we can output it.
ec978fad 1202 while (length $rest) {
54310121
PP
1203 # check to see if there are any possible pod directives in
1204 # the remaining part of the text.
1205 if ($rest =~ m/[BCEIFLSZ]</) {
1206 warn "\$rest\t= $rest\n" unless
1207 $rest =~ /\A
1208 ([^<]*?)
1209 ([BCEIFLSZ]?)
1210 <
1211 (.*)\Z/xs;
1212
1213 $s1 = $1; # pure text
1214 $s2 = $2; # the type of pod-escape that follows
1215 $s3 = '<'; # '<'
1216 $s4 = $3; # the rest of the string
1217 } else {
1218 $s1 = $rest;
1219 $s2 = "";
1220 $s3 = "";
1221 $s4 = "";
1222 }
1223
1224 if ($s3 eq '<' && $s2) { # a pod-escape
1225 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1226 $podcommand = "$s2<";
1227 $rest = $s4;
1228
1229 # find the matching '>'
1230 $match = 1;
1231 $bf = 0;
1232 while ($match && !$bf) {
1233 $bf = 1;
1234 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1235 $bf = 0;
1236 $match++;
1237 $podcommand .= $1;
1238 $rest = $2;
1239 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1240 $bf = 0;
1241 $match--;
1242 $podcommand .= $1;
1243 $rest = $2;
1244 }
1245 }
1246
1247 if ($match != 0) {
1248 warn <<WARN;
1249$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1250WARN
1251 $result .= substr $podcommand, 0, 2;
1252 $rest = substr($podcommand, 2) . $rest;
1253 next;
1254 }
1255
1256 # pull out the parameters to the pod-escape
1257 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1258 $tag = $1;
1259 $params = $2;
1260
1261 # process the text within the pod-escape so that any escapes
1262 # which must occur do.
1263 process_text(\$params, 0) unless $tag eq 'L';
1264
1265 $s1 = $params;
1266 if (!$tag || $tag eq " ") { # <> : no tag
1267 $s1 = "&lt;$params&gt;";
1268 } elsif ($tag eq "L") { # L<> : link
1269 $s1 = process_L($params);
1270 } elsif ($tag eq "I" || # I<> : italicize text
1271 $tag eq "B" || # B<> : bold text
1272 $tag eq "F") { # F<> : file specification
1273 $s1 = process_BFI($tag, $params);
1274 } elsif ($tag eq "C") { # C<> : literal code
1275 $s1 = process_C($params, 1);
1276 } elsif ($tag eq "E") { # E<> : escape
1277 $s1 = process_E($params);
1278 } elsif ($tag eq "Z") { # Z<> : zero-width character
1279 $s1 = process_Z($params);
1280 } elsif ($tag eq "S") { # S<> : non-breaking space
1281 $s1 = process_S($params);
1282 } elsif ($tag eq "X") { # S<> : non-breaking space
1283 $s1 = process_X($params);
1284 } else {
1285 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1286 }
1287
1288 $result .= "$s1";
1289 } else {
1290 # for pure text we must deal with implicit links and
1291 # double-quotes among other things.
1292 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1293 $rest = $s4;
1294 }
1295 }
1296 }
1297 $$text = $result;
1298}
1299
1300sub html_escape {
1301 my $rest = $_[0];
1302 $rest =~ s/&/&amp;/g;
1303 $rest =~ s/</&lt;/g;
1304 $rest =~ s/>/&gt;/g;
1305 $rest =~ s/"/&quot;/g;
1306 return $rest;
1307}
1308
1309#
1310# process_puretext - process pure text (without pod-escapes) converting
1311# double-quotes and handling implicit C<> links.
1312#
1313sub process_puretext {
1314 my($text, $quote) = @_;
1315 my(@words, $result, $rest, $lead, $trail);
1316
1317 # convert double-quotes to single-quotes
1318 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1319 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1320
1321 $$quote = ($text =~ m/"/ ? 1 : 0);
1322 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1323
1324 # keep track of leading and trailing white-space
1325 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1326 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1327
1328 # collapse all white space into a single space
1329 $text =~ s/\s+/ /g;
1330 @words = split(" ", $text);
1331
1332 # process each word individually
1333 foreach my $word (@words) {
1334 # see if we can infer a link
1335 if ($word =~ /^\w+\(/) {
1336 # has parenthesis so should have been a C<> ref
1337 $word = process_C($word);
1338# $word =~ /^[^()]*]\(/;
1339# if (defined $items{$1} && $items{$1}) {
1340# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1341# . htmlify(0,$word)
1342# . "\">$word</A></CODE>";
1343# } elsif (defined $items{$word} && $items{$word}) {
1344# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1345# . htmlify(0,$word)
1346# . "\">$word</A></CODE>";
1347# } else {
1348# $word = "\n<CODE><A HREF=\"#item_"
1349# . htmlify(0,$word)
1350# . "\">$word</A></CODE>";
1351# }
1352 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1353 # perl variables, should be a C<> ref
1354 $word = process_C($word, 1);
1355 } elsif ($word =~ m,^\w+://\w,) {
1356 # looks like a URL
5a039dd3 1357 # Don't relativize it: leave it as the author intended
54310121 1358 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1359 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1360 # looks like an e-mail address
7b8d334a
GS
1361 my ($w1, $w2, $w3) = ("", $word, "");
1362 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1363 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1364 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1365 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1366 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1367 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1368 } else {
7b8d334a 1369 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1370 }
1371 }
1372
1373 # build a new string based upon our conversion
1374 $result = "";
1375 $rest = join(" ", @words);
1376 while (length($rest) > 75) {
1377 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1378 $rest =~ m/^(\S*)\s(.*?)$/o) {
1379
1380 $result .= "$1\n";
1381 $rest = $2;
1382 } else {
1383 $result .= "$rest\n";
1384 $rest = "";
1385 }
1386 }
1387 $result .= $rest if $rest;
1388
1389 # restore the leading and trailing white-space
1390 $result = "$lead$result$trail";
1391
1392 return $result;
1393}
1394
1395#
1396# pre_escape - convert & in text to $amp;
1397#
1398sub pre_escape {
1399 my($str) = @_;
1400
1401 $$str =~ s,&,&amp;,g;
1402}
1403
1404#
39e571d4
ML
1405# dosify - convert filenames to 8.3
1406#
1407sub dosify {
1408 my($str) = @_;
1409 if ($Is83) {
1410 $str = lc $str;
1411 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1412 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1413 }
1414 return $str;
1415}
1416
1417#
54310121
PP
1418# process_L - convert a pod L<> directive to a corresponding HTML link.
1419# most of the links made are inferred rather than known about directly
1420# (i.e it's not known whether the =head\d section exists in the target file,
1421# or whether a .pod file exists in the case of split files). however, the
1422# guessing usually works.
1423#
1424# Unlike the other directives, this should be called with an unprocessed
1425# string, else tags in the link won't be matched.
1426#
1427sub process_L {
1428 my($str) = @_;
39e571d4 1429 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
54310121
PP
1430
1431 $str =~ s/\n/ /g; # undo word-wrapped tags
1432 $s1 = $str;
1433 for ($s1) {
b74bceb9
AB
1434 # LREF: a la HREF L<show this text|man/section>
1435 $linktext = $1 if s:^([^|]+)\|::;
1436
54310121
PP
1437 # make sure sections start with a /
1438 s,^",/",g;
1439 s,^,/,g if (!m,/, && / /);
1440
1441 # check if there's a section specified
1442 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1443 ($page, $section) = ($1, $2);
1444 } else { # no
1445 ($page, $section) = ($str, "");
1446 }
1447
1448 # check if we know that this is a section in this page
1449 if (!defined $pages{$page} && defined $sections{$page}) {
1450 $section = $page;
1451 $page = "";
1452 }
29f227c9
BS
1453
1454 # remove trailing punctuation, like ()
1455 $section =~ s/\W*$// ;
54310121
PP
1456 }
1457
39e571d4
ML
1458 $page83=dosify($page);
1459 $page=$page83 if (defined $pages{$page83});
54310121
PP
1460 if ($page eq "") {
1461 $link = "#" . htmlify(0,$section);
b74bceb9 1462 $linktext = $section unless defined($linktext);
350ccacd
GB
1463 } elsif ( $page =~ /::/ ) {
1464 $linktext = ($section ? "$section" : "$page");
1465 $page =~ s,::,/,g;
29f227c9
BS
1466 # Search page cache for an entry keyed under the html page name,
1467 # then look to see what directory that page might be in. NOTE:
1468 # this will only find one page. A better solution might be to produce
1469 # an intermediate page that is an index to all such pages.
1470 my $page_name = $page ;
1471 $page_name =~ s,^.*/,, ;
1472 if ( defined( $pages{ $page_name } ) &&
1473 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1474 ) {
1475 $page = $1 ;
1476 }
1477 else {
1478 # NOTE: This branch assumes that all A::B pages are located in
1479 # $htmlroot/A/B.html . This is often incorrect, since they are
1480 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1481 # analyze the contents of %pages and figure out where any
1482 # cousins of A::B are, then assume that. So, if A::B isn't found,
1483 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1484 # lib/A/B.pm. This is also limited, but it's an improvement.
1485 # Maybe a hints file so that the links point to the correct places
1486 # non-theless?
1487 # Also, maybe put a warn "$0: cannot resolve..." here.
1488 }
350ccacd
GB
1489 $link = "$htmlroot/$page.html";
1490 $link .= "#" . htmlify(0,$section) if ($section);
54310121
PP
1491 } elsif (!defined $pages{$page}) {
1492 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1493 $link = "";
b74bceb9 1494 $linktext = $page unless defined($linktext);
54310121 1495 } else {
b74bceb9 1496 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
54310121
PP
1497 $section = htmlify(0,$section) if $section ne "";
1498
1499 # if there is a directory by the name of the page, then assume that an
1500 # appropriate section will exist in the subdirectory
29f227c9
BS
1501# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1502 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
1503 $link = "$htmlroot/$1/$section.html";
1504
1505 # since there is no directory by the name of the page, the section will
1506 # have to exist within a .html of the same name. thus, make sure there
1507 # is a .pod or .pm that might become that .html
1508 } else {
1509 $section = "#$section";
1510 # check if there is a .pod with the page name
1511 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1512 $link = "$htmlroot/$1.html$section";
1513 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1514 $link = "$htmlroot/$1.html$section";
1515 } else {
1516 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1517 "no .pod or .pm found\n";
1518 $link = "";
b74bceb9 1519 $linktext = $section unless defined($linktext);
54310121
PP
1520 }
1521 }
1522 }
1523
1524 process_text(\$linktext, 0);
1525 if ($link) {
29f227c9
BS
1526 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1527 # implies $htmlroot eq ''. This means that the link in question
1528 # needs a prefix of $htmldir if it begins with '/'. The test for
1529 # the initial '/' is done to avoid '#'-only links, and to allow
1530 # for other kinds of links, like file:, ftp:, etc.
1531 my $url ;
1532 if ( $htmlfileurl ne '' ) {
1533 $link = "$htmldir$link"
1534 if ( $link =~ m{^/} ) ;
1535
1536 $url = relativize_url( $link, $htmlfileurl ) ;
1537# print( " b: [$link,$htmlfileurl,$url]\n" ) ;
1538 }
1539 else {
1540 $url = $link ;
1541 }
1542
5a039dd3 1543 $s1 = "<A HREF=\"$url\">$linktext</A>";
54310121
PP
1544 } else {
1545 $s1 = "<EM>$linktext</EM>";
1546 }
1547 return $s1;
1548}
1549
1550#
29f227c9
BS
1551# relativize_url - convert an absolute URL to one relative to a base URL.
1552# Assumes both end in a filename.
1553#
1554sub relativize_url {
1555 my ($dest,$source) = @_ ;
1556
1557 my ($dest_volume,$dest_directory,$dest_file) =
1558 File::Spec::Unix->splitpath( $dest ) ;
1559 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1560
1561 my ($source_volume,$source_directory,$source_file) =
1562 File::Spec::Unix->splitpath( $source ) ;
1563 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1564
1565 my $rel_path = '' ;
1566 if ( $dest ne '' ) {
1567 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1568 }
1569
1570 if ( $rel_path ne '' &&
1571 substr( $rel_path, -1 ) ne '/' &&
1572 substr( $dest_file, 0, 1 ) ne '#'
1573 ) {
1574 $rel_path .= "/$dest_file" ;
1575 }
1576 else {
1577 $rel_path .= "$dest_file" ;
1578 }
1579
1580 return $rel_path ;
1581}
1582
1583#
54310121
PP
1584# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1585# convert them to corresponding HTML directives.
1586#
1587sub process_BFI {
1588 my($tag, $str) = @_;
1589 my($s1); # work string
1590 my(%repltext) = ( 'B' => 'STRONG',
1591 'F' => 'EM',
1592 'I' => 'EM');
1593
1594 # extract the modified text and convert to HTML
1595 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1596 return $s1;
1597}
1598
1599#
1600# process_C - process the C<> pod-escape.
1601#
1602sub process_C {
1603 my($str, $doref) = @_;
1604 my($s1, $s2);
1605
1606 $s1 = $str;
1607 $s1 =~ s/\([^()]*\)//g; # delete parentheses
be173d55 1608 $s2 = $s1;
54310121 1609 $s1 =~ s/\W//g; # delete bogus characters
7b8d334a 1610 $str = html_escape($str);
54310121
PP
1611
1612 # if there was a pod file that we found earlier with an appropriate
1613 # =item directive, then create a link to that page.
1614 if ($doref && defined $items{$s1}) {
5a039dd3
BS
1615 if ( $items{$s1} ) {
1616 my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
29f227c9
BS
1617 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1618 # implies $htmlroot eq ''.
1619 my $url ;
1620 if ( $htmlfileurl ne '' ) {
1621 $link = "$htmldir$link" ;
1622 $url = relativize_url( $link, $htmlfileurl ) ;
1623 }
1624 else {
1625 $url = $link ;
1626 }
5a039dd3
BS
1627 $s1 = "<A HREF=\"$url\">$str</A>" ;
1628 }
1629 else {
1630 $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
1631 }
54310121
PP
1632 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1633 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1634 } else {
1635 $s1 = "<CODE>$str</CODE>";
1636 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1637 }
1638
1639
1640 return $s1;
1641}
1642
1643#
1644# process_E - process the E<> pod directive which seems to escape a character.
1645#
1646sub process_E {
1647 my($str) = @_;
1648
1649 for ($str) {
1650 s,([^/].*),\&$1\;,g;
1651 }
1652
1653 return $str;
1654}
1655
1656#
1657# process_Z - process the Z<> pod directive which really just amounts to
1658# ignoring it. this allows someone to start a paragraph with an =
1659#
1660sub process_Z {
1661 my($str) = @_;
1662
1663 # there is no equivalent in HTML for this so just ignore it.
1664 $str = "";
1665 return $str;
1666}
1667
1668#
1669# process_S - process the S<> pod directive which means to convert all
1670# spaces in the string to non-breaking spaces (in HTML-eze).
1671#
1672sub process_S {
1673 my($str) = @_;
1674
1675 # convert all spaces in the text to non-breaking spaces in HTML.
1676 $str =~ s/ /&nbsp;/g;
1677 return $str;
1678}
1679
1680#
1681# process_X - this is supposed to make an index entry. we'll just
1682# ignore it.
1683#
1684sub process_X {
1685 return '';
1686}
1687
1688
1689#
29f227c9
BS
1690# Adapted from Nick Ing-Simmons' PodToHtml package.
1691sub relative_url {
1692 my $source_file = shift ;
1693 my $destination_file = shift;
1694
1695 my $source = URI::file->new_abs($source_file);
1696 my $uo = URI::file->new($destination_file,$source)->abs;
1697 return $uo->rel->as_string;
1698}
1699
1700
1701#
54310121
PP
1702# finish_list - finish off any pending HTML lists. this should be called
1703# after the entire pod file has been read and converted.
1704#
1705sub finish_list {
7b8d334a 1706 while ($listlevel > 0) {
54310121
PP
1707 print HTML "</DL>\n";
1708 $listlevel--;
1709 }
1710}
1711
1712#
1713# htmlify - converts a pod section specification to a suitable section
1714# specification for HTML. if first arg is 1, only takes 1st word.
1715#
1716sub htmlify {
1717 my($compact, $heading) = @_;
1718
1719 if ($compact) {
1720 $heading =~ /^(\w+)/;
1721 $heading = $1;
1722 }
1723
1724 # $heading = lc($heading);
1725 $heading =~ s/[^\w\s]/_/g;
1726 $heading =~ s/(\s+)/ /g;
1727 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1728 $heading =~ s/ /_/g;
1729 $heading =~ s/\A(.{32}).*\Z/$1/s;
1730 $heading =~ s/\s+\Z//;
1731 $heading =~ s/_{2,}/_/g;
1732
1733 return $heading;
1734}
1735
1736BEGIN {
1737}
1738
17391;