This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
escape ampersands in <pre> sections
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
29f227c9 8$VERSION = 1.02;
54310121 9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288
FK
15use locale; # make \w work right in non-ASCII lands
16
54310121 17use strict;
18
7b8d334a
GS
19use Config;
20
54310121 21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121 24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121 28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3
BS
48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121 57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
140=item verbose
141
142 --verbose
143
144Display progress messages.
145
146=back
147
148=head1 EXAMPLE
149
150 pod2html("pod2html",
151 "--podpath=lib:ext:pod:vms",
152 "--podroot=/usr/src/perl",
153 "--htmlroot=/perl/nmanual",
154 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
155 "--recurse",
156 "--infile=foo.pod",
157 "--outfile=/perl/nmanual/foo.html");
158
159=head1 AUTHOR
160
161Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
162
163=head1 BUGS
164
165Has trouble with C<> etc in = commands.
166
167=head1 SEE ALSO
168
169L<perlpod>
170
171=head1 COPYRIGHT
172
173This program is distributed under the Artistic License.
174
175=cut
176
177my $dircache = "pod2html-dircache";
178my $itemcache = "pod2html-itemcache";
179
180my @begin_stack = (); # begin/end stack
181
29f227c9
BS
182my @libpods = (); # files to search for links from C<> directives
183my $htmlroot = "/"; # http-server base directory from which all
54310121 184 # relative paths in $podpath stem.
5a039dd3
BS
185my $htmldir = ""; # The directory to which the html pages
186 # will (eventually) be written.
54310121 187my $htmlfile = ""; # write to stdout by default
29f227c9 188my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
189 # refer to this file. This is only used
190 # to make relative urls that point to
191 # other files.
54310121 192my $podfile = ""; # read from stdin by default
193my @podpath = (); # list of directories containing library pods.
194my $podroot = "."; # filesystem base directory from which all
195 # relative paths in $podpath stem.
196my $recurse = 1; # recurse on subdirectories in $podpath.
197my $verbose = 0; # not verbose by default
198my $doindex = 1; # non-zero if we should generate an index
199my $listlevel = 0; # current list depth
200my @listitem = (); # stack of HTML commands to use when a =item is
201 # encountered. the top of the stack is the
202 # current list.
203my @listdata = (); # similar to @listitem, but for the text after
204 # an =item
205my @listend = (); # similar to @listitem, but the text to use to
206 # end the list.
207my $ignore = 1; # whether or not to format text. we don't
208 # format text until we hit our first pod
209 # directive.
210
211my %items_named = (); # for the multiples of the same item in perlfunc
212my @items_seen = ();
213my $netscape = 0; # whether or not to use netscape directives.
214my $title; # title to give the pod(s)
215my $top = 1; # true if we are at the top of the doc. used
216 # to prevent the first <HR> directive.
217my $paragraph; # which paragraph we're processing (used
218 # for error messages)
219my %pages = (); # associative array used to find the location
220 # of pages referenced by L<> links.
221my %sections = (); # sections within this page
222my %items = (); # associative array used to find the location
223 # of =item directives referenced by C<> links
39e571d4
LM
224my $Is83; # is dos with short filenames (8.3)
225
54310121 226sub init_globals {
227$dircache = "pod2html-dircache";
228$itemcache = "pod2html-itemcache";
229
230@begin_stack = (); # begin/end stack
231
232@libpods = (); # files to search for links from C<> directives
233$htmlroot = "/"; # http-server base directory from which all
234 # relative paths in $podpath stem.
235$htmlfile = ""; # write to stdout by default
236$podfile = ""; # read from stdin by default
237@podpath = (); # list of directories containing library pods.
238$podroot = "."; # filesystem base directory from which all
239 # relative paths in $podpath stem.
240$recurse = 1; # recurse on subdirectories in $podpath.
241$verbose = 0; # not verbose by default
242$doindex = 1; # non-zero if we should generate an index
243$listlevel = 0; # current list depth
244@listitem = (); # stack of HTML commands to use when a =item is
245 # encountered. the top of the stack is the
246 # current list.
247@listdata = (); # similar to @listitem, but for the text after
248 # an =item
249@listend = (); # similar to @listitem, but the text to use to
250 # end the list.
251$ignore = 1; # whether or not to format text. we don't
252 # format text until we hit our first pod
253 # directive.
254
255@items_seen = ();
256%items_named = ();
257$netscape = 0; # whether or not to use netscape directives.
258$title = ''; # title to give the pod(s)
259$top = 1; # true if we are at the top of the doc. used
260 # to prevent the first <HR> directive.
261$paragraph = ''; # which paragraph we're processing (used
262 # for error messages)
54310121 263%sections = (); # sections within this page
3e3baf6d
TB
264
265# These are not reinitialised here but are kept as a cache.
266# See get_cache and related cache management code.
267#%pages = (); # associative array used to find the location
268 # of pages referenced by L<> links.
269#%items = (); # associative array used to find the location
54310121 270 # of =item directives referenced by C<> links
39e571d4 271$Is83=$^O eq 'dos';
54310121 272}
273
274sub pod2html {
275 local(@ARGV) = @_;
276 local($/);
277 local $_;
278
279 init_globals();
280
39e571d4
LM
281 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
282
54310121 283 # cache of %pages and %items from last time we ran pod2html
54310121 284
285 #undef $opt_help if defined $opt_help;
286
287 # parse the command-line parameters
288 parse_command_line();
289
290 # set some variables to their default values if necessary
291 local *POD;
292 unless (@ARGV && $ARGV[0]) {
293 $podfile = "-" unless $podfile; # stdin
294 open(POD, "<$podfile")
295 || die "$0: cannot open $podfile file for input: $!\n";
296 } else {
297 $podfile = $ARGV[0]; # XXX: might be more filenames
298 *POD = *ARGV;
299 }
300 $htmlfile = "-" unless $htmlfile; # stdout
301 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9
BS
302 $htmldir =~ s#/$## ; # so we don't get a //
303 if ( $htmlroot eq ''
304 && defined( $htmldir )
305 && $htmldir ne ''
306 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
307 )
5a039dd3 308 {
29f227c9
BS
309 # Set the 'base' url for this file, so that we can use it
310 # as the location from which to calculate relative links
311 # to other files. If this is '', then absolute links will
312 # be used throughout.
313 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 314 }
54310121 315
316 # read the pod a paragraph at a time
317 warn "Scanning for sections in input file(s)\n" if $verbose;
318 $/ = "";
319 my @poddata = <POD>;
320 close(POD);
321
322 # scan the pod for =head[1-6] directives and build an index
323 my $index = scan_headings(\%sections, @poddata);
324
3e3baf6d
TB
325 unless($index) {
326 warn "No pod in $podfile\n" if $verbose;
327 return;
328 }
329
54310121 330 # open the output file
331 open(HTML, ">$htmlfile")
332 || die "$0: cannot open $htmlfile file for output: $!\n";
333
d011ffae
GS
334 # put a title in the HTML file if one wasn't specified
335 if ($title eq '') {
336 TITLE_SEARCH: {
337 for (my $i = 0; $i < @poddata; $i++) {
338 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
339 for my $para ( @poddata[$i, $i+1] ) {
340 last TITLE_SEARCH
341 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
342 }
343 }
54310121 344
d011ffae
GS
345 }
346 }
347 }
3e3baf6d
TB
348 if (!$title and $podfile =~ /\.pod$/) {
349 # probably a split pod so take first =head[12] as title
350 for (my $i = 0; $i < @poddata; $i++) {
351 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
352 }
353 warn "adopted '$title' as title for $podfile\n"
354 if $verbose and $title;
355 }
7b8d334a
GS
356 if ($title) {
357 $title =~ s/\s*\(.*\)//;
358 } else {
3e3baf6d 359 warn "$0: no title for $podfile";
54310121 360 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
361 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 362 warn "using $title" if $verbose;
54310121 363 }
364 print HTML <<END_OF_HEAD;
7b8d334a
GS
365<HTML>
366<HEAD>
367<TITLE>$title</TITLE>
368<LINK REV="made" HREF="mailto:$Config{perladmin}">
369</HEAD>
54310121 370
7b8d334a 371<BODY>
54310121 372
373END_OF_HEAD
374
3e3baf6d
TB
375 # load/reload/validate/cache %pages and %items
376 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 377
378 # scan the pod for =item directives
379 scan_items("", \%items, @poddata);
380
381 # put an index at the top of the file. note, if $doindex is 0 we
382 # still generate an index, but surround it with an html comment.
383 # that way some other program can extract it if desired.
384 $index =~ s/--+/-/g;
385 print HTML "<!-- INDEX BEGIN -->\n";
386 print HTML "<!--\n" unless $doindex;
387 print HTML $index;
388 print HTML "-->\n" unless $doindex;
389 print HTML "<!-- INDEX END -->\n\n";
390 print HTML "<HR>\n" if $doindex;
391
392 # now convert this file
393 warn "Converting input file\n" if $verbose;
394 foreach my $i (0..$#poddata) {
395 $_ = $poddata[$i];
396 $paragraph = $i+1;
397 if (/^(=.*)/s) { # is it a pod directive?
398 $ignore = 0;
399 $_ = $1;
400 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
401 process_begin($1, $2);
402 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
403 process_end($1, $2);
404 } elsif (/^=cut/) { # =cut
405 process_cut();
406 } elsif (/^=pod/) { # =pod
407 process_pod();
408 } else {
409 next if @begin_stack && $begin_stack[-1] ne 'html';
410
7b8d334a 411 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
54310121 412 process_head($1, $2);
7b8d334a 413 } elsif (/^=item\s*(.*\S)/sm) { # =item text
54310121 414 process_item($1);
415 } elsif (/^=over\s*(.*)/) { # =over N
416 process_over();
417 } elsif (/^=back/) { # =back
418 process_back();
419 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
420 process_for($1,$2);
421 } else {
422 /^=(\S*)\s*/;
423 warn "$0: $podfile: unknown pod directive '$1' in "
424 . "paragraph $paragraph. ignoring.\n";
425 }
426 }
427 $top = 0;
428 }
429 else {
430 next if $ignore;
431 next if @begin_stack && $begin_stack[-1] ne 'html';
432 my $text = $_;
433 process_text(\$text, 1);
7b8d334a 434 print HTML "<P>\n$text";
54310121 435 }
436 }
437
438 # finish off any pending directives
439 finish_list();
440 print HTML <<END_OF_TAIL;
7b8d334a 441</BODY>
54310121 442
7b8d334a 443</HTML>
54310121 444END_OF_TAIL
445
446 # close the html file
447 close(HTML);
448
449 warn "Finished\n" if $verbose;
450}
451
452##############################################################################
453
454my $usage; # see below
455sub usage {
456 my $podfile = shift;
457 warn "$0: $podfile: @_\n" if @_;
458 die $usage;
459}
460
461$usage =<<END_OF_USAGE;
462Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
463 --podpath=<name>:...:<name> --podroot=<name>
464 --libpods=<name>:...:<name> --recurse --verbose --index
465 --netscape --norecurse --noindex
466
467 --flush - flushes the item and directory caches.
468 --help - prints this message.
469 --htmlroot - http-server base directory from which all relative paths
470 in podpath stem (default is /).
471 --index - generate an index at the top of the resulting html
472 (default).
473 --infile - filename for the pod to convert (input taken from stdin
474 by default).
475 --libpods - colon-separated list of pages to search for =item pod
476 directives in as targets of C<> and implicit links (empty
477 by default). note, these are not filenames, but rather
478 page names like those that appear in L<> links.
479 --netscape - will use netscape html directives when applicable.
480 --nonetscape - will not use netscape directives (default).
481 --outfile - filename for the resulting html file (output sent to
482 stdout by default).
483 --podpath - colon-separated list of directories containing library
484 pods. empty by default.
485 --podroot - filesystem base directory from which all relative paths
486 in podpath stem (default is .).
487 --noindex - don't generate an index at the top of the resulting html.
488 --norecurse - don't recurse on those subdirectories listed in podpath.
489 --recurse - recurse on those subdirectories listed in podpath
490 (default behavior).
491 --title - title that will appear in resulting html file.
492 --verbose - self-explanatory
493
494END_OF_USAGE
495
496sub parse_command_line {
29f227c9 497 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
54310121 498 my $result = GetOptions(
29f227c9
BS
499 'flush' => \$opt_flush,
500 'help' => \$opt_help,
501 'htmldir=s' => \$opt_htmldir,
54310121 502 'htmlroot=s' => \$opt_htmlroot,
29f227c9 503 'index!' => \$opt_index,
54310121 504 'infile=s' => \$opt_infile,
505 'libpods=s' => \$opt_libpods,
506 'netscape!' => \$opt_netscape,
507 'outfile=s' => \$opt_outfile,
508 'podpath=s' => \$opt_podpath,
509 'podroot=s' => \$opt_podroot,
510 'norecurse' => \$opt_norecurse,
511 'recurse!' => \$opt_recurse,
512 'title=s' => \$opt_title,
513 'verbose' => \$opt_verbose,
514 );
515 usage("-", "invalid parameters") if not $result;
516
517 usage("-") if defined $opt_help; # see if the user asked for help
518 $opt_help = ""; # just to make -w shut-up.
519
520 $podfile = $opt_infile if defined $opt_infile;
521 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 522 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121 523
524 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
525 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
526
527 warn "Flushing item and directory caches\n"
528 if $opt_verbose && defined $opt_flush;
529 unlink($dircache, $itemcache) if defined $opt_flush;
530
531 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
532 $podroot = $opt_podroot if defined $opt_podroot;
533
534 $doindex = $opt_index if defined $opt_index;
535 $recurse = $opt_recurse if defined $opt_recurse;
536 $title = $opt_title if defined $opt_title;
537 $verbose = defined $opt_verbose ? 1 : 0;
538 $netscape = $opt_netscape if defined $opt_netscape;
539}
540
3e3baf6d
TB
541
542my $saved_cache_key;
543
544sub get_cache {
545 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
546 my @cache_key_args = @_;
547
548 # A first-level cache:
549 # Don't bother reading the cache files if they still apply
550 # and haven't changed since we last read them.
551
552 my $this_cache_key = cache_key(@cache_key_args);
553
554 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
555
556 # load the cache of %pages and %items if possible. $tests will be
557 # non-zero if successful.
558 my $tests = 0;
559 if (-f $dircache && -f $itemcache) {
560 warn "scanning for item cache\n" if $verbose;
561 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
562 }
563
564 # if we didn't succeed in loading the cache then we must (re)build
565 # %pages and %items.
566 if (!$tests) {
567 warn "scanning directories in pod-path\n" if $verbose;
568 scan_podpath($podroot, $recurse, 0);
569 }
570 $saved_cache_key = cache_key(@cache_key_args);
571}
572
573sub cache_key {
574 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
575 return join('!', $dircache, $itemcache, $recurse,
29f227c9 576 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
577}
578
54310121 579#
3e3baf6d 580# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 581# are valid caches of %pages and %items. if they are valid then it loads
582# them and returns a non-zero value.
583#
3e3baf6d
TB
584
585sub load_cache {
54310121 586 my($dircache, $itemcache, $podpath, $podroot) = @_;
587 my($tests);
588 local $_;
589
590 $tests = 0;
591
592 open(CACHE, "<$itemcache") ||
593 die "$0: error opening $itemcache for reading: $!\n";
594 $/ = "\n";
595
596 # is it the same podpath?
597 $_ = <CACHE>;
598 chomp($_);
3e3baf6d 599 $tests++ if (join(":", @$podpath) eq $_);
54310121 600
601 # is it the same podroot?
602 $_ = <CACHE>;
603 chomp($_);
604 $tests++ if ($podroot eq $_);
605
606 # load the cache if its good
607 if ($tests != 2) {
608 close(CACHE);
54310121 609 return 0;
610 }
611
612 warn "loading item cache\n" if $verbose;
613 while (<CACHE>) {
614 /(.*?) (.*)$/;
615 $items{$1} = $2;
616 }
617 close(CACHE);
618
619 warn "scanning for directory cache\n" if $verbose;
620 open(CACHE, "<$dircache") ||
621 die "$0: error opening $dircache for reading: $!\n";
622 $/ = "\n";
623 $tests = 0;
624
625 # is it the same podpath?
626 $_ = <CACHE>;
627 chomp($_);
3e3baf6d 628 $tests++ if (join(":", @$podpath) eq $_);
54310121 629
630 # is it the same podroot?
631 $_ = <CACHE>;
632 chomp($_);
633 $tests++ if ($podroot eq $_);
634
635 # load the cache if its good
636 if ($tests != 2) {
637 close(CACHE);
54310121 638 return 0;
639 }
640
641 warn "loading directory cache\n" if $verbose;
642 while (<CACHE>) {
643 /(.*?) (.*)$/;
644 $pages{$1} = $2;
645 }
646
647 close(CACHE);
648
649 return 1;
650}
651
652#
653# scan_podpath - scans the directories specified in @podpath for directories,
654# .pod files, and .pm files. it also scans the pod files specified in
655# @libpods for =item directives.
656#
657sub scan_podpath {
3e3baf6d 658 my($podroot, $recurse, $append) = @_;
54310121 659 my($pwd, $dir);
660 my($libpod, $dirname, $pod, @files, @poddata);
661
3e3baf6d
TB
662 unless($append) {
663 %items = ();
664 %pages = ();
665 }
666
54310121 667 # scan each directory listed in @podpath
668 $pwd = getcwd();
669 chdir($podroot)
670 || die "$0: error changing to directory $podroot: $!\n";
671 foreach $dir (@podpath) {
672 scan_dir($dir, $recurse);
673 }
674
675 # scan the pods listed in @libpods for =item directives
676 foreach $libpod (@libpods) {
677 # if the page isn't defined then we won't know where to find it
678 # on the system.
679 next unless defined $pages{$libpod} && $pages{$libpod};
680
681 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
682 # NOTE: Only finds the first so-named directory in the tree.
683# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
684 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 685 # find all the .pod and .pm files within the directory
686 $dirname = $1;
687 opendir(DIR, $dirname) ||
688 die "$0: error opening directory $dirname: $!\n";
689 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
690 closedir(DIR);
691
692 # scan each .pod and .pm file for =item directives
693 foreach $pod (@files) {
694 open(POD, "<$dirname/$pod") ||
695 die "$0: error opening $dirname/$pod for input: $!\n";
696 @poddata = <POD>;
697 close(POD);
698
699 scan_items("$dirname/$pod", @poddata);
700 }
701
702 # use the names of files as =item directives too.
703 foreach $pod (@files) {
704 $pod =~ /^(.*)(\.pod|\.pm)$/;
705 $items{$1} = "$dirname/$1.html" if $1;
706 }
707 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
708 $pages{$libpod} =~ /([^:]*\.pm):/) {
709 # scan the .pod or .pm file for =item directives
710 $pod = $1;
711 open(POD, "<$pod") ||
712 die "$0: error opening $pod for input: $!\n";
713 @poddata = <POD>;
714 close(POD);
715
716 scan_items("$pod", @poddata);
717 } else {
718 warn "$0: shouldn't be here (line ".__LINE__."\n";
719 }
720 }
721 @poddata = (); # clean-up a bit
722
723 chdir($pwd)
724 || die "$0: error changing to directory $pwd: $!\n";
725
726 # cache the item list for later use
727 warn "caching items for later use\n" if $verbose;
728 open(CACHE, ">$itemcache") ||
729 die "$0: error open $itemcache for writing: $!\n";
730
731 print CACHE join(":", @podpath) . "\n$podroot\n";
732 foreach my $key (keys %items) {
733 print CACHE "$key $items{$key}\n";
734 }
735
736 close(CACHE);
737
738 # cache the directory list for later use
739 warn "caching directories for later use\n" if $verbose;
740 open(CACHE, ">$dircache") ||
741 die "$0: error open $dircache for writing: $!\n";
742
743 print CACHE join(":", @podpath) . "\n$podroot\n";
744 foreach my $key (keys %pages) {
745 print CACHE "$key $pages{$key}\n";
746 }
747
748 close(CACHE);
749}
750
751#
752# scan_dir - scans the directory specified in $dir for subdirectories, .pod
753# files, and .pm files. notes those that it finds. this information will
754# be used later in order to figure out where the pages specified in L<>
755# links are on the filesystem.
756#
757sub scan_dir {
758 my($dir, $recurse) = @_;
759 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
760 local $_;
761
762 @subdirs = ();
763 @pods = ();
764
765 opendir(DIR, $dir) ||
766 die "$0: error opening directory $dir: $!\n";
767 while (defined($_ = readdir(DIR))) {
768 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
769 $pages{$_} = "" unless defined $pages{$_};
770 $pages{$_} .= "$dir/$_:";
771 push(@subdirs, $_);
772 } elsif (/\.pod$/) { # .pod
773 s/\.pod$//;
774 $pages{$_} = "" unless defined $pages{$_};
775 $pages{$_} .= "$dir/$_.pod:";
776 push(@pods, "$dir/$_.pod");
777 } elsif (/\.pm$/) { # .pm
778 s/\.pm$//;
779 $pages{$_} = "" unless defined $pages{$_};
780 $pages{$_} .= "$dir/$_.pm:";
781 push(@pods, "$dir/$_.pm");
782 }
783 }
784 closedir(DIR);
785
786 # recurse on the subdirectories if necessary
787 if ($recurse) {
788 foreach my $subdir (@subdirs) {
789 scan_dir("$dir/$subdir", $recurse);
790 }
791 }
792}
793
794#
795# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
796# build an index.
797#
798sub scan_headings {
799 my($sections, @data) = @_;
800 my($tag, $which_head, $title, $listdepth, $index);
801
be173d55
HM
802 # here we need local $ignore = 0;
803 # unfortunately, we can't have it, because $ignore is lexical
804 $ignore = 0;
805
54310121 806 $listdepth = 0;
807 $index = "";
808
809 # scan for =head directives, note their name, and build an index
810 # pointing to each of them.
811 foreach my $line (@data) {
bb9460ed 812 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
54310121 813 ($tag,$which_head, $title) = ($1,$2,$3);
814 chomp($title);
815 $$sections{htmlify(0,$title)} = 1;
816
102c538a
DS
817 while ($which_head != $listdepth) {
818 if ($which_head > $listdepth) {
819 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
820 $listdepth++;
821 } elsif ($which_head < $listdepth) {
822 $listdepth--;
823 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
824 }
54310121 825 }
54310121 826
827 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
be173d55 828 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
7b8d334a 829 html_escape(process_text(\$title, 0)) . "</A>";
54310121 830 }
831 }
832
833 # finish off the lists
834 while ($listdepth--) {
835 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
836 }
837
838 # get rid of bogus lists
839 $index =~ s,\t*<UL>\s*</UL>\n,,g;
840
bb9460ed 841 $ignore = 1; # restore old value;
be173d55 842
54310121 843 return $index;
844}
845
846#
847# scan_items - scans the pod specified by $pod for =item directives. we
848# will use this information later on in resolving C<> links.
849#
850sub scan_items {
851 my($pod, @poddata) = @_;
852 my($i, $item);
853 local $_;
854
855 $pod =~ s/\.pod$//;
856 $pod .= ".html" if $pod;
857
858 foreach $i (0..$#poddata) {
859 $_ = $poddata[$i];
860
861 # remove any formatting instructions
862 s,[A-Z]<([^<>]*)>,$1,g;
863
864 # figure out what kind of item it is and get the first word of
865 # it's name.
866 if (/^=item\s+(\w*)\s*.*$/s) {
867 if ($1 eq "*") { # bullet list
868 /\A=item\s+\*\s*(.*?)\s*\Z/s;
869 $item = $1;
7b8d334a
GS
870 } elsif ($1 =~ /^\d+/) { # numbered list
871 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
54310121 872 $item = $1;
873 } else {
874# /\A=item\s+(.*?)\s*\Z/s;
875 /\A=item\s+(\w*)/s;
876 $item = $1;
877 }
878
879 $items{$item} = "$pod" if $item;
880 }
881 }
882}
883
884#
885# process_head - convert a pod head[1-6] tag and convert it to HTML format.
886#
887sub process_head {
888 my($tag, $heading) = @_;
889 my $firstword;
890
891 # figure out the level of the =head
892 $tag =~ /head([1-6])/;
893 my $level = $1;
894
895 # can't have a heading full of spaces and speechmarks and so on
896 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
897
898 print HTML "<P>\n" unless $listlevel;
899 print HTML "<HR>\n" unless $listlevel || $top;
900 print HTML "<H$level>"; # unless $listlevel;
901 #print HTML "<H$level>" unless $listlevel;
be173d55 902 my $convert = $heading; process_text(\$convert, 0);
7b8d334a 903 $convert = html_escape($convert);
54310121 904 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
905 print HTML "</H$level>"; # unless $listlevel;
906 print HTML "\n";
907}
908
909#
910# process_item - convert a pod item tag and convert it to HTML format.
911#
912sub process_item {
913 my $text = $_[0];
914 my($i, $quote, $name);
915
916 my $need_preamble = 0;
917 my $this_entry;
918
919
920 # lots of documents start a list without doing an =over. this is
921 # bad! but, the proper thing to do seems to be to just assume
922 # they did do an =over. so warn them once and then continue.
923 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
924 unless $listlevel;
925 process_over() unless $listlevel;
926
927 return unless $listlevel;
928
929 # remove formatting instructions from the text
930 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
931 pre_escape(\$text);
932
933 $need_preamble = $items_seen[$listlevel]++ == 0;
934
935 # check if this is the first =item after an =over
936 $i = $listlevel - 1;
937 my $need_new = $listlevel >= @listitem;
938
939 if ($text =~ /\A\*/) { # bullet
940
941 if ($need_preamble) {
942 push(@listend, "</UL>");
943 print HTML "<UL>\n";
944 }
945
7b8d334a
GS
946 print HTML '<LI>';
947 if ($text =~ /\A\*\s*(.+)\Z/s) {
948 print HTML '<STRONG>';
949 if ($items_named{$1}++) {
950 print HTML html_escape($1);
951 } else {
952 my $name = 'item_' . htmlify(1,$1);
953 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
954 }
955 print HTML '</STRONG>';
956 }
54310121 957
7b8d334a 958 } elsif ($text =~ /\A[\d#]+/) { # numbered list
54310121 959
960 if ($need_preamble) {
961 push(@listend, "</OL>");
962 print HTML "<OL>\n";
963 }
964
7b8d334a
GS
965 print HTML '<LI>';
966 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
967 print HTML '<STRONG>';
968 if ($items_named{$1}++) {
969 print HTML html_escape($1);
970 } else {
971 my $name = 'item_' . htmlify(0,$1);
972 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
973 }
974 print HTML '</STRONG>';
975 }
54310121 976
977 } else { # all others
978
979 if ($need_preamble) {
980 push(@listend, '</DL>');
981 print HTML "<DL>\n";
982 }
983
7b8d334a
GS
984 print HTML '<DT>';
985 if ($text =~ /(\S+)/) {
986 print HTML '<STRONG>';
987 if ($items_named{$1}++) {
988 print HTML html_escape($text);
989 } else {
990 my $name = 'item_' . htmlify(1,$text);
991 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
992 }
993 print HTML '</STRONG>';
994 }
54310121 995 print HTML '<DD>';
996 }
997
998 print HTML "\n";
999}
1000
1001#
1002# process_over - process a pod over tag and start a corresponding HTML
1003# list.
1004#
1005sub process_over {
1006 # start a new list
1007 $listlevel++;
1008}
1009
1010#
1011# process_back - process a pod back tag and convert it to HTML format.
1012#
1013sub process_back {
2ceaccd7 1014 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
54310121 1015 unless $listlevel;
1016 return unless $listlevel;
1017
1018 # close off the list. note, I check to see if $listend[$listlevel] is
1019 # defined because an =item directive may have never appeared and thus
1020 # $listend[$listlevel] may have never been initialized.
1021 $listlevel--;
1022 print HTML $listend[$listlevel] if defined $listend[$listlevel];
1023 print HTML "\n";
1024
1025 # don't need the corresponding perl code anymore
1026 pop(@listitem);
1027 pop(@listdata);
1028 pop(@listend);
1029
1030 pop(@items_seen);
1031}
1032
1033#
1034# process_cut - process a pod cut tag, thus stop ignoring pod directives.
1035#
1036sub process_cut {
1037 $ignore = 1;
1038}
1039
1040#
1041# process_pod - process a pod pod tag, thus ignore pod directives until we see a
1042# corresponding cut.
1043#
1044sub process_pod {
1045 # no need to set $ignore to 0 cause the main loop did it
1046}
1047
1048#
1049# process_for - process a =for pod tag. if it's for html, split
c4d9b39d 1050# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1051#
1052sub process_for {
1053 my($whom, $text) = @_;
1054 if ( $whom =~ /^(pod2)?html$/i) {
1055 print HTML $text;
c4d9b39d
IZ
1056 } elsif ($whom =~ /^illustration$/i) {
1057 1 while chomp $text;
1058 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1059 $text .= $ext, last if -r "$text$ext";
1060 }
1061 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1062 }
54310121 1063}
1064
1065#
1066# process_begin - process a =begin pod tag. this pushes
1067# whom we're beginning on the begin stack. if there's a
1068# begin stack, we only print if it us.
1069#
1070sub process_begin {
1071 my($whom, $text) = @_;
1072 $whom = lc($whom);
1073 push (@begin_stack, $whom);
1074 if ( $whom =~ /^(pod2)?html$/) {
1075 print HTML $text if $text;
1076 }
1077}
1078
1079#
1080# process_end - process a =end pod tag. pop the
1081# begin stack. die if we're mismatched.
1082#
1083sub process_end {
1084 my($whom, $text) = @_;
1085 $whom = lc($whom);
1086 if ($begin_stack[-1] ne $whom ) {
1087 die "Unmatched begin/end at chunk $paragraph\n"
1088 }
1089 pop @begin_stack;
1090}
1091
1092#
1093# process_text - handles plaintext that appears in the input pod file.
1094# there may be pod commands embedded within the text so those must be
1095# converted to html commands.
1096#
1097sub process_text {
1098 my($text, $escapeQuotes) = @_;
1099 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1100 my($podcommand, $params, $tag, $quote);
1101
1102 return if $ignore;
1103
1104 $quote = 0; # status of double-quote conversion
1105 $result = "";
1106 $rest = $$text;
1107
1108 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
be173d55
HM
1109 $rest =~ s/\n+\Z//;
1110 $rest =~ s#.*#
1111 my $line = $&;
1112 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1113 $line;
1114 #eg;
54310121 1115
1116 $rest =~ s/&/&amp;/g;
1117 $rest =~ s/</&lt;/g;
1118 $rest =~ s/>/&gt;/g;
1119 $rest =~ s/"/&quot;/g;
1120
1121 # try and create links for all occurrences of perl.* within
1122 # the preformatted text.
1123 $rest =~ s{
1124 (\s*)(perl\w+)
1125 }{
1126 if (defined $pages{$2}) { # is a link
1127 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
39e571d4
LM
1128 } elsif (defined $pages{dosify($2)}) { # is a link
1129 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
54310121 1130 } else {
1131 "$1$2";
1132 }
1133 }xeg;
5a039dd3
BS
1134# $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1135 $rest =~ s{
29f227c9
BS
1136 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1137 }{
1138 my $url ;
1139 if ( $htmlfileurl ne '' ) {
1140 # Here, we take advantage of the knowledge
1141 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1142 # Since $htmlroot eq '', we need to prepend $htmldir
1143 # on the fron of the link to get the absolute path
1144 # of the link's target. We check for a leading '/'
1145 # to avoid corrupting links that are #, file:, etc.
1146 my $old_url = $3 ;
1147 $old_url = "$htmldir$old_url"
1148 if ( $old_url =~ m{^\/} ) ;
1149 $url = relativize_url( "$old_url.html", $htmlfileurl );
1150# print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1151 }
1152 else {
1153 $url = "$3.html" ;
1154 }
5a039dd3
BS
1155 "$1$url" ;
1156 }xeg;
54310121 1157
5a039dd3
BS
1158 # Look for embedded URLs and make them in to links. We don't
1159 # relativize them since they are best left as the author intended.
54310121 1160 my $urls = '(' . join ('|', qw{
1161 http
1162 telnet
1163 mailto
1164 news
1165 gopher
1166 file
1167 wais
1168 ftp
1169 } )
1170 . ')';
1171
1172 my $ltrs = '\w';
1173 my $gunk = '/#~:.?+=&%@!\-';
1174 my $punc = '.:?\-';
1175 my $any = "${ltrs}${gunk}${punc}";
1176
1177 $rest =~ s{
1178 \b # start at word boundary
1179 ( # begin $1 {
29f227c9
BS
1180 $urls : # need resource and a colon
1181 (?!:) # Ignore File::, among others.
54310121 1182 [$any] +? # followed by on or more
1183 # of any valid character, but
1184 # be conservative and take only
1185 # what you need to....
1186 ) # end $1 }
1187 (?= # look-ahead non-consumptive assertion
1188 [$punc]* # either 0 or more puntuation
1189 [^$any] # followed by a non-url char
1190 | # or else
1191 $ # then end of the string
1192 )
1193 }{<A HREF="$1">$1</A>}igox;
1194
1195 $result = "<PRE>" # text should be as it is (verbatim)
1196 . "$rest\n"
1197 . "</PRE>\n";
1198 } else { # formatted text
1199 # parse through the string, stopping each time we find a
1200 # pod-escape. once the string has been throughly processed
1201 # we can output it.
ec978fad 1202 while (length $rest) {
54310121 1203 # check to see if there are any possible pod directives in
1204 # the remaining part of the text.
1205 if ($rest =~ m/[BCEIFLSZ]</) {
1206 warn "\$rest\t= $rest\n" unless
1207 $rest =~ /\A
1208 ([^<]*?)
1209 ([BCEIFLSZ]?)
1210 <
1211 (.*)\Z/xs;
1212
1213 $s1 = $1; # pure text
1214 $s2 = $2; # the type of pod-escape that follows
1215 $s3 = '<'; # '<'
1216 $s4 = $3; # the rest of the string
1217 } else {
1218 $s1 = $rest;
1219 $s2 = "";
1220 $s3 = "";
1221 $s4 = "";
1222 }
1223
1224 if ($s3 eq '<' && $s2) { # a pod-escape
1225 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1226 $podcommand = "$s2<";
1227 $rest = $s4;
1228
1229 # find the matching '>'
1230 $match = 1;
1231 $bf = 0;
1232 while ($match && !$bf) {
1233 $bf = 1;
1234 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1235 $bf = 0;
1236 $match++;
1237 $podcommand .= $1;
1238 $rest = $2;
1239 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1240 $bf = 0;
1241 $match--;
1242 $podcommand .= $1;
1243 $rest = $2;
1244 }
1245 }
1246
1247 if ($match != 0) {
1248 warn <<WARN;
1249$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1250WARN
1251 $result .= substr $podcommand, 0, 2;
1252 $rest = substr($podcommand, 2) . $rest;
1253 next;
1254 }
1255
1256 # pull out the parameters to the pod-escape
1257 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1258 $tag = $1;
1259 $params = $2;
1260
1261 # process the text within the pod-escape so that any escapes
1262 # which must occur do.
1263 process_text(\$params, 0) unless $tag eq 'L';
1264
1265 $s1 = $params;
1266 if (!$tag || $tag eq " ") { # <> : no tag
1267 $s1 = "&lt;$params&gt;";
1268 } elsif ($tag eq "L") { # L<> : link
1269 $s1 = process_L($params);
1270 } elsif ($tag eq "I" || # I<> : italicize text
1271 $tag eq "B" || # B<> : bold text
1272 $tag eq "F") { # F<> : file specification
1273 $s1 = process_BFI($tag, $params);
1274 } elsif ($tag eq "C") { # C<> : literal code
1275 $s1 = process_C($params, 1);
1276 } elsif ($tag eq "E") { # E<> : escape
1277 $s1 = process_E($params);
1278 } elsif ($tag eq "Z") { # Z<> : zero-width character
1279 $s1 = process_Z($params);
1280 } elsif ($tag eq "S") { # S<> : non-breaking space
1281 $s1 = process_S($params);
1282 } elsif ($tag eq "X") { # S<> : non-breaking space
1283 $s1 = process_X($params);
1284 } else {
1285 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1286 }
1287
1288 $result .= "$s1";
1289 } else {
1290 # for pure text we must deal with implicit links and
1291 # double-quotes among other things.
1292 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1293 $rest = $s4;
1294 }
1295 }
1296 }
1297 $$text = $result;
1298}
1299
1300sub html_escape {
1301 my $rest = $_[0];
a3c03ba2 1302 $rest =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
54310121 1303 $rest =~ s/</&lt;/g;
1304 $rest =~ s/>/&gt;/g;
1305 $rest =~ s/"/&quot;/g;
1306 return $rest;
1307}
1308
1309#
1310# process_puretext - process pure text (without pod-escapes) converting
1311# double-quotes and handling implicit C<> links.
1312#
1313sub process_puretext {
1314 my($text, $quote) = @_;
1315 my(@words, $result, $rest, $lead, $trail);
1316
1317 # convert double-quotes to single-quotes
1318 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1319 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1320
1321 $$quote = ($text =~ m/"/ ? 1 : 0);
1322 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1323
1324 # keep track of leading and trailing white-space
1325 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1326 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1327
1328 # collapse all white space into a single space
1329 $text =~ s/\s+/ /g;
1330 @words = split(" ", $text);
1331
1332 # process each word individually
1333 foreach my $word (@words) {
1334 # see if we can infer a link
1335 if ($word =~ /^\w+\(/) {
1336 # has parenthesis so should have been a C<> ref
1337 $word = process_C($word);
1338# $word =~ /^[^()]*]\(/;
1339# if (defined $items{$1} && $items{$1}) {
1340# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1341# . htmlify(0,$word)
1342# . "\">$word</A></CODE>";
1343# } elsif (defined $items{$word} && $items{$word}) {
1344# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1345# . htmlify(0,$word)
1346# . "\">$word</A></CODE>";
1347# } else {
1348# $word = "\n<CODE><A HREF=\"#item_"
1349# . htmlify(0,$word)
1350# . "\">$word</A></CODE>";
1351# }
1352 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1353 # perl variables, should be a C<> ref
1354 $word = process_C($word, 1);
1355 } elsif ($word =~ m,^\w+://\w,) {
1356 # looks like a URL
5a039dd3 1357 # Don't relativize it: leave it as the author intended
54310121 1358 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1359 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1360 # looks like an e-mail address
7b8d334a
GS
1361 my ($w1, $w2, $w3) = ("", $word, "");
1362 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1363 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1364 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1365 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1366 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1367 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1368 } else {
7b8d334a 1369 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1370 }
1371 }
1372
1373 # build a new string based upon our conversion
1374 $result = "";
1375 $rest = join(" ", @words);
1376 while (length($rest) > 75) {
1377 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1378 $rest =~ m/^(\S*)\s(.*?)$/o) {
1379
1380 $result .= "$1\n";
1381 $rest = $2;
1382 } else {
1383 $result .= "$rest\n";
1384 $rest = "";
1385 }
1386 }
1387 $result .= $rest if $rest;
1388
1389 # restore the leading and trailing white-space
1390 $result = "$lead$result$trail";
1391
1392 return $result;
1393}
1394
1395#
1396# pre_escape - convert & in text to $amp;
1397#
1398sub pre_escape {
1399 my($str) = @_;
db085819 1400 $$str =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
54310121 1401}
1402
1403#
39e571d4
LM
1404# dosify - convert filenames to 8.3
1405#
1406sub dosify {
1407 my($str) = @_;
1408 if ($Is83) {
1409 $str = lc $str;
1410 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1411 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1412 }
1413 return $str;
1414}
1415
1416#
54310121 1417# process_L - convert a pod L<> directive to a corresponding HTML link.
1418# most of the links made are inferred rather than known about directly
1419# (i.e it's not known whether the =head\d section exists in the target file,
1420# or whether a .pod file exists in the case of split files). however, the
1421# guessing usually works.
1422#
1423# Unlike the other directives, this should be called with an unprocessed
1424# string, else tags in the link won't be matched.
1425#
1426sub process_L {
1427 my($str) = @_;
39e571d4 1428 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
54310121 1429
1430 $str =~ s/\n/ /g; # undo word-wrapped tags
1431 $s1 = $str;
1432 for ($s1) {
b74bceb9
AB
1433 # LREF: a la HREF L<show this text|man/section>
1434 $linktext = $1 if s:^([^|]+)\|::;
1435
54310121 1436 # make sure sections start with a /
1437 s,^",/",g;
1438 s,^,/,g if (!m,/, && / /);
1439
1440 # check if there's a section specified
1441 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1442 ($page, $section) = ($1, $2);
1443 } else { # no
1444 ($page, $section) = ($str, "");
1445 }
1446
1447 # check if we know that this is a section in this page
1448 if (!defined $pages{$page} && defined $sections{$page}) {
1449 $section = $page;
1450 $page = "";
1451 }
29f227c9
BS
1452
1453 # remove trailing punctuation, like ()
1454 $section =~ s/\W*$// ;
54310121 1455 }
1456
39e571d4
LM
1457 $page83=dosify($page);
1458 $page=$page83 if (defined $pages{$page83});
54310121 1459 if ($page eq "") {
1460 $link = "#" . htmlify(0,$section);
b74bceb9 1461 $linktext = $section unless defined($linktext);
350ccacd
GB
1462 } elsif ( $page =~ /::/ ) {
1463 $linktext = ($section ? "$section" : "$page");
1464 $page =~ s,::,/,g;
29f227c9
BS
1465 # Search page cache for an entry keyed under the html page name,
1466 # then look to see what directory that page might be in. NOTE:
1467 # this will only find one page. A better solution might be to produce
1468 # an intermediate page that is an index to all such pages.
1469 my $page_name = $page ;
1470 $page_name =~ s,^.*/,, ;
1471 if ( defined( $pages{ $page_name } ) &&
1472 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1473 ) {
1474 $page = $1 ;
1475 }
1476 else {
1477 # NOTE: This branch assumes that all A::B pages are located in
1478 # $htmlroot/A/B.html . This is often incorrect, since they are
1479 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1480 # analyze the contents of %pages and figure out where any
1481 # cousins of A::B are, then assume that. So, if A::B isn't found,
1482 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1483 # lib/A/B.pm. This is also limited, but it's an improvement.
1484 # Maybe a hints file so that the links point to the correct places
1485 # non-theless?
1486 # Also, maybe put a warn "$0: cannot resolve..." here.
1487 }
350ccacd
GB
1488 $link = "$htmlroot/$page.html";
1489 $link .= "#" . htmlify(0,$section) if ($section);
54310121 1490 } elsif (!defined $pages{$page}) {
1491 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1492 $link = "";
b74bceb9 1493 $linktext = $page unless defined($linktext);
54310121 1494 } else {
b74bceb9 1495 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
54310121 1496 $section = htmlify(0,$section) if $section ne "";
1497
1498 # if there is a directory by the name of the page, then assume that an
1499 # appropriate section will exist in the subdirectory
29f227c9
BS
1500# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1501 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1502 $link = "$htmlroot/$1/$section.html";
1503
1504 # since there is no directory by the name of the page, the section will
1505 # have to exist within a .html of the same name. thus, make sure there
1506 # is a .pod or .pm that might become that .html
1507 } else {
1508 $section = "#$section";
1509 # check if there is a .pod with the page name
1510 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1511 $link = "$htmlroot/$1.html$section";
1512 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1513 $link = "$htmlroot/$1.html$section";
1514 } else {
1515 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1516 "no .pod or .pm found\n";
1517 $link = "";
b74bceb9 1518 $linktext = $section unless defined($linktext);
54310121 1519 }
1520 }
1521 }
1522
1523 process_text(\$linktext, 0);
1524 if ($link) {
29f227c9
BS
1525 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1526 # implies $htmlroot eq ''. This means that the link in question
1527 # needs a prefix of $htmldir if it begins with '/'. The test for
1528 # the initial '/' is done to avoid '#'-only links, and to allow
1529 # for other kinds of links, like file:, ftp:, etc.
1530 my $url ;
1531 if ( $htmlfileurl ne '' ) {
1532 $link = "$htmldir$link"
1533 if ( $link =~ m{^/} ) ;
1534
1535 $url = relativize_url( $link, $htmlfileurl ) ;
1536# print( " b: [$link,$htmlfileurl,$url]\n" ) ;
1537 }
1538 else {
1539 $url = $link ;
1540 }
1541
5a039dd3 1542 $s1 = "<A HREF=\"$url\">$linktext</A>";
54310121 1543 } else {
1544 $s1 = "<EM>$linktext</EM>";
1545 }
1546 return $s1;
1547}
1548
1549#
29f227c9
BS
1550# relativize_url - convert an absolute URL to one relative to a base URL.
1551# Assumes both end in a filename.
1552#
1553sub relativize_url {
1554 my ($dest,$source) = @_ ;
1555
1556 my ($dest_volume,$dest_directory,$dest_file) =
1557 File::Spec::Unix->splitpath( $dest ) ;
1558 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1559
1560 my ($source_volume,$source_directory,$source_file) =
1561 File::Spec::Unix->splitpath( $source ) ;
1562 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1563
1564 my $rel_path = '' ;
1565 if ( $dest ne '' ) {
1566 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1567 }
1568
1569 if ( $rel_path ne '' &&
1570 substr( $rel_path, -1 ) ne '/' &&
1571 substr( $dest_file, 0, 1 ) ne '#'
1572 ) {
1573 $rel_path .= "/$dest_file" ;
1574 }
1575 else {
1576 $rel_path .= "$dest_file" ;
1577 }
1578
1579 return $rel_path ;
1580}
1581
1582#
54310121 1583# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1584# convert them to corresponding HTML directives.
1585#
1586sub process_BFI {
1587 my($tag, $str) = @_;
1588 my($s1); # work string
1589 my(%repltext) = ( 'B' => 'STRONG',
1590 'F' => 'EM',
1591 'I' => 'EM');
1592
1593 # extract the modified text and convert to HTML
1594 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1595 return $s1;
1596}
1597
1598#
1599# process_C - process the C<> pod-escape.
1600#
1601sub process_C {
1602 my($str, $doref) = @_;
1603 my($s1, $s2);
1604
1605 $s1 = $str;
1606 $s1 =~ s/\([^()]*\)//g; # delete parentheses
be173d55 1607 $s2 = $s1;
54310121 1608 $s1 =~ s/\W//g; # delete bogus characters
7b8d334a 1609 $str = html_escape($str);
54310121 1610
1611 # if there was a pod file that we found earlier with an appropriate
1612 # =item directive, then create a link to that page.
1613 if ($doref && defined $items{$s1}) {
5a039dd3
BS
1614 if ( $items{$s1} ) {
1615 my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
29f227c9
BS
1616 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1617 # implies $htmlroot eq ''.
1618 my $url ;
1619 if ( $htmlfileurl ne '' ) {
1620 $link = "$htmldir$link" ;
1621 $url = relativize_url( $link, $htmlfileurl ) ;
1622 }
1623 else {
1624 $url = $link ;
1625 }
5a039dd3
BS
1626 $s1 = "<A HREF=\"$url\">$str</A>" ;
1627 }
1628 else {
1629 $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
1630 }
54310121 1631 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1632 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1633 } else {
1634 $s1 = "<CODE>$str</CODE>";
1635 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1636 }
1637
1638
1639 return $s1;
1640}
1641
1642#
1643# process_E - process the E<> pod directive which seems to escape a character.
1644#
1645sub process_E {
1646 my($str) = @_;
1647
1648 for ($str) {
1649 s,([^/].*),\&$1\;,g;
1650 }
1651
1652 return $str;
1653}
1654
1655#
1656# process_Z - process the Z<> pod directive which really just amounts to
1657# ignoring it. this allows someone to start a paragraph with an =
1658#
1659sub process_Z {
1660 my($str) = @_;
1661
1662 # there is no equivalent in HTML for this so just ignore it.
1663 $str = "";
1664 return $str;
1665}
1666
1667#
1668# process_S - process the S<> pod directive which means to convert all
1669# spaces in the string to non-breaking spaces (in HTML-eze).
1670#
1671sub process_S {
1672 my($str) = @_;
1673
1674 # convert all spaces in the text to non-breaking spaces in HTML.
1675 $str =~ s/ /&nbsp;/g;
1676 return $str;
1677}
1678
1679#
1680# process_X - this is supposed to make an index entry. we'll just
1681# ignore it.
1682#
1683sub process_X {
1684 return '';
1685}
1686
1687
1688#
29f227c9
BS
1689# Adapted from Nick Ing-Simmons' PodToHtml package.
1690sub relative_url {
1691 my $source_file = shift ;
1692 my $destination_file = shift;
1693
1694 my $source = URI::file->new_abs($source_file);
1695 my $uo = URI::file->new($destination_file,$source)->abs;
1696 return $uo->rel->as_string;
1697}
1698
1699
1700#
54310121 1701# finish_list - finish off any pending HTML lists. this should be called
1702# after the entire pod file has been read and converted.
1703#
1704sub finish_list {
7b8d334a 1705 while ($listlevel > 0) {
54310121 1706 print HTML "</DL>\n";
1707 $listlevel--;
1708 }
1709}
1710
1711#
1712# htmlify - converts a pod section specification to a suitable section
1713# specification for HTML. if first arg is 1, only takes 1st word.
1714#
1715sub htmlify {
1716 my($compact, $heading) = @_;
1717
1718 if ($compact) {
1719 $heading =~ /^(\w+)/;
1720 $heading = $1;
1721 }
1722
1723 # $heading = lc($heading);
1724 $heading =~ s/[^\w\s]/_/g;
1725 $heading =~ s/(\s+)/ /g;
1726 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1727 $heading =~ s/ /_/g;
1728 $heading =~ s/\A(.{32}).*\Z/$1/s;
1729 $heading =~ s/\s+\Z//;
1730 $heading =~ s/_{2,}/_/g;
1731
1732 return $heading;
1733}
1734
1735BEGIN {
1736}
1737
17381;