This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
allow 'text' in L<text|A::B/"C"> (from Martin Lichtin
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121
PP
1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
29f227c9 8$VERSION = 1.02;
54310121
PP
9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288
FK
15use locale; # make \w work right in non-ASCII lands
16
54310121
PP
17use strict;
18
7b8d334a
GS
19use Config;
20
54310121
PP
21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121
PP
24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121
PP
28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3
BS
48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121
PP
57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b
JD
140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121
PP
146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b
JD
152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121
PP
158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b
JD
171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121
PP
175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
179=head1 BUGS
180
181Has trouble with C<> etc in = commands.
182
183=head1 SEE ALSO
184
185L<perlpod>
186
187=head1 COPYRIGHT
188
189This program is distributed under the Artistic License.
190
191=cut
192
1f763251
GS
193my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
194my $dircache = "pod2htmd$cache_ext";
195my $itemcache = "pod2htmi$cache_ext";
54310121
PP
196
197my @begin_stack = (); # begin/end stack
198
29f227c9
BS
199my @libpods = (); # files to search for links from C<> directives
200my $htmlroot = "/"; # http-server base directory from which all
54310121 201 # relative paths in $podpath stem.
5a039dd3
BS
202my $htmldir = ""; # The directory to which the html pages
203 # will (eventually) be written.
54310121 204my $htmlfile = ""; # write to stdout by default
29f227c9 205my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
206 # refer to this file. This is only used
207 # to make relative urls that point to
208 # other files.
54310121
PP
209my $podfile = ""; # read from stdin by default
210my @podpath = (); # list of directories containing library pods.
211my $podroot = "."; # filesystem base directory from which all
212 # relative paths in $podpath stem.
34db337b 213my $css = ''; # Cascading style sheet
54310121 214my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 215my $quiet = 0; # not quiet by default
54310121
PP
216my $verbose = 0; # not verbose by default
217my $doindex = 1; # non-zero if we should generate an index
218my $listlevel = 0; # current list depth
219my @listitem = (); # stack of HTML commands to use when a =item is
220 # encountered. the top of the stack is the
221 # current list.
222my @listdata = (); # similar to @listitem, but for the text after
223 # an =item
224my @listend = (); # similar to @listitem, but the text to use to
225 # end the list.
226my $ignore = 1; # whether or not to format text. we don't
227 # format text until we hit our first pod
228 # directive.
229
230my %items_named = (); # for the multiples of the same item in perlfunc
231my @items_seen = ();
232my $netscape = 0; # whether or not to use netscape directives.
233my $title; # title to give the pod(s)
34db337b 234my $header = 0; # produce block header/footer
54310121
PP
235my $top = 1; # true if we are at the top of the doc. used
236 # to prevent the first <HR> directive.
237my $paragraph; # which paragraph we're processing (used
238 # for error messages)
239my %pages = (); # associative array used to find the location
240 # of pages referenced by L<> links.
241my %sections = (); # sections within this page
242my %items = (); # associative array used to find the location
243 # of =item directives referenced by C<> links
39e571d4
ML
244my $Is83; # is dos with short filenames (8.3)
245
54310121 246sub init_globals {
04f720db
JD
247$dircache = "pod2htmd$cache_ext";
248$itemcache = "pod2htmi$cache_ext";
54310121
PP
249
250@begin_stack = (); # begin/end stack
251
252@libpods = (); # files to search for links from C<> directives
253$htmlroot = "/"; # http-server base directory from which all
254 # relative paths in $podpath stem.
255$htmlfile = ""; # write to stdout by default
256$podfile = ""; # read from stdin by default
257@podpath = (); # list of directories containing library pods.
258$podroot = "."; # filesystem base directory from which all
259 # relative paths in $podpath stem.
34db337b 260$css = ''; # Cascading style sheet
54310121 261$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 262$quiet = 0; # not quiet by default
54310121
PP
263$verbose = 0; # not verbose by default
264$doindex = 1; # non-zero if we should generate an index
265$listlevel = 0; # current list depth
266@listitem = (); # stack of HTML commands to use when a =item is
267 # encountered. the top of the stack is the
268 # current list.
269@listdata = (); # similar to @listitem, but for the text after
270 # an =item
271@listend = (); # similar to @listitem, but the text to use to
272 # end the list.
273$ignore = 1; # whether or not to format text. we don't
274 # format text until we hit our first pod
275 # directive.
276
277@items_seen = ();
278%items_named = ();
279$netscape = 0; # whether or not to use netscape directives.
34db337b 280$header = 0; # produce block header/footer
54310121
PP
281$title = ''; # title to give the pod(s)
282$top = 1; # true if we are at the top of the doc. used
283 # to prevent the first <HR> directive.
284$paragraph = ''; # which paragraph we're processing (used
285 # for error messages)
54310121 286%sections = (); # sections within this page
3e3baf6d
TB
287
288# These are not reinitialised here but are kept as a cache.
289# See get_cache and related cache management code.
290#%pages = (); # associative array used to find the location
291 # of pages referenced by L<> links.
292#%items = (); # associative array used to find the location
54310121 293 # of =item directives referenced by C<> links
39e571d4 294$Is83=$^O eq 'dos';
54310121
PP
295}
296
297sub pod2html {
298 local(@ARGV) = @_;
299 local($/);
300 local $_;
301
302 init_globals();
303
39e571d4
ML
304 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
305
54310121 306 # cache of %pages and %items from last time we ran pod2html
54310121
PP
307
308 #undef $opt_help if defined $opt_help;
309
310 # parse the command-line parameters
311 parse_command_line();
312
313 # set some variables to their default values if necessary
314 local *POD;
315 unless (@ARGV && $ARGV[0]) {
316 $podfile = "-" unless $podfile; # stdin
317 open(POD, "<$podfile")
318 || die "$0: cannot open $podfile file for input: $!\n";
319 } else {
320 $podfile = $ARGV[0]; # XXX: might be more filenames
321 *POD = *ARGV;
322 }
323 $htmlfile = "-" unless $htmlfile; # stdout
324 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9
BS
325 $htmldir =~ s#/$## ; # so we don't get a //
326 if ( $htmlroot eq ''
327 && defined( $htmldir )
328 && $htmldir ne ''
329 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
330 )
5a039dd3 331 {
29f227c9
BS
332 # Set the 'base' url for this file, so that we can use it
333 # as the location from which to calculate relative links
334 # to other files. If this is '', then absolute links will
335 # be used throughout.
336 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 337 }
54310121
PP
338
339 # read the pod a paragraph at a time
340 warn "Scanning for sections in input file(s)\n" if $verbose;
341 $/ = "";
342 my @poddata = <POD>;
343 close(POD);
344
345 # scan the pod for =head[1-6] directives and build an index
346 my $index = scan_headings(\%sections, @poddata);
347
3e3baf6d 348 unless($index) {
31e56455 349 warn "No headings in $podfile\n" if $verbose;
3e3baf6d
TB
350 }
351
54310121
PP
352 # open the output file
353 open(HTML, ">$htmlfile")
354 || die "$0: cannot open $htmlfile file for output: $!\n";
355
d011ffae
GS
356 # put a title in the HTML file if one wasn't specified
357 if ($title eq '') {
358 TITLE_SEARCH: {
359 for (my $i = 0; $i < @poddata; $i++) {
360 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
361 for my $para ( @poddata[$i, $i+1] ) {
362 last TITLE_SEARCH
363 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
364 }
365 }
54310121 366
d011ffae
GS
367 }
368 }
369 }
3e3baf6d
TB
370 if (!$title and $podfile =~ /\.pod$/) {
371 # probably a split pod so take first =head[12] as title
372 for (my $i = 0; $i < @poddata; $i++) {
373 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
374 }
375 warn "adopted '$title' as title for $podfile\n"
376 if $verbose and $title;
377 }
7b8d334a
GS
378 if ($title) {
379 $title =~ s/\s*\(.*\)//;
380 } else {
34db337b 381 warn "$0: no title for $podfile" unless $quiet;
54310121
PP
382 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
383 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 384 warn "using $title" if $verbose;
54310121 385 }
34db337b
JD
386 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
387 $csslink =~ s,\\,/,g;
388 $csslink =~ s,(/.):,$1|,;
389
390 my $block = $header ? <<END_OF_BLOCK : '';
391<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
392<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
393<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
394</TD></TR>
395</TABLE>
396END_OF_BLOCK
397
54310121 398 print HTML <<END_OF_HEAD;
7b8d334a
GS
399<HTML>
400<HEAD>
34db337b 401<TITLE>$title</TITLE>$csslink
7b8d334a
GS
402<LINK REV="made" HREF="mailto:$Config{perladmin}">
403</HEAD>
54310121 404
7b8d334a 405<BODY>
34db337b 406$block
54310121
PP
407END_OF_HEAD
408
3e3baf6d
TB
409 # load/reload/validate/cache %pages and %items
410 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121
PP
411
412 # scan the pod for =item directives
413 scan_items("", \%items, @poddata);
414
415 # put an index at the top of the file. note, if $doindex is 0 we
416 # still generate an index, but surround it with an html comment.
417 # that way some other program can extract it if desired.
418 $index =~ s/--+/-/g;
419 print HTML "<!-- INDEX BEGIN -->\n";
420 print HTML "<!--\n" unless $doindex;
421 print HTML $index;
422 print HTML "-->\n" unless $doindex;
423 print HTML "<!-- INDEX END -->\n\n";
31e56455 424 print HTML "<HR>\n" if $doindex and $index;
54310121
PP
425
426 # now convert this file
427 warn "Converting input file\n" if $verbose;
428 foreach my $i (0..$#poddata) {
429 $_ = $poddata[$i];
430 $paragraph = $i+1;
431 if (/^(=.*)/s) { # is it a pod directive?
432 $ignore = 0;
433 $_ = $1;
434 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
435 process_begin($1, $2);
436 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
437 process_end($1, $2);
438 } elsif (/^=cut/) { # =cut
439 process_cut();
440 } elsif (/^=pod/) { # =pod
441 process_pod();
442 } else {
443 next if @begin_stack && $begin_stack[-1] ne 'html';
444
7b8d334a 445 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
54310121 446 process_head($1, $2);
7b8d334a 447 } elsif (/^=item\s*(.*\S)/sm) { # =item text
54310121
PP
448 process_item($1);
449 } elsif (/^=over\s*(.*)/) { # =over N
450 process_over();
451 } elsif (/^=back/) { # =back
452 process_back();
453 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
454 process_for($1,$2);
455 } else {
456 /^=(\S*)\s*/;
457 warn "$0: $podfile: unknown pod directive '$1' in "
458 . "paragraph $paragraph. ignoring.\n";
459 }
460 }
461 $top = 0;
462 }
463 else {
464 next if $ignore;
465 next if @begin_stack && $begin_stack[-1] ne 'html';
466 my $text = $_;
467 process_text(\$text, 1);
34db337b 468 print HTML "<P>\n$text</P>\n";
54310121
PP
469 }
470 }
471
472 # finish off any pending directives
473 finish_list();
474 print HTML <<END_OF_TAIL;
34db337b 475$block
7b8d334a 476</BODY>
54310121 477
7b8d334a 478</HTML>
54310121
PP
479END_OF_TAIL
480
481 # close the html file
482 close(HTML);
483
484 warn "Finished\n" if $verbose;
485}
486
487##############################################################################
488
489my $usage; # see below
490sub usage {
491 my $podfile = shift;
492 warn "$0: $podfile: @_\n" if @_;
493 die $usage;
494}
495
496$usage =<<END_OF_USAGE;
497Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
498 --podpath=<name>:...:<name> --podroot=<name>
499 --libpods=<name>:...:<name> --recurse --verbose --index
500 --netscape --norecurse --noindex
501
502 --flush - flushes the item and directory caches.
503 --help - prints this message.
504 --htmlroot - http-server base directory from which all relative paths
505 in podpath stem (default is /).
506 --index - generate an index at the top of the resulting html
507 (default).
508 --infile - filename for the pod to convert (input taken from stdin
509 by default).
510 --libpods - colon-separated list of pages to search for =item pod
511 directives in as targets of C<> and implicit links (empty
512 by default). note, these are not filenames, but rather
513 page names like those that appear in L<> links.
514 --netscape - will use netscape html directives when applicable.
515 --nonetscape - will not use netscape directives (default).
516 --outfile - filename for the resulting html file (output sent to
517 stdout by default).
518 --podpath - colon-separated list of directories containing library
519 pods. empty by default.
520 --podroot - filesystem base directory from which all relative paths
521 in podpath stem (default is .).
522 --noindex - don't generate an index at the top of the resulting html.
523 --norecurse - don't recurse on those subdirectories listed in podpath.
524 --recurse - recurse on those subdirectories listed in podpath
525 (default behavior).
526 --title - title that will appear in resulting html file.
34db337b
JD
527 --header - produce block header/footer
528 --css - stylesheet URL
54310121 529 --verbose - self-explanatory
34db337b 530 --quiet - supress some benign warning messages
54310121
PP
531
532END_OF_USAGE
533
534sub parse_command_line {
34db337b
JD
535 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
536 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 537 my $result = GetOptions(
29f227c9
BS
538 'flush' => \$opt_flush,
539 'help' => \$opt_help,
540 'htmldir=s' => \$opt_htmldir,
54310121 541 'htmlroot=s' => \$opt_htmlroot,
29f227c9 542 'index!' => \$opt_index,
54310121
PP
543 'infile=s' => \$opt_infile,
544 'libpods=s' => \$opt_libpods,
545 'netscape!' => \$opt_netscape,
546 'outfile=s' => \$opt_outfile,
547 'podpath=s' => \$opt_podpath,
548 'podroot=s' => \$opt_podroot,
549 'norecurse' => \$opt_norecurse,
550 'recurse!' => \$opt_recurse,
551 'title=s' => \$opt_title,
34db337b
JD
552 'header' => \$opt_header,
553 'css=s' => \$opt_css,
54310121 554 'verbose' => \$opt_verbose,
34db337b 555 'quiet' => \$opt_quiet,
54310121
PP
556 );
557 usage("-", "invalid parameters") if not $result;
558
559 usage("-") if defined $opt_help; # see if the user asked for help
560 $opt_help = ""; # just to make -w shut-up.
561
562 $podfile = $opt_infile if defined $opt_infile;
563 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 564 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121
PP
565
566 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
567 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
568
569 warn "Flushing item and directory caches\n"
570 if $opt_verbose && defined $opt_flush;
571 unlink($dircache, $itemcache) if defined $opt_flush;
572
573 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
574 $podroot = $opt_podroot if defined $opt_podroot;
575
576 $doindex = $opt_index if defined $opt_index;
577 $recurse = $opt_recurse if defined $opt_recurse;
578 $title = $opt_title if defined $opt_title;
34db337b
JD
579 $header = defined $opt_header ? 1 : 0;
580 $css = $opt_css if defined $opt_css;
54310121 581 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 582 $quiet = defined $opt_quiet ? 1 : 0;
54310121
PP
583 $netscape = $opt_netscape if defined $opt_netscape;
584}
585
3e3baf6d
TB
586
587my $saved_cache_key;
588
589sub get_cache {
590 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
591 my @cache_key_args = @_;
592
593 # A first-level cache:
594 # Don't bother reading the cache files if they still apply
595 # and haven't changed since we last read them.
596
597 my $this_cache_key = cache_key(@cache_key_args);
598
599 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
600
601 # load the cache of %pages and %items if possible. $tests will be
602 # non-zero if successful.
603 my $tests = 0;
604 if (-f $dircache && -f $itemcache) {
605 warn "scanning for item cache\n" if $verbose;
606 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
607 }
608
609 # if we didn't succeed in loading the cache then we must (re)build
610 # %pages and %items.
611 if (!$tests) {
612 warn "scanning directories in pod-path\n" if $verbose;
613 scan_podpath($podroot, $recurse, 0);
614 }
615 $saved_cache_key = cache_key(@cache_key_args);
616}
617
618sub cache_key {
619 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
620 return join('!', $dircache, $itemcache, $recurse,
29f227c9 621 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
622}
623
54310121 624#
3e3baf6d 625# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121
PP
626# are valid caches of %pages and %items. if they are valid then it loads
627# them and returns a non-zero value.
628#
3e3baf6d
TB
629
630sub load_cache {
54310121
PP
631 my($dircache, $itemcache, $podpath, $podroot) = @_;
632 my($tests);
633 local $_;
634
635 $tests = 0;
636
637 open(CACHE, "<$itemcache") ||
638 die "$0: error opening $itemcache for reading: $!\n";
639 $/ = "\n";
640
641 # is it the same podpath?
642 $_ = <CACHE>;
643 chomp($_);
3e3baf6d 644 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
645
646 # is it the same podroot?
647 $_ = <CACHE>;
648 chomp($_);
649 $tests++ if ($podroot eq $_);
650
651 # load the cache if its good
652 if ($tests != 2) {
653 close(CACHE);
54310121
PP
654 return 0;
655 }
656
657 warn "loading item cache\n" if $verbose;
658 while (<CACHE>) {
659 /(.*?) (.*)$/;
660 $items{$1} = $2;
661 }
662 close(CACHE);
663
664 warn "scanning for directory cache\n" if $verbose;
665 open(CACHE, "<$dircache") ||
666 die "$0: error opening $dircache for reading: $!\n";
667 $/ = "\n";
668 $tests = 0;
669
670 # is it the same podpath?
671 $_ = <CACHE>;
672 chomp($_);
3e3baf6d 673 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
674
675 # is it the same podroot?
676 $_ = <CACHE>;
677 chomp($_);
678 $tests++ if ($podroot eq $_);
679
680 # load the cache if its good
681 if ($tests != 2) {
682 close(CACHE);
54310121
PP
683 return 0;
684 }
685
686 warn "loading directory cache\n" if $verbose;
687 while (<CACHE>) {
688 /(.*?) (.*)$/;
689 $pages{$1} = $2;
690 }
691
692 close(CACHE);
693
694 return 1;
695}
696
697#
698# scan_podpath - scans the directories specified in @podpath for directories,
699# .pod files, and .pm files. it also scans the pod files specified in
700# @libpods for =item directives.
701#
702sub scan_podpath {
3e3baf6d 703 my($podroot, $recurse, $append) = @_;
54310121
PP
704 my($pwd, $dir);
705 my($libpod, $dirname, $pod, @files, @poddata);
706
3e3baf6d
TB
707 unless($append) {
708 %items = ();
709 %pages = ();
710 }
711
54310121
PP
712 # scan each directory listed in @podpath
713 $pwd = getcwd();
714 chdir($podroot)
715 || die "$0: error changing to directory $podroot: $!\n";
716 foreach $dir (@podpath) {
717 scan_dir($dir, $recurse);
718 }
719
720 # scan the pods listed in @libpods for =item directives
721 foreach $libpod (@libpods) {
722 # if the page isn't defined then we won't know where to find it
723 # on the system.
724 next unless defined $pages{$libpod} && $pages{$libpod};
725
726 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
727 # NOTE: Only finds the first so-named directory in the tree.
728# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
729 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
730 # find all the .pod and .pm files within the directory
731 $dirname = $1;
732 opendir(DIR, $dirname) ||
733 die "$0: error opening directory $dirname: $!\n";
734 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
735 closedir(DIR);
736
737 # scan each .pod and .pm file for =item directives
738 foreach $pod (@files) {
739 open(POD, "<$dirname/$pod") ||
740 die "$0: error opening $dirname/$pod for input: $!\n";
741 @poddata = <POD>;
742 close(POD);
743
744 scan_items("$dirname/$pod", @poddata);
745 }
746
747 # use the names of files as =item directives too.
748 foreach $pod (@files) {
749 $pod =~ /^(.*)(\.pod|\.pm)$/;
750 $items{$1} = "$dirname/$1.html" if $1;
751 }
752 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
753 $pages{$libpod} =~ /([^:]*\.pm):/) {
754 # scan the .pod or .pm file for =item directives
755 $pod = $1;
756 open(POD, "<$pod") ||
757 die "$0: error opening $pod for input: $!\n";
758 @poddata = <POD>;
759 close(POD);
760
761 scan_items("$pod", @poddata);
762 } else {
763 warn "$0: shouldn't be here (line ".__LINE__."\n";
764 }
765 }
766 @poddata = (); # clean-up a bit
767
768 chdir($pwd)
769 || die "$0: error changing to directory $pwd: $!\n";
770
771 # cache the item list for later use
772 warn "caching items for later use\n" if $verbose;
773 open(CACHE, ">$itemcache") ||
774 die "$0: error open $itemcache for writing: $!\n";
775
776 print CACHE join(":", @podpath) . "\n$podroot\n";
777 foreach my $key (keys %items) {
778 print CACHE "$key $items{$key}\n";
779 }
780
781 close(CACHE);
782
783 # cache the directory list for later use
784 warn "caching directories for later use\n" if $verbose;
785 open(CACHE, ">$dircache") ||
786 die "$0: error open $dircache for writing: $!\n";
787
788 print CACHE join(":", @podpath) . "\n$podroot\n";
789 foreach my $key (keys %pages) {
790 print CACHE "$key $pages{$key}\n";
791 }
792
793 close(CACHE);
794}
795
796#
797# scan_dir - scans the directory specified in $dir for subdirectories, .pod
798# files, and .pm files. notes those that it finds. this information will
799# be used later in order to figure out where the pages specified in L<>
800# links are on the filesystem.
801#
802sub scan_dir {
803 my($dir, $recurse) = @_;
804 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
805 local $_;
806
807 @subdirs = ();
808 @pods = ();
809
810 opendir(DIR, $dir) ||
811 die "$0: error opening directory $dir: $!\n";
812 while (defined($_ = readdir(DIR))) {
813 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
814 $pages{$_} = "" unless defined $pages{$_};
815 $pages{$_} .= "$dir/$_:";
816 push(@subdirs, $_);
817 } elsif (/\.pod$/) { # .pod
818 s/\.pod$//;
819 $pages{$_} = "" unless defined $pages{$_};
820 $pages{$_} .= "$dir/$_.pod:";
821 push(@pods, "$dir/$_.pod");
822 } elsif (/\.pm$/) { # .pm
823 s/\.pm$//;
824 $pages{$_} = "" unless defined $pages{$_};
825 $pages{$_} .= "$dir/$_.pm:";
826 push(@pods, "$dir/$_.pm");
827 }
828 }
829 closedir(DIR);
830
831 # recurse on the subdirectories if necessary
832 if ($recurse) {
833 foreach my $subdir (@subdirs) {
834 scan_dir("$dir/$subdir", $recurse);
835 }
836 }
837}
838
839#
840# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
841# build an index.
842#
843sub scan_headings {
844 my($sections, @data) = @_;
845 my($tag, $which_head, $title, $listdepth, $index);
846
be173d55
UC
847 # here we need local $ignore = 0;
848 # unfortunately, we can't have it, because $ignore is lexical
849 $ignore = 0;
850
54310121
PP
851 $listdepth = 0;
852 $index = "";
853
854 # scan for =head directives, note their name, and build an index
855 # pointing to each of them.
856 foreach my $line (@data) {
bb9460ed 857 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
54310121
PP
858 ($tag,$which_head, $title) = ($1,$2,$3);
859 chomp($title);
860 $$sections{htmlify(0,$title)} = 1;
861
102c538a
DS
862 while ($which_head != $listdepth) {
863 if ($which_head > $listdepth) {
864 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
865 $listdepth++;
866 } elsif ($which_head < $listdepth) {
867 $listdepth--;
868 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
869 }
54310121 870 }
54310121
PP
871
872 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
be173d55 873 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
34db337b 874 html_escape(process_text(\$title, 0)) . "</A></LI>";
54310121
PP
875 }
876 }
877
878 # finish off the lists
879 while ($listdepth--) {
880 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
881 }
882
883 # get rid of bogus lists
884 $index =~ s,\t*<UL>\s*</UL>\n,,g;
885
bb9460ed 886 $ignore = 1; # restore old value;
be173d55 887
54310121
PP
888 return $index;
889}
890
891#
892# scan_items - scans the pod specified by $pod for =item directives. we
893# will use this information later on in resolving C<> links.
894#
895sub scan_items {
896 my($pod, @poddata) = @_;
897 my($i, $item);
898 local $_;
899
900 $pod =~ s/\.pod$//;
901 $pod .= ".html" if $pod;
902
903 foreach $i (0..$#poddata) {
904 $_ = $poddata[$i];
905
906 # remove any formatting instructions
907 s,[A-Z]<([^<>]*)>,$1,g;
908
909 # figure out what kind of item it is and get the first word of
910 # it's name.
911 if (/^=item\s+(\w*)\s*.*$/s) {
912 if ($1 eq "*") { # bullet list
913 /\A=item\s+\*\s*(.*?)\s*\Z/s;
914 $item = $1;
7b8d334a
GS
915 } elsif ($1 =~ /^\d+/) { # numbered list
916 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
54310121
PP
917 $item = $1;
918 } else {
919# /\A=item\s+(.*?)\s*\Z/s;
920 /\A=item\s+(\w*)/s;
921 $item = $1;
922 }
923
924 $items{$item} = "$pod" if $item;
925 }
926 }
927}
928
929#
930# process_head - convert a pod head[1-6] tag and convert it to HTML format.
931#
932sub process_head {
933 my($tag, $heading) = @_;
934 my $firstword;
935
936 # figure out the level of the =head
937 $tag =~ /head([1-6])/;
938 my $level = $1;
939
940 # can't have a heading full of spaces and speechmarks and so on
941 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
942
943 print HTML "<P>\n" unless $listlevel;
944 print HTML "<HR>\n" unless $listlevel || $top;
945 print HTML "<H$level>"; # unless $listlevel;
946 #print HTML "<H$level>" unless $listlevel;
be173d55 947 my $convert = $heading; process_text(\$convert, 0);
7b8d334a 948 $convert = html_escape($convert);
54310121
PP
949 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
950 print HTML "</H$level>"; # unless $listlevel;
951 print HTML "\n";
952}
953
954#
955# process_item - convert a pod item tag and convert it to HTML format.
956#
957sub process_item {
958 my $text = $_[0];
959 my($i, $quote, $name);
960
961 my $need_preamble = 0;
962 my $this_entry;
963
964
965 # lots of documents start a list without doing an =over. this is
966 # bad! but, the proper thing to do seems to be to just assume
967 # they did do an =over. so warn them once and then continue.
968 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
969 unless $listlevel;
970 process_over() unless $listlevel;
971
972 return unless $listlevel;
973
974 # remove formatting instructions from the text
975 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
976 pre_escape(\$text);
977
978 $need_preamble = $items_seen[$listlevel]++ == 0;
979
980 # check if this is the first =item after an =over
981 $i = $listlevel - 1;
982 my $need_new = $listlevel >= @listitem;
983
984 if ($text =~ /\A\*/) { # bullet
985
986 if ($need_preamble) {
987 push(@listend, "</UL>");
988 print HTML "<UL>\n";
989 }
990
7b8d334a
GS
991 print HTML '<LI>';
992 if ($text =~ /\A\*\s*(.+)\Z/s) {
993 print HTML '<STRONG>';
994 if ($items_named{$1}++) {
995 print HTML html_escape($1);
996 } else {
997 my $name = 'item_' . htmlify(1,$1);
998 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
999 }
1000 print HTML '</STRONG>';
1001 }
54310121 1002
7b8d334a 1003 } elsif ($text =~ /\A[\d#]+/) { # numbered list
54310121
PP
1004
1005 if ($need_preamble) {
1006 push(@listend, "</OL>");
1007 print HTML "<OL>\n";
1008 }
1009
7b8d334a
GS
1010 print HTML '<LI>';
1011 if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
1012 print HTML '<STRONG>';
1013 if ($items_named{$1}++) {
1014 print HTML html_escape($1);
1015 } else {
1016 my $name = 'item_' . htmlify(0,$1);
1017 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
1018 }
1019 print HTML '</STRONG>';
1020 }
54310121
PP
1021
1022 } else { # all others
1023
1024 if ($need_preamble) {
1025 push(@listend, '</DL>');
1026 print HTML "<DL>\n";
1027 }
1028
7b8d334a
GS
1029 print HTML '<DT>';
1030 if ($text =~ /(\S+)/) {
1031 print HTML '<STRONG>';
1032 if ($items_named{$1}++) {
1033 print HTML html_escape($text);
1034 } else {
1035 my $name = 'item_' . htmlify(1,$text);
1036 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
1037 }
1038 print HTML '</STRONG>';
1039 }
54310121
PP
1040 print HTML '<DD>';
1041 }
1042
1043 print HTML "\n";
1044}
1045
1046#
1047# process_over - process a pod over tag and start a corresponding HTML
1048# list.
1049#
1050sub process_over {
1051 # start a new list
1052 $listlevel++;
1053}
1054
1055#
1056# process_back - process a pod back tag and convert it to HTML format.
1057#
1058sub process_back {
2ceaccd7 1059 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
54310121
PP
1060 unless $listlevel;
1061 return unless $listlevel;
1062
1063 # close off the list. note, I check to see if $listend[$listlevel] is
1064 # defined because an =item directive may have never appeared and thus
1065 # $listend[$listlevel] may have never been initialized.
1066 $listlevel--;
1067 print HTML $listend[$listlevel] if defined $listend[$listlevel];
1068 print HTML "\n";
1069
1070 # don't need the corresponding perl code anymore
1071 pop(@listitem);
1072 pop(@listdata);
1073 pop(@listend);
1074
1075 pop(@items_seen);
1076}
1077
1078#
1079# process_cut - process a pod cut tag, thus stop ignoring pod directives.
1080#
1081sub process_cut {
1082 $ignore = 1;
1083}
1084
1085#
1086# process_pod - process a pod pod tag, thus ignore pod directives until we see a
1087# corresponding cut.
1088#
1089sub process_pod {
1090 # no need to set $ignore to 0 cause the main loop did it
1091}
1092
1093#
1094# process_for - process a =for pod tag. if it's for html, split
c4d9b39d 1095# it out verbatim, if illustration, center it, otherwise ignore it.
54310121
PP
1096#
1097sub process_for {
1098 my($whom, $text) = @_;
1099 if ( $whom =~ /^(pod2)?html$/i) {
1100 print HTML $text;
c4d9b39d
IZ
1101 } elsif ($whom =~ /^illustration$/i) {
1102 1 while chomp $text;
1103 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1104 $text .= $ext, last if -r "$text$ext";
1105 }
1106 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1107 }
54310121
PP
1108}
1109
1110#
1111# process_begin - process a =begin pod tag. this pushes
1112# whom we're beginning on the begin stack. if there's a
1113# begin stack, we only print if it us.
1114#
1115sub process_begin {
1116 my($whom, $text) = @_;
1117 $whom = lc($whom);
1118 push (@begin_stack, $whom);
1119 if ( $whom =~ /^(pod2)?html$/) {
1120 print HTML $text if $text;
1121 }
1122}
1123
1124#
1125# process_end - process a =end pod tag. pop the
1126# begin stack. die if we're mismatched.
1127#
1128sub process_end {
1129 my($whom, $text) = @_;
1130 $whom = lc($whom);
1131 if ($begin_stack[-1] ne $whom ) {
1132 die "Unmatched begin/end at chunk $paragraph\n"
1133 }
1134 pop @begin_stack;
1135}
1136
1137#
1138# process_text - handles plaintext that appears in the input pod file.
1139# there may be pod commands embedded within the text so those must be
1140# converted to html commands.
1141#
1142sub process_text {
1143 my($text, $escapeQuotes) = @_;
1144 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1145 my($podcommand, $params, $tag, $quote);
1146
1147 return if $ignore;
1148
1149 $quote = 0; # status of double-quote conversion
1150 $result = "";
1151 $rest = $$text;
1152
1153 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
be173d55
UC
1154 $rest =~ s/\n+\Z//;
1155 $rest =~ s#.*#
1156 my $line = $&;
1157 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1158 $line;
1159 #eg;
54310121
PP
1160
1161 $rest =~ s/&/&amp;/g;
1162 $rest =~ s/</&lt;/g;
1163 $rest =~ s/>/&gt;/g;
1164 $rest =~ s/"/&quot;/g;
1165
1166 # try and create links for all occurrences of perl.* within
1167 # the preformatted text.
1168 $rest =~ s{
1169 (\s*)(perl\w+)
1170 }{
1171 if (defined $pages{$2}) { # is a link
1172 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
39e571d4
ML
1173 } elsif (defined $pages{dosify($2)}) { # is a link
1174 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
54310121
PP
1175 } else {
1176 "$1$2";
1177 }
1178 }xeg;
5a039dd3
BS
1179# $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1180 $rest =~ s{
29f227c9
BS
1181 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1182 }{
1183 my $url ;
1184 if ( $htmlfileurl ne '' ) {
1185 # Here, we take advantage of the knowledge
1186 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1187 # Since $htmlroot eq '', we need to prepend $htmldir
1188 # on the fron of the link to get the absolute path
1189 # of the link's target. We check for a leading '/'
1190 # to avoid corrupting links that are #, file:, etc.
1191 my $old_url = $3 ;
1192 $old_url = "$htmldir$old_url"
1193 if ( $old_url =~ m{^\/} ) ;
1194 $url = relativize_url( "$old_url.html", $htmlfileurl );
1195# print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1196 }
1197 else {
1198 $url = "$3.html" ;
1199 }
5a039dd3
BS
1200 "$1$url" ;
1201 }xeg;
54310121 1202
5a039dd3
BS
1203 # Look for embedded URLs and make them in to links. We don't
1204 # relativize them since they are best left as the author intended.
54310121
PP
1205 my $urls = '(' . join ('|', qw{
1206 http
1207 telnet
1208 mailto
1209 news
1210 gopher
1211 file
1212 wais
1213 ftp
1214 } )
1215 . ')';
1216
1217 my $ltrs = '\w';
1218 my $gunk = '/#~:.?+=&%@!\-';
1219 my $punc = '.:?\-';
1220 my $any = "${ltrs}${gunk}${punc}";
1221
1222 $rest =~ s{
1223 \b # start at word boundary
1224 ( # begin $1 {
29f227c9
BS
1225 $urls : # need resource and a colon
1226 (?!:) # Ignore File::, among others.
54310121
PP
1227 [$any] +? # followed by on or more
1228 # of any valid character, but
1229 # be conservative and take only
1230 # what you need to....
1231 ) # end $1 }
1232 (?= # look-ahead non-consumptive assertion
1233 [$punc]* # either 0 or more puntuation
1234 [^$any] # followed by a non-url char
1235 | # or else
1236 $ # then end of the string
1237 )
1238 }{<A HREF="$1">$1</A>}igox;
1239
1240 $result = "<PRE>" # text should be as it is (verbatim)
1241 . "$rest\n"
1242 . "</PRE>\n";
1243 } else { # formatted text
1244 # parse through the string, stopping each time we find a
1245 # pod-escape. once the string has been throughly processed
1246 # we can output it.
ec978fad 1247 while (length $rest) {
54310121
PP
1248 # check to see if there are any possible pod directives in
1249 # the remaining part of the text.
1250 if ($rest =~ m/[BCEIFLSZ]</) {
1251 warn "\$rest\t= $rest\n" unless
1252 $rest =~ /\A
1253 ([^<]*?)
1254 ([BCEIFLSZ]?)
1255 <
1256 (.*)\Z/xs;
1257
1258 $s1 = $1; # pure text
1259 $s2 = $2; # the type of pod-escape that follows
1260 $s3 = '<'; # '<'
1261 $s4 = $3; # the rest of the string
1262 } else {
1263 $s1 = $rest;
1264 $s2 = "";
1265 $s3 = "";
1266 $s4 = "";
1267 }
1268
1269 if ($s3 eq '<' && $s2) { # a pod-escape
1270 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1271 $podcommand = "$s2<";
1272 $rest = $s4;
1273
1274 # find the matching '>'
1275 $match = 1;
1276 $bf = 0;
1277 while ($match && !$bf) {
1278 $bf = 1;
1279 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1280 $bf = 0;
1281 $match++;
1282 $podcommand .= $1;
1283 $rest = $2;
1284 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1285 $bf = 0;
1286 $match--;
1287 $podcommand .= $1;
1288 $rest = $2;
1289 }
1290 }
1291
1292 if ($match != 0) {
1293 warn <<WARN;
1294$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1295WARN
1296 $result .= substr $podcommand, 0, 2;
1297 $rest = substr($podcommand, 2) . $rest;
1298 next;
1299 }
1300
1301 # pull out the parameters to the pod-escape
1302 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1303 $tag = $1;
1304 $params = $2;
1305
1306 # process the text within the pod-escape so that any escapes
1307 # which must occur do.
1308 process_text(\$params, 0) unless $tag eq 'L';
1309
1310 $s1 = $params;
1311 if (!$tag || $tag eq " ") { # <> : no tag
1312 $s1 = "&lt;$params&gt;";
1313 } elsif ($tag eq "L") { # L<> : link
1314 $s1 = process_L($params);
1315 } elsif ($tag eq "I" || # I<> : italicize text
1316 $tag eq "B" || # B<> : bold text
1317 $tag eq "F") { # F<> : file specification
1318 $s1 = process_BFI($tag, $params);
1319 } elsif ($tag eq "C") { # C<> : literal code
1320 $s1 = process_C($params, 1);
1321 } elsif ($tag eq "E") { # E<> : escape
1322 $s1 = process_E($params);
1323 } elsif ($tag eq "Z") { # Z<> : zero-width character
1324 $s1 = process_Z($params);
1325 } elsif ($tag eq "S") { # S<> : non-breaking space
1326 $s1 = process_S($params);
1327 } elsif ($tag eq "X") { # S<> : non-breaking space
1328 $s1 = process_X($params);
1329 } else {
1330 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1331 }
1332
1333 $result .= "$s1";
1334 } else {
1335 # for pure text we must deal with implicit links and
1336 # double-quotes among other things.
1337 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1338 $rest = $s4;
1339 }
1340 }
1341 }
1342 $$text = $result;
1343}
1344
1345sub html_escape {
1346 my $rest = $_[0];
a3c03ba2 1347 $rest =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
54310121
PP
1348 $rest =~ s/</&lt;/g;
1349 $rest =~ s/>/&gt;/g;
1350 $rest =~ s/"/&quot;/g;
1351 return $rest;
1352}
1353
1354#
1355# process_puretext - process pure text (without pod-escapes) converting
1356# double-quotes and handling implicit C<> links.
1357#
1358sub process_puretext {
1359 my($text, $quote) = @_;
1360 my(@words, $result, $rest, $lead, $trail);
1361
1362 # convert double-quotes to single-quotes
1363 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1364 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1365
1366 $$quote = ($text =~ m/"/ ? 1 : 0);
1367 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1368
1369 # keep track of leading and trailing white-space
1370 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1371 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1372
1373 # collapse all white space into a single space
1374 $text =~ s/\s+/ /g;
1375 @words = split(" ", $text);
1376
1377 # process each word individually
1378 foreach my $word (@words) {
1379 # see if we can infer a link
1380 if ($word =~ /^\w+\(/) {
1381 # has parenthesis so should have been a C<> ref
1382 $word = process_C($word);
1383# $word =~ /^[^()]*]\(/;
1384# if (defined $items{$1} && $items{$1}) {
1385# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1386# . htmlify(0,$word)
1387# . "\">$word</A></CODE>";
1388# } elsif (defined $items{$word} && $items{$word}) {
1389# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1390# . htmlify(0,$word)
1391# . "\">$word</A></CODE>";
1392# } else {
1393# $word = "\n<CODE><A HREF=\"#item_"
1394# . htmlify(0,$word)
1395# . "\">$word</A></CODE>";
1396# }
1397 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1398 # perl variables, should be a C<> ref
1399 $word = process_C($word, 1);
1400 } elsif ($word =~ m,^\w+://\w,) {
1401 # looks like a URL
5a039dd3 1402 # Don't relativize it: leave it as the author intended
54310121 1403 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1404 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1405 # looks like an e-mail address
7b8d334a
GS
1406 my ($w1, $w2, $w3) = ("", $word, "");
1407 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1408 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1409 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1410 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1411 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1412 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1413 } else {
7b8d334a 1414 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1415 }
1416 }
1417
1418 # build a new string based upon our conversion
1419 $result = "";
1420 $rest = join(" ", @words);
1421 while (length($rest) > 75) {
1422 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1423 $rest =~ m/^(\S*)\s(.*?)$/o) {
1424
1425 $result .= "$1\n";
1426 $rest = $2;
1427 } else {
1428 $result .= "$rest\n";
1429 $rest = "";
1430 }
1431 }
1432 $result .= $rest if $rest;
1433
1434 # restore the leading and trailing white-space
1435 $result = "$lead$result$trail";
1436
1437 return $result;
1438}
1439
1440#
1441# pre_escape - convert & in text to $amp;
1442#
1443sub pre_escape {
1444 my($str) = @_;
db085819 1445 $$str =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
54310121
PP
1446}
1447
1448#
39e571d4
ML
1449# dosify - convert filenames to 8.3
1450#
1451sub dosify {
1452 my($str) = @_;
fe4c6be1 1453 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4
ML
1454 if ($Is83) {
1455 $str = lc $str;
1456 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1457 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1458 }
1459 return $str;
1460}
1461
1462#
54310121
PP
1463# process_L - convert a pod L<> directive to a corresponding HTML link.
1464# most of the links made are inferred rather than known about directly
1465# (i.e it's not known whether the =head\d section exists in the target file,
1466# or whether a .pod file exists in the case of split files). however, the
1467# guessing usually works.
1468#
1469# Unlike the other directives, this should be called with an unprocessed
1470# string, else tags in the link won't be matched.
1471#
1472sub process_L {
1473 my($str) = @_;
39e571d4 1474 my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
54310121
PP
1475
1476 $str =~ s/\n/ /g; # undo word-wrapped tags
1477 $s1 = $str;
1478 for ($s1) {
b74bceb9
AB
1479 # LREF: a la HREF L<show this text|man/section>
1480 $linktext = $1 if s:^([^|]+)\|::;
1481
54310121
PP
1482 # make sure sections start with a /
1483 s,^",/",g;
1484 s,^,/,g if (!m,/, && / /);
1485
1486 # check if there's a section specified
1487 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1488 ($page, $section) = ($1, $2);
1489 } else { # no
1490 ($page, $section) = ($str, "");
1491 }
1492
1493 # check if we know that this is a section in this page
1494 if (!defined $pages{$page} && defined $sections{$page}) {
1495 $section = $page;
1496 $page = "";
1497 }
29f227c9
BS
1498
1499 # remove trailing punctuation, like ()
1500 $section =~ s/\W*$// ;
54310121
PP
1501 }
1502
39e571d4
ML
1503 $page83=dosify($page);
1504 $page=$page83 if (defined $pages{$page83});
54310121
PP
1505 if ($page eq "") {
1506 $link = "#" . htmlify(0,$section);
b74bceb9 1507 $linktext = $section unless defined($linktext);
350ccacd 1508 } elsif ( $page =~ /::/ ) {
8f4c9bdd
GS
1509 $linktext = ($section ? "$section" : "$page")
1510 unless defined($linktext);
350ccacd 1511 $page =~ s,::,/,g;
29f227c9
BS
1512 # Search page cache for an entry keyed under the html page name,
1513 # then look to see what directory that page might be in. NOTE:
1514 # this will only find one page. A better solution might be to produce
1515 # an intermediate page that is an index to all such pages.
1516 my $page_name = $page ;
1517 $page_name =~ s,^.*/,, ;
1518 if ( defined( $pages{ $page_name } ) &&
1519 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1520 ) {
1521 $page = $1 ;
1522 }
1523 else {
1524 # NOTE: This branch assumes that all A::B pages are located in
1525 # $htmlroot/A/B.html . This is often incorrect, since they are
1526 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1527 # analyze the contents of %pages and figure out where any
1528 # cousins of A::B are, then assume that. So, if A::B isn't found,
1529 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1530 # lib/A/B.pm. This is also limited, but it's an improvement.
1531 # Maybe a hints file so that the links point to the correct places
1532 # non-theless?
1533 # Also, maybe put a warn "$0: cannot resolve..." here.
1534 }
350ccacd
GB
1535 $link = "$htmlroot/$page.html";
1536 $link .= "#" . htmlify(0,$section) if ($section);
54310121 1537 } elsif (!defined $pages{$page}) {
34db337b 1538 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n" unless $quiet;
54310121 1539 $link = "";
b74bceb9 1540 $linktext = $page unless defined($linktext);
54310121 1541 } else {
b74bceb9 1542 $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
54310121
PP
1543 $section = htmlify(0,$section) if $section ne "";
1544
1545 # if there is a directory by the name of the page, then assume that an
1546 # appropriate section will exist in the subdirectory
29f227c9
BS
1547# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1548 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
1549 $link = "$htmlroot/$1/$section.html";
1550
1551 # since there is no directory by the name of the page, the section will
1552 # have to exist within a .html of the same name. thus, make sure there
1553 # is a .pod or .pm that might become that .html
1554 } else {
1555 $section = "#$section";
1556 # check if there is a .pod with the page name
1557 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1558 $link = "$htmlroot/$1.html$section";
1559 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1560 $link = "$htmlroot/$1.html$section";
1561 } else {
1562 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1563 "no .pod or .pm found\n";
1564 $link = "";
b74bceb9 1565 $linktext = $section unless defined($linktext);
54310121
PP
1566 }
1567 }
1568 }
1569
1570 process_text(\$linktext, 0);
1571 if ($link) {
29f227c9
BS
1572 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1573 # implies $htmlroot eq ''. This means that the link in question
1574 # needs a prefix of $htmldir if it begins with '/'. The test for
1575 # the initial '/' is done to avoid '#'-only links, and to allow
1576 # for other kinds of links, like file:, ftp:, etc.
1577 my $url ;
1578 if ( $htmlfileurl ne '' ) {
1579 $link = "$htmldir$link"
1580 if ( $link =~ m{^/} ) ;
1581
1582 $url = relativize_url( $link, $htmlfileurl ) ;
1583# print( " b: [$link,$htmlfileurl,$url]\n" ) ;
1584 }
1585 else {
1586 $url = $link ;
1587 }
1588
5a039dd3 1589 $s1 = "<A HREF=\"$url\">$linktext</A>";
54310121
PP
1590 } else {
1591 $s1 = "<EM>$linktext</EM>";
1592 }
1593 return $s1;
1594}
1595
1596#
29f227c9
BS
1597# relativize_url - convert an absolute URL to one relative to a base URL.
1598# Assumes both end in a filename.
1599#
1600sub relativize_url {
1601 my ($dest,$source) = @_ ;
1602
1603 my ($dest_volume,$dest_directory,$dest_file) =
1604 File::Spec::Unix->splitpath( $dest ) ;
1605 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1606
1607 my ($source_volume,$source_directory,$source_file) =
1608 File::Spec::Unix->splitpath( $source ) ;
1609 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1610
1611 my $rel_path = '' ;
1612 if ( $dest ne '' ) {
1613 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1614 }
1615
1616 if ( $rel_path ne '' &&
1617 substr( $rel_path, -1 ) ne '/' &&
1618 substr( $dest_file, 0, 1 ) ne '#'
1619 ) {
1620 $rel_path .= "/$dest_file" ;
1621 }
1622 else {
1623 $rel_path .= "$dest_file" ;
1624 }
1625
1626 return $rel_path ;
1627}
1628
1629#
54310121
PP
1630# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1631# convert them to corresponding HTML directives.
1632#
1633sub process_BFI {
1634 my($tag, $str) = @_;
1635 my($s1); # work string
1636 my(%repltext) = ( 'B' => 'STRONG',
1637 'F' => 'EM',
1638 'I' => 'EM');
1639
1640 # extract the modified text and convert to HTML
1641 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1642 return $s1;
1643}
1644
1645#
1646# process_C - process the C<> pod-escape.
1647#
1648sub process_C {
1649 my($str, $doref) = @_;
1650 my($s1, $s2);
1651
1652 $s1 = $str;
1653 $s1 =~ s/\([^()]*\)//g; # delete parentheses
be173d55 1654 $s2 = $s1;
54310121 1655 $s1 =~ s/\W//g; # delete bogus characters
7b8d334a 1656 $str = html_escape($str);
54310121
PP
1657
1658 # if there was a pod file that we found earlier with an appropriate
1659 # =item directive, then create a link to that page.
1660 if ($doref && defined $items{$s1}) {
5a039dd3
BS
1661 if ( $items{$s1} ) {
1662 my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
29f227c9
BS
1663 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1664 # implies $htmlroot eq ''.
1665 my $url ;
1666 if ( $htmlfileurl ne '' ) {
1667 $link = "$htmldir$link" ;
1668 $url = relativize_url( $link, $htmlfileurl ) ;
1669 }
1670 else {
1671 $url = $link ;
1672 }
5a039dd3
BS
1673 $s1 = "<A HREF=\"$url\">$str</A>" ;
1674 }
1675 else {
1676 $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
1677 }
54310121
PP
1678 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1679 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1680 } else {
1681 $s1 = "<CODE>$str</CODE>";
1682 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1683 }
1684
1685
1686 return $s1;
1687}
1688
1689#
1690# process_E - process the E<> pod directive which seems to escape a character.
1691#
1692sub process_E {
1693 my($str) = @_;
1694
1695 for ($str) {
1696 s,([^/].*),\&$1\;,g;
1697 }
1698
1699 return $str;
1700}
1701
1702#
1703# process_Z - process the Z<> pod directive which really just amounts to
1704# ignoring it. this allows someone to start a paragraph with an =
1705#
1706sub process_Z {
1707 my($str) = @_;
1708
1709 # there is no equivalent in HTML for this so just ignore it.
1710 $str = "";
1711 return $str;
1712}
1713
1714#
1715# process_S - process the S<> pod directive which means to convert all
1716# spaces in the string to non-breaking spaces (in HTML-eze).
1717#
1718sub process_S {
1719 my($str) = @_;
1720
1721 # convert all spaces in the text to non-breaking spaces in HTML.
1722 $str =~ s/ /&nbsp;/g;
1723 return $str;
1724}
1725
1726#
1727# process_X - this is supposed to make an index entry. we'll just
1728# ignore it.
1729#
1730sub process_X {
1731 return '';
1732}
1733
1734
1735#
29f227c9
BS
1736# Adapted from Nick Ing-Simmons' PodToHtml package.
1737sub relative_url {
1738 my $source_file = shift ;
1739 my $destination_file = shift;
1740
1741 my $source = URI::file->new_abs($source_file);
1742 my $uo = URI::file->new($destination_file,$source)->abs;
1743 return $uo->rel->as_string;
1744}
1745
1746
1747#
54310121
PP
1748# finish_list - finish off any pending HTML lists. this should be called
1749# after the entire pod file has been read and converted.
1750#
1751sub finish_list {
7b8d334a 1752 while ($listlevel > 0) {
54310121
PP
1753 print HTML "</DL>\n";
1754 $listlevel--;
1755 }
1756}
1757
1758#
1759# htmlify - converts a pod section specification to a suitable section
1760# specification for HTML. if first arg is 1, only takes 1st word.
1761#
1762sub htmlify {
1763 my($compact, $heading) = @_;
1764
1765 if ($compact) {
1766 $heading =~ /^(\w+)/;
1767 $heading = $1;
1768 }
1769
1770 # $heading = lc($heading);
1771 $heading =~ s/[^\w\s]/_/g;
1772 $heading =~ s/(\s+)/ /g;
1773 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1774 $heading =~ s/ /_/g;
1775 $heading =~ s/\A(.{32}).*\Z/$1/s;
1776 $heading =~ s/\s+\Z//;
1777 $heading =~ s/_{2,}/_/g;
1778
1779 return $heading;
1780}
1781
1782BEGIN {
1783}
1784
17851;