This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Pod::Html unifying global variable declaration and initialization
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5 4
776d31fe 5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
99cb6bd8 6$VERSION = 1.0501;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
776d31fe 9@EXPORT_OK = qw(anchorify);
54310121
PP
10
11use Carp;
0e4548d5
GS
12use Config;
13use Cwd;
af75be11 14use File::Spec;
0e4548d5
GS
15use File::Spec::Unix;
16use Getopt::Long;
54310121 17
3ec07288
FK
18use locale; # make \w work right in non-ASCII lands
19
54310121
PP
20=head1 NAME
21
7b8d334a 22Pod::Html - module to convert pod files to HTML
54310121
PP
23
24=head1 SYNOPSIS
25
7b8d334a 26 use Pod::Html;
54310121
PP
27 pod2html([options]);
28
29=head1 DESCRIPTION
30
31Converts files from pod format (see L<perlpod>) to HTML format. It
32can automatically generate indexes and cross-references, and it keeps
33a cache of things it knows how to cross-reference.
34
35=head1 ARGUMENTS
36
37Pod::Html takes the following arguments:
38
39=over 4
40
0e4548d5
GS
41=item backlink
42
43 --backlink="Back to Top"
44
59ecbafa
VS
45Adds "Back to Top" links in front of every C<head1> heading (except for
46the first). By default, no backlinks are generated.
0e4548d5 47
b42210d7
RV
48=item cachedir
49
50 --cachedir=name
51
52Creates the item and directory caches in the given directory.
53
0e4548d5
GS
54=item css
55
56 --css=stylesheet
57
59ecbafa
VS
58Specify the URL of a cascading style sheet. Also disables all HTML/CSS
59C<style> attributes that are output by default (to avoid conflicts).
0e4548d5
GS
60
61=item flush
62
63 --flush
64
65Flushes the item and directory caches.
66
67=item header
68
69 --header
70 --noheader
71
59ecbafa
VS
72Creates header and footer blocks containing the text of the C<NAME>
73section. By default, no headers are generated.
0e4548d5 74
54310121
PP
75=item help
76
77 --help
78
79Displays the usage message.
80
5a039dd3
BS
81=item htmldir
82
83 --htmldir=name
84
85Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
86is used to generate relative links to other files. Not passing this
87causes all links to be absolute, since this is the value that tells
88Pod::Html the root of the documentation tree.
5a039dd3 89
54310121
PP
90=item htmlroot
91
92 --htmlroot=name
93
94Sets the base URL for the HTML files. When cross-references are made,
95the HTML root is prepended to the URL.
96
0e4548d5
GS
97=item index
98
99 --index
100 --noindex
101
102Generate an index at the top of the HTML file. This is the default
103behaviour.
104
54310121
PP
105=item infile
106
107 --infile=name
108
109Specify the pod file to convert. Input is taken from STDIN if no
110infile is specified.
111
54310121
PP
112=item libpods
113
114 --libpods=name:...:name
115
116List of page names (eg, "perlfunc") which contain linkable C<=item>s.
117
118=item netscape
119
120 --netscape
0e4548d5 121 --nonetscape
54310121 122
59ecbafa 123B<Deprecated>, has no effect. For backwards compatibility only.
54310121 124
0e4548d5 125=item outfile
54310121 126
0e4548d5 127 --outfile=name
54310121 128
0e4548d5
GS
129Specify the HTML file to create. Output goes to STDOUT if no outfile
130is specified.
54310121 131
0e4548d5 132=item podpath
54310121 133
0e4548d5 134 --podpath=name:...:name
54310121 135
0e4548d5 136Specify which subdirectories of the podroot contain pod files whose
59ecbafa 137HTML converted forms can be linked to in cross references.
54310121 138
0e4548d5 139=item podroot
54310121 140
0e4548d5 141 --podroot=name
54310121 142
0e4548d5 143Specify the base directory for finding library pods.
54310121 144
0e4548d5 145=item quiet
54310121 146
0e4548d5
GS
147 --quiet
148 --noquiet
54310121 149
0e4548d5
GS
150Don't display I<mostly harmless> warning messages. These messages
151will be displayed by default. But this is not the same as C<verbose>
152mode.
54310121 153
0e4548d5 154=item recurse
54310121 155
0e4548d5 156 --recurse
54310121
PP
157 --norecurse
158
0e4548d5 159Recurse into subdirectories specified in podpath (default behaviour).
54310121
PP
160
161=item title
162
163 --title=title
164
165Specify the title of the resulting HTML file.
166
167=item verbose
168
169 --verbose
0e4548d5 170 --noverbose
54310121 171
0e4548d5 172Display progress messages. By default, they won't be displayed.
34db337b 173
54310121
PP
174=back
175
176=head1 EXAMPLE
177
178 pod2html("pod2html",
59ecbafa 179 "--podpath=lib:ext:pod:vms",
54310121
PP
180 "--podroot=/usr/src/perl",
181 "--htmlroot=/perl/nmanual",
182 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
183 "--recurse",
184 "--infile=foo.pod",
185 "--outfile=/perl/nmanual/foo.html");
186
34db337b
JD
187=head1 ENVIRONMENT
188
59ecbafa 189Uses C<$Config{pod2html}> to setup default options.
34db337b 190
54310121
PP
191=head1 AUTHOR
192
193Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
194
54310121
PP
195=head1 SEE ALSO
196
197L<perlpod>
198
199=head1 COPYRIGHT
200
201This program is distributed under the Artistic License.
202
203=cut
204
54310121 205
99cb6bd8
MS
206my($cachedir);
207my($dircache, $itemcache);
208my @begin_stack;
209my @libpods;
210my($htmlroot, $htmldir, $htmlfile, $htmlfileurl);
211my($podfile, @podpath, $podroot);
212my $css;
54310121 213
99cb6bd8
MS
214my $recurse;
215my $quiet;
216my $verbose;
217my $doindex;
54310121 218
99cb6bd8
MS
219my $backlink;
220my($listlevel, @listend);
221my $after_lpar;
222my $ignore;
223
224my(%items_named, @items_seen);
225my($title, $header);
226
227my $top;
228my $paragraph;
229
230my %sections;
231
232# Caches
54310121
PP
233my %pages = (); # associative array used to find the location
234 # of pages referenced by L<> links.
54310121
PP
235my %items = (); # associative array used to find the location
236 # of =item directives referenced by C<> links
99cb6bd8
MS
237
238my %local_items;
239my $Is83;
240my $ptQuote;
241
242my $Curdir = File::Spec->curdir;
243
244init_globals();
39e571d4 245
54310121 246sub init_globals {
99cb6bd8
MS
247 $cachedir = "."; # The directory to which item and directory
248 # caches will be written.
249
250 $dircache = "pod2htmd.tmp";
251 $itemcache = "pod2htmi.tmp";
54310121 252
99cb6bd8 253 @begin_stack = (); # begin/end stack
54310121 254
99cb6bd8
MS
255 @libpods = (); # files to search for links from C<> directives
256 $htmlroot = "/"; # http-server base directory from which all
54310121 257 # relative paths in $podpath stem.
99cb6bd8 258 $htmldir = ""; # The directory to which the html pages
67398a75 259 # will (eventually) be written.
99cb6bd8
MS
260 $htmlfile = ""; # write to stdout by default
261 $htmlfileurl = "" ; # The url that other files would use to
262 # refer to this file. This is only used
263 # to make relative urls that point to
264 # other files.
265
266 $podfile = ""; # read from stdin by default
267 @podpath = (); # list of directories containing library pods.
268 $podroot = $Curdir; # filesystem base directory from which all
54310121 269 # relative paths in $podpath stem.
99cb6bd8
MS
270 $css = ''; # Cascading style sheet
271 $recurse = 1; # recurse on subdirectories in $podpath.
272 $quiet = 0; # not quiet by default
273 $verbose = 0; # not verbose by default
274 $doindex = 1; # non-zero if we should generate an index
275 $backlink = ''; # text for "back to top" links
276 $listlevel = 0; # current list depth
277 @listend = (); # the text to use to end the list.
278 $after_lpar = 0; # set to true after a par in an =item
279 $ignore = 1; # whether or not to format text. we don't
54310121
PP
280 # format text until we hit our first pod
281 # directive.
282
99cb6bd8
MS
283 @items_seen = (); # for multiples of the same item in perlfunc
284 %items_named = ();
285 $header = 0; # produce block header/footer
286 $title = ''; # title to give the pod(s)
287 $top = 1; # true if we are at the top of the doc. used
59ecbafa 288 # to prevent the first <hr /> directive.
99cb6bd8 289 $paragraph = ''; # which paragraph we're processing (used
54310121 290 # for error messages)
99cb6bd8
MS
291 $ptQuote = 0; # status of double-quote conversion
292 %sections = (); # sections within this page
3e3baf6d 293
99cb6bd8
MS
294 %local_items = ();
295 $Is83 = $^O eq 'dos'; # Is it an 8.3 filesystem?
54310121
PP
296}
297
2a28b791
GS
298#
299# clean_data: global clean-up of pod data
300#
301sub clean_data($){
302 my( $dataref ) = @_;
bf202ccd 303 for my $i ( 0..$#{$dataref} ) {
2a28b791
GS
304 ${$dataref}[$i] =~ s/\s+\Z//;
305
306 # have a look for all-space lines
4135c0a0 307 if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
2a28b791
GS
308 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
309 splice( @$dataref, $i, 1, @chunks );
310 }
311 }
312}
313
314
54310121
PP
315sub pod2html {
316 local(@ARGV) = @_;
317 local($/);
318 local $_;
319
320 init_globals();
321
39e571d4
ML
322 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
323
54310121 324 # cache of %pages and %items from last time we ran pod2html
54310121
PP
325
326 #undef $opt_help if defined $opt_help;
327
328 # parse the command-line parameters
329 parse_command_line();
330
59ecbafa
VS
331 # escape the backlink argument (same goes for title but is done later...)
332 $backlink = html_escape($backlink) if defined $backlink;
333
54310121
PP
334 # set some variables to their default values if necessary
335 local *POD;
59ecbafa 336 unless (@ARGV && $ARGV[0]) {
54310121
PP
337 $podfile = "-" unless $podfile; # stdin
338 open(POD, "<$podfile")
339 || die "$0: cannot open $podfile file for input: $!\n";
340 } else {
341 $podfile = $ARGV[0]; # XXX: might be more filenames
342 *POD = *ARGV;
59ecbafa 343 }
54310121
PP
344 $htmlfile = "-" unless $htmlfile; # stdout
345 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 346 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9 347 if ( $htmlroot eq ''
59ecbafa 348 && defined( $htmldir )
29f227c9 349 && $htmldir ne ''
59ecbafa
VS
350 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
351 )
5a039dd3 352 {
29f227c9 353 # Set the 'base' url for this file, so that we can use it
59ecbafa 354 # as the location from which to calculate relative links
29f227c9
BS
355 # to other files. If this is '', then absolute links will
356 # be used throughout.
357 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 358 }
54310121
PP
359
360 # read the pod a paragraph at a time
361 warn "Scanning for sections in input file(s)\n" if $verbose;
362 $/ = "";
363 my @poddata = <POD>;
364 close(POD);
bbed45f6
JH
365
366 # be eol agnostic
367 for (@poddata) {
5551e547
JH
368 if (/\r/) {
369 if (/\r\n/) {
370 @poddata = map { s/\r\n/\n/g;
bbed45f6
JH
371 /\n\n/ ?
372 map { "$_\n\n" } split /\n\n/ :
373 $_ } @poddata;
374 } else {
5551e547 375 @poddata = map { s/\r/\n/g;
bbed45f6
JH
376 /\n\n/ ?
377 map { "$_\n\n" } split /\n\n/ :
378 $_ } @poddata;
379 }
380 last;
381 }
382 }
383
2a28b791 384 clean_data( \@poddata );
54310121
PP
385
386 # scan the pod for =head[1-6] directives and build an index
387 my $index = scan_headings(\%sections, @poddata);
388
3e3baf6d 389 unless($index) {
31e56455 390 warn "No headings in $podfile\n" if $verbose;
3e3baf6d
TB
391 }
392
54310121
PP
393 # open the output file
394 open(HTML, ">$htmlfile")
395 || die "$0: cannot open $htmlfile file for output: $!\n";
396
d011ffae
GS
397 # put a title in the HTML file if one wasn't specified
398 if ($title eq '') {
399 TITLE_SEARCH: {
59ecbafa 400 for (my $i = 0; $i < @poddata; $i++) {
d011ffae 401 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
59ecbafa 402 for my $para ( @poddata[$i, $i+1] ) {
d011ffae
GS
403 last TITLE_SEARCH
404 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
405 }
59ecbafa 406 }
54310121 407
59ecbafa 408 }
d011ffae
GS
409 }
410 }
fe6f1558 411 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d 412 # probably a split pod so take first =head[12] as title
59ecbafa 413 for (my $i = 0; $i < @poddata; $i++) {
3e3baf6d 414 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
59ecbafa 415 }
3e3baf6d
TB
416 warn "adopted '$title' as title for $podfile\n"
417 if $verbose and $title;
59ecbafa 418 }
7b8d334a
GS
419 if ($title) {
420 $title =~ s/\s*\(.*\)//;
421 } else {
f8f99792 422 warn "$0: no title for $podfile.\n" unless $quiet;
fe6f1558 423 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 424 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 425 warn "using $title" if $verbose;
54310121 426 }
59ecbafa
VS
427 $title = html_escape($title);
428
429 my $csslink = '';
430 my $bodystyle = ' style="background-color: white"';
431 my $tdstyle = ' style="background-color: #cccccc"';
432
433 if ($css) {
434 $csslink = qq(\n<link rel="stylesheet" href="$css" type="text/css" />);
435 $csslink =~ s,\\,/,g;
436 $csslink =~ s,(/.):,$1|,;
437 $bodystyle = '';
438 $tdstyle = '';
439 }
440
441 my $block = $header ? <<END_OF_BLOCK : '';
442<table border="0" width="100%" cellspacing="0" cellpadding="3">
443<tr><td class="block"$tdstyle valign="middle">
444<big><strong><span class="block">&nbsp;$title</span></strong></big>
445</td></tr>
446</table>
34db337b
JD
447END_OF_BLOCK
448
54310121 449 print HTML <<END_OF_HEAD;
59ecbafa
VS
450<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
451<html xmlns="http://www.w3.org/1999/xhtml">
8d481ee5 452<head>
59ecbafa
VS
453<title>$title</title>$csslink
454<link rev="made" href="mailto:$Config{perladmin}" />
455</head>
456
457<body$bodystyle>
34db337b 458$block
54310121
PP
459END_OF_HEAD
460
3e3baf6d
TB
461 # load/reload/validate/cache %pages and %items
462 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121
PP
463
464 # scan the pod for =item directives
2a28b791 465 scan_items( \%local_items, "", @poddata);
54310121
PP
466
467 # put an index at the top of the file. note, if $doindex is 0 we
468 # still generate an index, but surround it with an html comment.
469 # that way some other program can extract it if desired.
470 $index =~ s/--+/-/g;
59ecbafa 471 print HTML "<p><a name=\"__index__\"></a></p>\n";
54310121
PP
472 print HTML "<!-- INDEX BEGIN -->\n";
473 print HTML "<!--\n" unless $doindex;
474 print HTML $index;
475 print HTML "-->\n" unless $doindex;
476 print HTML "<!-- INDEX END -->\n\n";
59ecbafa 477 print HTML "<hr />\n" if $doindex and $index;
54310121
PP
478
479 # now convert this file
2a28b791 480 my $after_item; # set to true after an =item
59ecbafa 481 my $need_dd = 0;
2a28b791
GS
482 warn "Converting input file $podfile\n" if $verbose;
483 foreach my $i (0..$#poddata){
484 $ptQuote = 0; # status of quote conversion
485
54310121
PP
486 $_ = $poddata[$i];
487 $paragraph = $i+1;
488 if (/^(=.*)/s) { # is it a pod directive?
489 $ignore = 0;
2a28b791 490 $after_item = 0;
59ecbafa 491 $need_dd = 0;
54310121
PP
492 $_ = $1;
493 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
494 process_begin($1, $2);
495 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
496 process_end($1, $2);
497 } elsif (/^=cut/) { # =cut
498 process_cut();
499 } elsif (/^=pod/) { # =pod
500 process_pod();
501 } else {
502 next if @begin_stack && $begin_stack[-1] ne 'html';
503
7b8d334a 504 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791
GS
505 process_head( $1, $2, $doindex && $index );
506 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
59ecbafa 507 $need_dd = process_item( $1 );
2a28b791 508 $after_item = 1;
54310121
PP
509 } elsif (/^=over\s*(.*)/) { # =over N
510 process_over();
511 } elsif (/^=back/) { # =back
512 process_back();
a45bd81d 513 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121
PP
514 process_for($1,$2);
515 } else {
516 /^=(\S*)\s*/;
517 warn "$0: $podfile: unknown pod directive '$1' in "
518 . "paragraph $paragraph. ignoring.\n";
519 }
520 }
521 $top = 0;
522 }
523 else {
524 next if $ignore;
525 next if @begin_stack && $begin_stack[-1] ne 'html';
59ecbafa
VS
526 print HTML and next if @begin_stack && $begin_stack[-1] eq 'html';
527 print HTML "<dd>\n" if $need_dd;
54310121 528 my $text = $_;
2a28b791
GS
529 if( $text =~ /\A\s+/ ){
530 process_pre( \$text );
59ecbafa 531 print HTML "<pre>\n$text</pre>\n";
2a28b791
GS
532
533 } else {
534 process_text( \$text );
535
536 # experimental: check for a paragraph where all lines
537 # have some ...\t...\t...\n pattern
538 if( $text =~ /\t/ ){
539 my @lines = split( "\n", $text );
540 if( @lines > 1 ){
541 my $all = 2;
542 foreach my $line ( @lines ){
543 if( $line =~ /\S/ && $line !~ /\t/ ){
544 $all--;
545 last if $all == 0;
546 }
547 }
548 if( $all > 0 ){
59ecbafa
VS
549 $text =~ s/\t+/<td>/g;
550 $text =~ s/^/<tr><td>/gm;
551 $text = '<table cellspacing="0" cellpadding="0">' .
552 $text . '</table>';
2a28b791
GS
553 }
554 }
555 }
556 ## end of experimental
557
558 if( $after_item ){
559 print HTML "$text\n";
560 $after_lpar = 1;
561 } else {
59ecbafa 562 print HTML "<p>$text</p>\n";
2a28b791
GS
563 }
564 }
59ecbafa 565 print HTML "</dd>\n" if $need_dd;
2a28b791 566 $after_item = 0;
54310121
PP
567 }
568 }
569
570 # finish off any pending directives
571 finish_list();
2a28b791
GS
572
573 # link to page index
59ecbafa 574 print HTML "<p><a href=\"#__index__\"><small>$backlink</small></a></p>\n"
0e4548d5 575 if $doindex and $index and $backlink;
2a28b791 576
54310121 577 print HTML <<END_OF_TAIL;
34db337b 578$block
59ecbafa 579</body>
54310121 580
59ecbafa 581</html>
54310121
PP
582END_OF_TAIL
583
584 # close the html file
585 close(HTML);
586
587 warn "Finished\n" if $verbose;
588}
589
590##############################################################################
591
592my $usage; # see below
593sub usage {
594 my $podfile = shift;
595 warn "$0: $podfile: @_\n" if @_;
596 die $usage;
597}
598
599$usage =<<END_OF_USAGE;
600Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
601 --podpath=<name>:...:<name> --podroot=<name>
602 --libpods=<name>:...:<name> --recurse --verbose --index
b42210d7 603 --netscape --norecurse --noindex --cachedir=<name>
54310121 604
0e4548d5 605 --backlink - set text for "back to top" links (default: none).
b42210d7 606 --cachedir - directory for the item and directory cache files.
0e4548d5
GS
607 --css - stylesheet URL
608 --flush - flushes the item and directory caches.
609 --[no]header - produce block header/footer (default is no headers).
610 --help - prints this message.
611 --htmldir - directory for resulting HTML files.
612 --htmlroot - http-server base directory from which all relative paths
613 in podpath stem (default is /).
614 --[no]index - generate an index at the top of the resulting html
615 (default behaviour).
616 --infile - filename for the pod to convert (input taken from stdin
617 by default).
618 --libpods - colon-separated list of pages to search for =item pod
619 directives in as targets of C<> and implicit links (empty
620 by default). note, these are not filenames, but rather
621 page names like those that appear in L<> links.
0e4548d5
GS
622 --outfile - filename for the resulting html file (output sent to
623 stdout by default).
624 --podpath - colon-separated list of directories containing library
625 pods (empty by default).
626 --podroot - filesystem base directory from which all relative paths
627 in podpath stem (default is .).
628 --[no]quiet - supress some benign warning messages (default is off).
629 --[no]recurse - recurse on those subdirectories listed in podpath
630 (default behaviour).
631 --title - title that will appear in resulting html file.
632 --[no]verbose - self-explanatory (off by default).
59ecbafa 633 --[no]netscape - deprecated, has no effect. for backwards compatibility only.
54310121
PP
634
635END_OF_USAGE
636
637sub parse_command_line {
b42210d7
RV
638 my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
639 $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
640 $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
641 $opt_recurse,$opt_title,$opt_verbose);
0e4548d5 642
34db337b 643 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 644 my $result = GetOptions(
0e4548d5 645 'backlink=s' => \$opt_backlink,
b42210d7 646 'cachedir=s' => \$opt_cachedir,
0e4548d5 647 'css=s' => \$opt_css,
29f227c9 648 'flush' => \$opt_flush,
0e4548d5 649 'header!' => \$opt_header,
29f227c9
BS
650 'help' => \$opt_help,
651 'htmldir=s' => \$opt_htmldir,
54310121 652 'htmlroot=s' => \$opt_htmlroot,
29f227c9 653 'index!' => \$opt_index,
54310121
PP
654 'infile=s' => \$opt_infile,
655 'libpods=s' => \$opt_libpods,
656 'netscape!' => \$opt_netscape,
657 'outfile=s' => \$opt_outfile,
658 'podpath=s' => \$opt_podpath,
659 'podroot=s' => \$opt_podroot,
0e4548d5 660 'quiet!' => \$opt_quiet,
54310121
PP
661 'recurse!' => \$opt_recurse,
662 'title=s' => \$opt_title,
0e4548d5 663 'verbose!' => \$opt_verbose,
54310121
PP
664 );
665 usage("-", "invalid parameters") if not $result;
666
667 usage("-") if defined $opt_help; # see if the user asked for help
668 $opt_help = ""; # just to make -w shut-up.
669
54310121
PP
670 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
671 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
672
0e4548d5 673 $backlink = $opt_backlink if defined $opt_backlink;
b42210d7 674 $cachedir = $opt_cachedir if defined $opt_cachedir;
0e4548d5
GS
675 $css = $opt_css if defined $opt_css;
676 $header = $opt_header if defined $opt_header;
677 $htmldir = $opt_htmldir if defined $opt_htmldir;
678 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
679 $doindex = $opt_index if defined $opt_index;
680 $podfile = $opt_infile if defined $opt_infile;
0e4548d5
GS
681 $htmlfile = $opt_outfile if defined $opt_outfile;
682 $podroot = $opt_podroot if defined $opt_podroot;
683 $quiet = $opt_quiet if defined $opt_quiet;
684 $recurse = $opt_recurse if defined $opt_recurse;
685 $title = $opt_title if defined $opt_title;
686 $verbose = $opt_verbose if defined $opt_verbose;
687
54310121
PP
688 warn "Flushing item and directory caches\n"
689 if $opt_verbose && defined $opt_flush;
33869856
MS
690 $dircache = "$cachedir/pod2htmd.tmp";
691 $itemcache = "$cachedir/pod2htmi.tmp";
383e43c7
CB
692 if (defined $opt_flush) {
693 1 while unlink($dircache, $itemcache);
694 }
54310121
PP
695}
696
3e3baf6d
TB
697
698my $saved_cache_key;
699
700sub get_cache {
701 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
702 my @cache_key_args = @_;
703
704 # A first-level cache:
705 # Don't bother reading the cache files if they still apply
706 # and haven't changed since we last read them.
707
708 my $this_cache_key = cache_key(@cache_key_args);
709
710 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
711
712 # load the cache of %pages and %items if possible. $tests will be
713 # non-zero if successful.
714 my $tests = 0;
715 if (-f $dircache && -f $itemcache) {
716 warn "scanning for item cache\n" if $verbose;
717 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
718 }
719
720 # if we didn't succeed in loading the cache then we must (re)build
721 # %pages and %items.
722 if (!$tests) {
723 warn "scanning directories in pod-path\n" if $verbose;
724 scan_podpath($podroot, $recurse, 0);
725 }
726 $saved_cache_key = cache_key(@cache_key_args);
727}
728
729sub cache_key {
730 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
731 return join('!', $dircache, $itemcache, $recurse,
29f227c9 732 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
733}
734
54310121 735#
3e3baf6d 736# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121
PP
737# are valid caches of %pages and %items. if they are valid then it loads
738# them and returns a non-zero value.
739#
3e3baf6d 740sub load_cache {
54310121
PP
741 my($dircache, $itemcache, $podpath, $podroot) = @_;
742 my($tests);
743 local $_;
744
745 $tests = 0;
746
747 open(CACHE, "<$itemcache") ||
748 die "$0: error opening $itemcache for reading: $!\n";
749 $/ = "\n";
750
751 # is it the same podpath?
752 $_ = <CACHE>;
753 chomp($_);
3e3baf6d 754 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
755
756 # is it the same podroot?
757 $_ = <CACHE>;
758 chomp($_);
759 $tests++ if ($podroot eq $_);
760
761 # load the cache if its good
762 if ($tests != 2) {
763 close(CACHE);
54310121
PP
764 return 0;
765 }
766
767 warn "loading item cache\n" if $verbose;
768 while (<CACHE>) {
769 /(.*?) (.*)$/;
770 $items{$1} = $2;
771 }
772 close(CACHE);
773
774 warn "scanning for directory cache\n" if $verbose;
775 open(CACHE, "<$dircache") ||
776 die "$0: error opening $dircache for reading: $!\n";
777 $/ = "\n";
778 $tests = 0;
779
780 # is it the same podpath?
781 $_ = <CACHE>;
782 chomp($_);
3e3baf6d 783 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
784
785 # is it the same podroot?
786 $_ = <CACHE>;
787 chomp($_);
788 $tests++ if ($podroot eq $_);
789
790 # load the cache if its good
791 if ($tests != 2) {
792 close(CACHE);
54310121
PP
793 return 0;
794 }
795
796 warn "loading directory cache\n" if $verbose;
797 while (<CACHE>) {
798 /(.*?) (.*)$/;
799 $pages{$1} = $2;
800 }
801
802 close(CACHE);
803
804 return 1;
805}
806
807#
808# scan_podpath - scans the directories specified in @podpath for directories,
809# .pod files, and .pm files. it also scans the pod files specified in
810# @libpods for =item directives.
811#
812sub scan_podpath {
3e3baf6d 813 my($podroot, $recurse, $append) = @_;
54310121
PP
814 my($pwd, $dir);
815 my($libpod, $dirname, $pod, @files, @poddata);
816
3e3baf6d
TB
817 unless($append) {
818 %items = ();
819 %pages = ();
820 }
821
54310121
PP
822 # scan each directory listed in @podpath
823 $pwd = getcwd();
824 chdir($podroot)
825 || die "$0: error changing to directory $podroot: $!\n";
826 foreach $dir (@podpath) {
827 scan_dir($dir, $recurse);
828 }
829
830 # scan the pods listed in @libpods for =item directives
831 foreach $libpod (@libpods) {
832 # if the page isn't defined then we won't know where to find it
833 # on the system.
834 next unless defined $pages{$libpod} && $pages{$libpod};
835
836 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
837 # NOTE: Only finds the first so-named directory in the tree.
838# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
839 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
840 # find all the .pod and .pm files within the directory
841 $dirname = $1;
842 opendir(DIR, $dirname) ||
843 die "$0: error opening directory $dirname: $!\n";
fe6f1558 844 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121
PP
845 closedir(DIR);
846
847 # scan each .pod and .pm file for =item directives
848 foreach $pod (@files) {
849 open(POD, "<$dirname/$pod") ||
850 die "$0: error opening $dirname/$pod for input: $!\n";
851 @poddata = <POD>;
852 close(POD);
2a28b791 853 clean_data( \@poddata );
54310121 854
2a28b791 855 scan_items( \%items, "$dirname/$pod", @poddata);
54310121
PP
856 }
857
858 # use the names of files as =item directives too.
2a28b791
GS
859### Don't think this should be done this way - confuses issues.(WL)
860### foreach $pod (@files) {
861### $pod =~ /^(.*)(\.pod|\.pm)$/;
862### $items{$1} = "$dirname/$1.html" if $1;
863### }
54310121
PP
864 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
865 $pages{$libpod} =~ /([^:]*\.pm):/) {
866 # scan the .pod or .pm file for =item directives
867 $pod = $1;
868 open(POD, "<$pod") ||
869 die "$0: error opening $pod for input: $!\n";
870 @poddata = <POD>;
871 close(POD);
2a28b791 872 clean_data( \@poddata );
54310121 873
2a28b791 874 scan_items( \%items, "$pod", @poddata);
54310121
PP
875 } else {
876 warn "$0: shouldn't be here (line ".__LINE__."\n";
877 }
878 }
879 @poddata = (); # clean-up a bit
880
881 chdir($pwd)
882 || die "$0: error changing to directory $pwd: $!\n";
883
884 # cache the item list for later use
885 warn "caching items for later use\n" if $verbose;
886 open(CACHE, ">$itemcache") ||
887 die "$0: error open $itemcache for writing: $!\n";
888
889 print CACHE join(":", @podpath) . "\n$podroot\n";
890 foreach my $key (keys %items) {
891 print CACHE "$key $items{$key}\n";
892 }
893
894 close(CACHE);
895
896 # cache the directory list for later use
897 warn "caching directories for later use\n" if $verbose;
898 open(CACHE, ">$dircache") ||
899 die "$0: error open $dircache for writing: $!\n";
900
901 print CACHE join(":", @podpath) . "\n$podroot\n";
902 foreach my $key (keys %pages) {
903 print CACHE "$key $pages{$key}\n";
904 }
905
906 close(CACHE);
907}
908
909#
910# scan_dir - scans the directory specified in $dir for subdirectories, .pod
911# files, and .pm files. notes those that it finds. this information will
912# be used later in order to figure out where the pages specified in L<>
913# links are on the filesystem.
914#
915sub scan_dir {
916 my($dir, $recurse) = @_;
917 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
918 local $_;
919
920 @subdirs = ();
921 @pods = ();
922
923 opendir(DIR, $dir) ||
924 die "$0: error opening directory $dir: $!\n";
925 while (defined($_ = readdir(DIR))) {
926 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
927 $pages{$_} = "" unless defined $pages{$_};
928 $pages{$_} .= "$dir/$_:";
929 push(@subdirs, $_);
fe6f1558
GS
930 } elsif (/\.pod\z/) { # .pod
931 s/\.pod\z//;
54310121
PP
932 $pages{$_} = "" unless defined $pages{$_};
933 $pages{$_} .= "$dir/$_.pod:";
934 push(@pods, "$dir/$_.pod");
d3f9d0fb
SB
935 } elsif (/\.html\z/) { # .html
936 s/\.html\z//;
937 $pages{$_} = "" unless defined $pages{$_};
938 $pages{$_} .= "$dir/$_.pod:";
fe6f1558
GS
939 } elsif (/\.pm\z/) { # .pm
940 s/\.pm\z//;
54310121
PP
941 $pages{$_} = "" unless defined $pages{$_};
942 $pages{$_} .= "$dir/$_.pm:";
943 push(@pods, "$dir/$_.pm");
944 }
945 }
946 closedir(DIR);
947
948 # recurse on the subdirectories if necessary
949 if ($recurse) {
950 foreach my $subdir (@subdirs) {
951 scan_dir("$dir/$subdir", $recurse);
952 }
953 }
954}
955
956#
957# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
958# build an index.
959#
960sub scan_headings {
961 my($sections, @data) = @_;
2a28b791 962 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 963
be173d55
UC
964 # here we need local $ignore = 0;
965 # unfortunately, we can't have it, because $ignore is lexical
966 $ignore = 0;
967
54310121
PP
968 $listdepth = 0;
969 $index = "";
970
971 # scan for =head directives, note their name, and build an index
972 # pointing to each of them.
973 foreach my $line (@data) {
59ecbafa
VS
974 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
975 ($tag, $which_head, $otitle) = ($1,$2,$3);
2a28b791 976
59ecbafa 977 my $title = depod( $otitle );
2beb85b7 978 my $name = anchorify( $title );
59ecbafa 979 $$sections{$name} = 1;
59ecbafa 980 $title = process_text( \$otitle );
54310121 981
102c538a
DS
982 while ($which_head != $listdepth) {
983 if ($which_head > $listdepth) {
59ecbafa 984 $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
102c538a
DS
985 $listdepth++;
986 } elsif ($which_head < $listdepth) {
987 $listdepth--;
59ecbafa 988 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
102c538a 989 }
54310121 990 }
54310121 991
59ecbafa
VS
992 $index .= "\n" . ("\t" x $listdepth) . "<li>" .
993 "<a href=\"#" . $name . "\">" .
994 $title . "</a></li>";
54310121
PP
995 }
996 }
997
998 # finish off the lists
999 while ($listdepth--) {
59ecbafa 1000 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
54310121
PP
1001 }
1002
1003 # get rid of bogus lists
59ecbafa 1004 $index =~ s,\t*<ul>\s*</ul>\n,,g;
54310121 1005
bb9460ed 1006 $ignore = 1; # restore old value;
be173d55 1007
54310121
PP
1008 return $index;
1009}
1010
1011#
1012# scan_items - scans the pod specified by $pod for =item directives. we
1013# will use this information later on in resolving C<> links.
1014#
1015sub scan_items {
2a28b791 1016 my( $itemref, $pod, @poddata ) = @_;
54310121
PP
1017 my($i, $item);
1018 local $_;
1019
fe6f1558 1020 $pod =~ s/\.pod\z//;
54310121
PP
1021 $pod .= ".html" if $pod;
1022
1023 foreach $i (0..$#poddata) {
2a28b791
GS
1024 my $txt = depod( $poddata[$i] );
1025
1026 # figure out what kind of item it is.
1027 # Build string for referencing this item.
1028 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1029 next unless $1;
1030 $item = $1;
1031 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1032 $item = $1;
1033 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1034 $item = $1;
1035 } else {
1036 next;
54310121 1037 }
2a28b791
GS
1038 my $fid = fragment_id( $item );
1039 $$itemref{$fid} = "$pod" if $fid;
54310121
PP
1040 }
1041}
1042
1043#
1044# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1045#
1046sub process_head {
2a28b791 1047 my($tag, $heading, $hasindex) = @_;
54310121
PP
1048
1049 # figure out the level of the =head
1050 $tag =~ /head([1-6])/;
1051 my $level = $1;
1052
2a28b791
GS
1053 if( $listlevel ){
1054 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1055 while( $listlevel ){
1056 process_back();
1057 }
1058 }
1059
59ecbafa 1060 print HTML "<p>\n";
2a28b791 1061 if( $level == 1 && ! $top ){
59ecbafa
VS
1062 print HTML "<a href=\"#__index__\"><small>$backlink</small></a>\n"
1063 if $hasindex and $backlink;
1064 print HTML "</p>\n<hr />\n"
1065 } else {
1066 print HTML "</p>\n";
2a28b791
GS
1067 }
1068
0d396dd4 1069 my $name = anchorify( depod( $heading ) );
2a28b791 1070 my $convert = process_text( \$heading );
59ecbafa 1071 print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
54310121
PP
1072}
1073
2a28b791 1074
54310121 1075#
2a28b791
GS
1076# emit_item_tag - print an =item's text
1077# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1078#
2a28b791
GS
1079my $EmittedItem;
1080
1081sub emit_item_tag($$$){
1082 my( $otext, $text, $compact ) = @_;
1083 my $item = fragment_id( $text );
54310121 1084
2a28b791
GS
1085 $EmittedItem = $item;
1086 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1087
59ecbafa 1088 print HTML '<strong>';
2a28b791
GS
1089 if ($items_named{$item}++) {
1090 print HTML process_text( \$otext );
1091 } else {
0d396dd4
SB
1092 my $name = 'item_' . $item;
1093 $name = anchorify($name);
59ecbafa 1094 print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
2a28b791 1095 }
59ecbafa 1096 print HTML "</strong><br />\n";
2a28b791
GS
1097 undef( $EmittedItem );
1098}
1099
1100sub emit_li {
1101 my( $tag ) = @_;
1102 if( $items_seen[$listlevel]++ == 0 ){
1103 push( @listend, "</$tag>" );
1104 print HTML "<$tag>\n";
1105 }
59ecbafa
VS
1106 my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1107 print HTML "<$emitted>";
1108 return $emitted;
2a28b791
GS
1109}
1110
1111#
1112# process_item - convert a pod item tag and convert it to HTML format.
1113#
1114sub process_item {
1115 my( $otext ) = @_;
59ecbafa 1116 my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
54310121
PP
1117
1118 # lots of documents start a list without doing an =over. this is
1119 # bad! but, the proper thing to do seems to be to just assume
1120 # they did do an =over. so warn them once and then continue.
2a28b791
GS
1121 if( $listlevel == 0 ){
1122 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1123 process_over();
1124 }
54310121 1125
2a28b791
GS
1126 # formatting: insert a paragraph if preceding item has >1 paragraph
1127 if( $after_lpar ){
59ecbafa 1128 print HTML "<p></p>\n";
2a28b791
GS
1129 $after_lpar = 0;
1130 }
54310121
PP
1131
1132 # remove formatting instructions from the text
2a28b791
GS
1133 my $text = depod( $otext );
1134
59ecbafa
VS
1135 my $emitted; # the tag actually emitted, used for closing
1136
2a28b791
GS
1137 # all the list variants:
1138 if( $text =~ /\A\*/ ){ # bullet
59ecbafa
VS
1139 $emitted = emit_li( 'ul' );
1140 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1141 my $tag = $1;
1142 $otext =~ s/\A\*\s+//;
1143 emit_item_tag( $otext, $tag, 1 );
1144 }
54310121 1145
2a28b791 1146 } elsif( $text =~ /\A\d+/ ){ # numbered list
59ecbafa
VS
1147 $emitted = emit_li( 'ol' );
1148 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1149 my $tag = $1;
1150 $otext =~ s/\A\d+\.?\s*//;
1151 emit_item_tag( $otext, $tag, 1 );
1152 }
54310121 1153
2a28b791 1154 } else { # definition list
59ecbafa
VS
1155 $emitted = emit_li( 'dl' );
1156 if ($text =~ /\A(.+)\Z/s ){ # should have text
1157 emit_item_tag( $otext, $text, 1 );
1158 }
1159 $need_dd = 1;
54310121 1160 }
59ecbafa 1161 print HTML "</$emitted>" if $emitted;
54310121 1162 print HTML "\n";
59ecbafa 1163 return $need_dd;
54310121
PP
1164}
1165
1166#
2a28b791 1167# process_over - process a pod over tag and start a corresponding HTML list.
54310121
PP
1168#
1169sub process_over {
1170 # start a new list
1171 $listlevel++;
2a28b791
GS
1172 push( @items_seen, 0 );
1173 $after_lpar = 0;
54310121
PP
1174}
1175
1176#
1177# process_back - process a pod back tag and convert it to HTML format.
1178#
1179sub process_back {
2a28b791
GS
1180 if( $listlevel == 0 ){
1181 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1182 return;
1183 }
54310121
PP
1184
1185 # close off the list. note, I check to see if $listend[$listlevel] is
1186 # defined because an =item directive may have never appeared and thus
1187 # $listend[$listlevel] may have never been initialized.
1188 $listlevel--;
2a28b791 1189 if( defined $listend[$listlevel] ){
59ecbafa 1190 print HTML '<p></p>' if $after_lpar;
2a28b791
GS
1191 print HTML $listend[$listlevel];
1192 print HTML "\n";
1193 pop( @listend );
1194 }
1195 $after_lpar = 0;
54310121 1196
2a28b791
GS
1197 # clean up item count
1198 pop( @items_seen );
54310121
PP
1199}
1200
1201#
2a28b791 1202# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121
PP
1203#
1204sub process_cut {
1205 $ignore = 1;
1206}
1207
1208#
d1be9408 1209# process_pod - process a pod tag, thus stop ignoring pod directives
2a28b791 1210# until we see a corresponding cut.
54310121
PP
1211#
1212sub process_pod {
1213 # no need to set $ignore to 0 cause the main loop did it
1214}
1215
1216#
2a28b791 1217# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1218# it out verbatim, if illustration, center it, otherwise ignore it.
54310121
PP
1219#
1220sub process_for {
1221 my($whom, $text) = @_;
1222 if ( $whom =~ /^(pod2)?html$/i) {
1223 print HTML $text;
c4d9b39d
IZ
1224 } elsif ($whom =~ /^illustration$/i) {
1225 1 while chomp $text;
1226 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1227 $text .= $ext, last if -r "$text$ext";
1228 }
59ecbafa 1229 print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
c4d9b39d 1230 }
54310121
PP
1231}
1232
1233#
1234# process_begin - process a =begin pod tag. this pushes
1235# whom we're beginning on the begin stack. if there's a
1236# begin stack, we only print if it us.
1237#
1238sub process_begin {
1239 my($whom, $text) = @_;
1240 $whom = lc($whom);
1241 push (@begin_stack, $whom);
1242 if ( $whom =~ /^(pod2)?html$/) {
1243 print HTML $text if $text;
1244 }
1245}
1246
1247#
1248# process_end - process a =end pod tag. pop the
1249# begin stack. die if we're mismatched.
1250#
1251sub process_end {
1252 my($whom, $text) = @_;
1253 $whom = lc($whom);
1254 if ($begin_stack[-1] ne $whom ) {
1255 die "Unmatched begin/end at chunk $paragraph\n"
59ecbafa 1256 }
2a28b791 1257 pop( @begin_stack );
54310121
PP
1258}
1259
1260#
59ecbafa 1261# process_pre - indented paragraph, made into <pre></pre>
54310121 1262#
2a28b791
GS
1263sub process_pre {
1264 my( $text ) = @_;
1265 my( $rest );
54310121
PP
1266 return if $ignore;
1267
54310121
PP
1268 $rest = $$text;
1269
2a28b791 1270 # insert spaces in place of tabs
24e08cba
JF
1271 $rest =~ s#(.+)#
1272 my $line = $1;
1273 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
be173d55
UC
1274 $line;
1275 #eg;
54310121 1276
2a28b791 1277 # convert some special chars to HTML escapes
59ecbafa 1278 $rest = html_escape($rest);
2a28b791
GS
1279
1280 # try and create links for all occurrences of perl.* within
1281 # the preformatted text.
1282 $rest =~ s{
1283 (\s*)(perl\w+)
1284 }{
1285 if ( defined $pages{$2} ){ # is a link
59ecbafa 1286 qq($1<a href="$htmlroot/$pages{$2}">$2</a>);
2a28b791 1287 } elsif (defined $pages{dosify($2)}) { # is a link
59ecbafa 1288 qq($1<a href="$htmlroot/$pages{dosify($2)}">$2</a>);
2a28b791
GS
1289 } else {
1290 "$1$2";
1291 }
1292 }xeg;
1293 $rest =~ s{
59ecbafa 1294 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
2a28b791
GS
1295 }{
1296 my $url ;
1297 if ( $htmlfileurl ne '' ){
59ecbafa 1298 # Here, we take advantage of the knowledge
2a28b791
GS
1299 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1300 # Since $htmlroot eq '', we need to prepend $htmldir
1301 # on the fron of the link to get the absolute path
1302 # of the link's target. We check for a leading '/'
1303 # to avoid corrupting links that are #, file:, etc.
1304 my $old_url = $3 ;
1305 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1306 $url = relativize_url( "$old_url.html", $htmlfileurl );
1307 } else {
1308 $url = "$3.html" ;
1309 }
1310 "$1$url" ;
1311 }xeg;
1312
1313 # Look for embedded URLs and make them into links. We don't
1314 # relativize them since they are best left as the author intended.
1315
1316 my $urls = '(' . join ('|', qw{
54310121
PP
1317 http
1318 telnet
1319 mailto
1320 news
1321 gopher
1322 file
1323 wais
1324 ftp
59ecbafa 1325 } )
54310121 1326 . ')';
59ecbafa 1327
2a28b791
GS
1328 my $ltrs = '\w';
1329 my $gunk = '/#~:.?+=&%@!\-';
add5afb0 1330 my $punc = '.:!?\-;';
2a28b791 1331 my $any = "${ltrs}${gunk}${punc}";
54310121 1332
2a28b791 1333 $rest =~ s{
69e2f044
D
1334 \b # start at word boundary
1335 ( # begin $1 {
1336 $urls : # need resource and a colon
1337 (?!:) # Ignore File::, among others.
1338 [$any] +? # followed by one or more of any valid
1339 # character, but be conservative and
1340 # take only what you need to....
1341 ) # end $1 }
1342 (?=
1343 &quot; &gt; # maybe pre-quoted '<a href="...">'
1344 | # or:
1345 [$punc]* # 0 or more punctuation
1346 (?: # followed
1347 [^$any] # by a non-url char
1348 | # or
1349 $ # end of the string
1350 ) #
1351 | # or else
1352 $ # then end of the string
54310121 1353 )
59ecbafa 1354 }{<a href="$1">$1</a>}igox;
54310121 1355
2a28b791
GS
1356 # text should be as it is (verbatim)
1357 $$text = $rest;
1358}
54310121 1359
54310121 1360
2a28b791
GS
1361#
1362# pure text processing
1363#
1364# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1365# we don't want this to happen within IS
1366#
1367sub pure_text($){
1368 my $text = shift();
1369 process_puretext( $text, \$ptQuote, 1 );
54310121
PP
1370}
1371
2a28b791
GS
1372sub inIS_text($){
1373 my $text = shift();
1374 process_puretext( $text, \$ptQuote, 0 );
1375}
54310121
PP
1376
1377#
1378# process_puretext - process pure text (without pod-escapes) converting
1379# double-quotes and handling implicit C<> links.
1380#
1381sub process_puretext {
2a28b791 1382 my($text, $quote, $notinIS) = @_;
54310121 1383
2a28b791
GS
1384 ## Guessing at func() or [$@%&]*var references in plain text is destined
1385 ## to produce some strange looking ref's. uncomment to disable:
1386 ## $notinIS = 0;
1387
1388 my(@words, $lead, $trail);
54310121 1389
2a28b791
GS
1390 # convert double-quotes to single-quotes
1391 if( $$quote && $text =~ s/"/''/s ){
1392 $$quote = 0;
1393 }
1394 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1395 $$quote = 1 if $text =~ s/"/``/s;
54310121
PP
1396
1397 # keep track of leading and trailing white-space
2a28b791
GS
1398 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1399 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1400
2a28b791
GS
1401 # split at space/non-space boundaries
1402 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121
PP
1403
1404 # process each word individually
1405 foreach my $word (@words) {
2a28b791
GS
1406 # skip space runs
1407 next if $word =~ /^\s*$/;
54310121 1408 # see if we can infer a link
02369fa5 1409 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1410 # has parenthesis so should have been a C<> ref
2a28b791 1411 ## try for a pagename (perlXXX(1))?
02369fa5
GS
1412 my( $func, $args ) = ( $1, $2 );
1413 if( $args =~ /^\d+$/ ){
2a28b791
GS
1414 my $url = page_sect( $word, '' );
1415 if( defined $url ){
59ecbafa 1416 $word = "<a href=\"$url\">the $word manpage</a>";
2a28b791
GS
1417 next;
1418 }
1419 }
02369fa5
GS
1420 ## try function name for a link, append tt'ed argument list
1421 $word = emit_C( $func, '', "($args)");
2a28b791
GS
1422
1423#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1424## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1425## # perl variables, should be a C<> ref
1426## $word = emit_C( $word );
1427
54310121
PP
1428 } elsif ($word =~ m,^\w+://\w,) {
1429 # looks like a URL
5a039dd3 1430 # Don't relativize it: leave it as the author intended
59ecbafa 1431 $word = qq(<a href="$word">$word</a>);
af47ee55 1432 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1433 # looks like an e-mail address
7b8d334a
GS
1434 my ($w1, $w2, $w3) = ("", $word, "");
1435 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1436 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
59ecbafa
VS
1437 $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1438 } else {
7b8d334a 1439 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1440 }
1441 }
1442
2a28b791
GS
1443 # put everything back together
1444 return $lead . join( '', @words ) . $trail;
1445}
1446
54310121 1447
2a28b791
GS
1448#
1449# process_text - handles plaintext that appears in the input pod file.
1450# there may be pod commands embedded within the text so those must be
1451# converted to html commands.
1452#
7ba65c74 1453
c68ea5d1
GS
1454sub process_text1($$;$$);
1455sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1456sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1457
2a28b791
GS
1458sub process_text {
1459 return if $ignore;
1460 my( $tref ) = @_;
1461 my $res = process_text1( 0, $tref );
1462 $$tref = $res;
1463}
1464
c68ea5d1
GS
1465sub process_text1($$;$$){
1466 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791
GS
1467 my $res = '';
1468
60a48b2d
GS
1469 unless (defined $func) {
1470 $func = '';
1471 $lev++;
1472 }
1473
2a28b791
GS
1474 if( $func eq 'B' ){
1475 # B<text> - boldface
59ecbafa 1476 $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
2a28b791
GS
1477
1478 } elsif( $func eq 'C' ){
59ecbafa 1479 # C<code> - can be a ref or <code></code>
2a28b791 1480 # need to extract text
c68ea5d1 1481 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791
GS
1482
1483 ## clean-up of the link target
1484 my $text = depod( $par );
1485
1486 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
59ecbafa 1487 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
2a28b791
GS
1488
1489 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1490
1491 } elsif( $func eq 'E' ){
1492 # E<x> - convert to character
be3174d2
GS
1493 $$rstr =~ s/^([^>]*)>//;
1494 my $escape = $1;
1495 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1496 $res = "&$escape;";
2a28b791
GS
1497
1498 } elsif( $func eq 'F' ){
1499 # F<filename> - italizice
59ecbafa 1500 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791
GS
1501
1502 } elsif( $func eq 'I' ){
1503 # I<text> - italizice
59ecbafa 1504 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791
GS
1505
1506 } elsif( $func eq 'L' ){
1507 # L<link> - link
59ecbafa 1508 ## L<text|cross-ref> => produce text, use cross-ref for linking
2a28b791
GS
1509 ## L<cross-ref> => make text from cross-ref
1510 ## need to extract text
c68ea5d1 1511 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791
GS
1512
1513 # some L<>'s that shouldn't be:
1514 # a) full-blown URL's are emitted as-is
1515 if( $par =~ m{^\w+://}s ){
1516 return make_URL_href( $par );
1517 }
1518 # b) C<...> is stripped and treated as C<>
1519 if( $par =~ /^C<(.*)>$/ ){
1520 my $text = depod( $1 );
1521 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1522 }
1523
1524 # analyze the contents
1525 $par =~ s/\n/ /g; # undo word-wrapped tags
1526 my $opar = $par;
1527 my $linktext;
1528 if( $par =~ s{^([^|]+)\|}{} ){
1529 $linktext = $1;
1530 }
59ecbafa 1531
2a28b791
GS
1532 # make sure sections start with a /
1533 $par =~ s{^"}{/"};
1534
1535 my( $page, $section, $ident );
1536
1537 # check for link patterns
1538 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
59ecbafa 1539 # we've got a name/ident (no quotes)
2a28b791
GS
1540 ( $page, $ident ) = ( $1, $2 );
1541 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1542
1543 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1544 # even though this should be a "section", we go for ident first
1545 ( $page, $ident ) = ( $1, $2 );
1546 ### print STDERR "--> L<$par> to page $page, section $section\n";
1547
1548 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1549 ( $page, $section ) = ( '', $par );
1550 ### print STDERR "--> L<$par> to void page, section $section\n";
1551
1552 } else {
1553 ( $page, $section ) = ( $par, '' );
1554 ### print STDERR "--> L<$par> to page $par, void section\n";
1555 }
1556
1557 # now, either $section or $ident is defined. the convoluted logic
1558 # below tries to resolve L<> according to what the user specified.
1559 # failing this, we try to find the next best thing...
1560 my( $url, $ltext, $fid );
1561
1562 RESOLVE: {
1563 if( defined $ident ){
1564 ## try to resolve $ident as an item
1565 ( $url, $fid ) = coderef( $page, $ident );
1566 if( $url ){
1567 if( ! defined( $linktext ) ){
1568 $linktext = $ident;
1569 $linktext .= " in " if $ident && $page;
1570 $linktext .= "the $page manpage" if $page;
1571 }
1572 ### print STDERR "got coderef url=$url\n";
1573 last RESOLVE;
1574 }
1575 ## no luck: go for a section (auto-quoting!)
1576 $section = $ident;
1577 }
1578 ## now go for a section
1579 my $htmlsection = htmlify( $section );
1580 $url = page_sect( $page, $htmlsection );
1581 if( $url ){
1582 if( ! defined( $linktext ) ){
1583 $linktext = $section;
1584 $linktext .= " in " if $section && $page;
1585 $linktext .= "the $page manpage" if $page;
1586 }
1587 ### print STDERR "got page/section url=$url\n";
1588 last RESOLVE;
1589 }
59ecbafa 1590 ## no luck: go for an ident
2a28b791
GS
1591 if( $section ){
1592 $ident = $section;
1593 } else {
1594 $ident = $page;
1595 $page = undef();
1596 }
1597 ( $url, $fid ) = coderef( $page, $ident );
1598 if( $url ){
1599 if( ! defined( $linktext ) ){
1600 $linktext = $ident;
1601 $linktext .= " in " if $ident && $page;
1602 $linktext .= "the $page manpage" if $page;
1603 }
1604 ### print STDERR "got section=>coderef url=$url\n";
1605 last RESOLVE;
1606 }
1607
1608 # warning; show some text.
1609 $linktext = $opar unless defined $linktext;
f8f99792 1610 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.\n";
2a28b791
GS
1611 }
1612
d1be9408 1613 # now we have a URL or just plain code
2a28b791
GS
1614 $$rstr = $linktext . '>' . $$rstr;
1615 if( defined( $url ) ){
59ecbafa 1616 $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
2a28b791 1617 } else {
59ecbafa 1618 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
2a28b791
GS
1619 }
1620
1621 } elsif( $func eq 'S' ){
1622 # S<text> - non-breaking spaces
1623 $res = process_text1( $lev, $rstr );
1624 $res =~ s/ /&nbsp;/g;
1625
1626 } elsif( $func eq 'X' ){
1627 # X<> - ignore
1628 $$rstr =~ s/^[^>]*>//;
1629
1630 } elsif( $func eq 'Z' ){
59ecbafa 1631 # Z<> - empty
f8f99792 1632 warn "$0: $podfile: invalid X<> in paragraph $paragraph.\n"
2a28b791
GS
1633 unless $$rstr =~ s/^>//;
1634
1635 } else {
c68ea5d1
GS
1636 my $term = pattern $closing;
1637 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1638 # all others: either recurse into new function or
c68ea5d1 1639 # terminate at closing angle bracket(s)
2a28b791 1640 my $pt = $1;
c68ea5d1 1641 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1642 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1
GS
1643 return $res if !$3 && $lev > 1;
1644 if( $3 ){
1645 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1646 }
2a28b791
GS
1647 }
1648 if( $lev == 1 ){
1649 $res .= pure_text( $$rstr );
54310121 1650 } else {
f8f99792 1651 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.\n";
54310121
PP
1652 }
1653 }
2a28b791
GS
1654 return $res;
1655}
54310121 1656
2a28b791
GS
1657#
1658# go_ahead: extract text of an IS (can be nested)
1659#
c68ea5d1
GS
1660sub go_ahead($$$){
1661 my( $rstr, $func, $closing ) = @_;
2a28b791 1662 my $res = '';
c68ea5d1
GS
1663 my @closing = ($closing);
1664 while( $$rstr =~
1665 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1666 $res .= $1;
c68ea5d1
GS
1667 unless( $3 ){
1668 shift @closing;
1669 return $res unless @closing;
2a28b791 1670 } else {
c68ea5d1 1671 unshift @closing, closing $4;
2a28b791
GS
1672 }
1673 $res .= $2;
1674 }
f8f99792 1675 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.\n";
2a28b791 1676 return $res;
54310121
PP
1677}
1678
1679#
2a28b791
GS
1680# emit_C - output result of C<text>
1681# $text is the depod-ed text
54310121 1682#
02369fa5
GS
1683sub emit_C($;$$){
1684 my( $text, $nocode, $args ) = @_;
60a48b2d 1685 $args = '' unless defined $args;
2a28b791
GS
1686 my $res;
1687 my( $url, $fid ) = coderef( undef(), $text );
1688
1689 # need HTML-safe text
02369fa5 1690 my $linktext = html_escape( "$text$args" );
2a28b791
GS
1691
1692 if( defined( $url ) &&
1693 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
59ecbafa 1694 $res = "<a href=\"$url\"><code>$linktext</code></a>";
2a28b791
GS
1695 } elsif( 0 && $nocode ){
1696 $res = $linktext;
1697 } else {
59ecbafa 1698 $res = "<code>$linktext</code>";
2a28b791
GS
1699 }
1700 return $res;
54310121
PP
1701}
1702
1703#
2a28b791
GS
1704# html_escape: make text safe for HTML
1705#
1706sub html_escape {
1707 my $rest = $_[0];
1708 $rest =~ s/&/&amp;/g;
1709 $rest =~ s/</&lt;/g;
1710 $rest =~ s/>/&gt;/g;
1711 $rest =~ s/"/&quot;/g;
5b25816d
RS
1712 # &apos; is only in XHTML, not HTML4. Be conservative
1713 #$rest =~ s/'/&apos;/g;
2a28b791 1714 return $rest;
59ecbafa 1715}
2a28b791
GS
1716
1717
1718#
39e571d4
ML
1719# dosify - convert filenames to 8.3
1720#
1721sub dosify {
1722 my($str) = @_;
fe4c6be1 1723 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4
ML
1724 if ($Is83) {
1725 $str = lc $str;
1726 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1727 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1728 }
1729 return $str;
1730}
1731
1732#
d1be9408 1733# page_sect - make a URL from the text of a L<>
54310121 1734#
2a28b791
GS
1735sub page_sect($$) {
1736 my( $page, $section ) = @_;
1737 my( $linktext, $page83, $link); # work strings
1738
1739 # check if we know that this is a section in this page
1740 if (!defined $pages{$page} && defined $sections{$page}) {
1741 $section = $page;
1742 $page = "";
1743 ### print STDERR "reset page='', section=$section\n";
54310121
PP
1744 }
1745
39e571d4
ML
1746 $page83=dosify($page);
1747 $page=$page83 if (defined $pages{$page83});
54310121 1748 if ($page eq "") {
0d396dd4 1749 $link = "#" . anchorify( $section );
350ccacd 1750 } elsif ( $page =~ /::/ ) {
350ccacd 1751 $page =~ s,::,/,g;
29f227c9
BS
1752 # Search page cache for an entry keyed under the html page name,
1753 # then look to see what directory that page might be in. NOTE:
1754 # this will only find one page. A better solution might be to produce
1755 # an intermediate page that is an index to all such pages.
1756 my $page_name = $page ;
fe6f1558 1757 $page_name =~ s,^.*/,,s ;
59ecbafa
VS
1758 if ( defined( $pages{ $page_name } ) &&
1759 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
29f227c9
BS
1760 ) {
1761 $page = $1 ;
1762 }
1763 else {
1764 # NOTE: This branch assumes that all A::B pages are located in
1765 # $htmlroot/A/B.html . This is often incorrect, since they are
1766 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1767 # analyze the contents of %pages and figure out where any
1768 # cousins of A::B are, then assume that. So, if A::B isn't found,
1769 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1770 # lib/A/B.pm. This is also limited, but it's an improvement.
1771 # Maybe a hints file so that the links point to the correct places
2a28b791
GS
1772 # nonetheless?
1773
29f227c9 1774 }
350ccacd 1775 $link = "$htmlroot/$page.html";
0d396dd4 1776 $link .= "#" . anchorify( $section ) if ($section);
54310121 1777 } elsif (!defined $pages{$page}) {
54310121 1778 $link = "";
54310121 1779 } else {
0d396dd4 1780 $section = anchorify( $section ) if $section ne "";
2a28b791 1781 ### print STDERR "...section=$section\n";
54310121
PP
1782
1783 # if there is a directory by the name of the page, then assume that an
1784 # appropriate section will exist in the subdirectory
29f227c9
BS
1785# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1786 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1787 $link = "$htmlroot/$1/$section.html";
2a28b791 1788 ### print STDERR "...link=$link\n";
54310121
PP
1789
1790 # since there is no directory by the name of the page, the section will
1791 # have to exist within a .html of the same name. thus, make sure there
1792 # is a .pod or .pm that might become that .html
1793 } else {
2a28b791
GS
1794 $section = "#$section" if $section;
1795 ### print STDERR "...section=$section\n";
1796
54310121
PP
1797 # check if there is a .pod with the page name
1798 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1799 $link = "$htmlroot/$1.html$section";
1800 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1801 $link = "$htmlroot/$1.html$section";
1802 } else {
54310121 1803 $link = "";
54310121
PP
1804 }
1805 }
1806 }
1807
54310121 1808 if ($link) {
29f227c9
BS
1809 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1810 # implies $htmlroot eq ''. This means that the link in question
1811 # needs a prefix of $htmldir if it begins with '/'. The test for
1812 # the initial '/' is done to avoid '#'-only links, and to allow
1813 # for other kinds of links, like file:, ftp:, etc.
1814 my $url ;
1815 if ( $htmlfileurl ne '' ) {
fe6f1558 1816 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791
GS
1817 $url = relativize_url( $link, $htmlfileurl );
1818# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9
BS
1819 }
1820 else {
1821 $url = $link ;
1822 }
2a28b791 1823 return $url;
29f227c9 1824
54310121 1825 } else {
2a28b791 1826 return undef();
54310121 1827 }
54310121
PP
1828}
1829
1830#
29f227c9
BS
1831# relativize_url - convert an absolute URL to one relative to a base URL.
1832# Assumes both end in a filename.
1833#
1834sub relativize_url {
1835 my ($dest,$source) = @_ ;
1836
59ecbafa 1837 my ($dest_volume,$dest_directory,$dest_file) =
29f227c9
BS
1838 File::Spec::Unix->splitpath( $dest ) ;
1839 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1840
59ecbafa 1841 my ($source_volume,$source_directory,$source_file) =
29f227c9
BS
1842 File::Spec::Unix->splitpath( $source ) ;
1843 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1844
1845 my $rel_path = '' ;
1846 if ( $dest ne '' ) {
1847 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1848 }
1849
59ecbafa 1850 if ( $rel_path ne '' &&
29f227c9 1851 substr( $rel_path, -1 ) ne '/' &&
59ecbafa 1852 substr( $dest_file, 0, 1 ) ne '#'
29f227c9
BS
1853 ) {
1854 $rel_path .= "/$dest_file" ;
1855 }
1856 else {
1857 $rel_path .= "$dest_file" ;
1858 }
1859
1860 return $rel_path ;
1861}
1862
54310121
PP
1863
1864#
2a28b791 1865# coderef - make URL from the text of a C<>
54310121 1866#
2a28b791
GS
1867sub coderef($$){
1868 my( $page, $item ) = @_;
1869 my( $url );
1870
1871 my $fid = fragment_id( $item );
2a28b791
GS
1872 if( defined( $page ) ){
1873 # we have been given a $page...
1874 $page =~ s{::}{/}g;
1875
1876 # Do we take it? Item could be a section!
228a48a5 1877 my $base = $items{$fid} || "";
2a28b791
GS
1878 $base =~ s{[^/]*/}{};
1879 if( $base ne "$page.html" ){
1880 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1881 $page = undef();
1882 }
54310121 1883
2a28b791
GS
1884 } else {
1885 # no page - local items precede cached items
67398a75
GS
1886 if( defined( $fid ) ){
1887 if( exists $local_items{$fid} ){
1888 $page = $local_items{$fid};
1889 } else {
1890 $page = $items{$fid};
1891 }
2a28b791
GS
1892 }
1893 }
54310121
PP
1894
1895 # if there was a pod file that we found earlier with an appropriate
1896 # =item directive, then create a link to that page.
2a28b791
GS
1897 if( defined $page ){
1898 if( $page ){
228a48a5 1899 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1900 $page = $1 . '.html';
29f227c9 1901 }
2beb85b7 1902 my $link = "$htmlroot/$page#item_" . anchorify($fid);
54310121 1903
2a28b791
GS
1904 # Here, we take advantage of the knowledge that $htmlfileurl
1905 # ne '' implies $htmlroot eq ''.
1906 if ( $htmlfileurl ne '' ) {
1907 $link = "$htmldir$link" ;
1908 $url = relativize_url( $link, $htmlfileurl ) ;
1909 } else {
1910 $url = $link ;
1911 }
1912 } else {
2beb85b7 1913 $url = "#item_" . anchorify($fid);
2a28b791 1914 }
54310121 1915
2a28b791 1916 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
59ecbafa 1917 }
2a28b791 1918 return( $url, $fid );
54310121
PP
1919}
1920
54310121
PP
1921
1922
1923#
29f227c9
BS
1924# Adapted from Nick Ing-Simmons' PodToHtml package.
1925sub relative_url {
1926 my $source_file = shift ;
1927 my $destination_file = shift;
1928
1929 my $source = URI::file->new_abs($source_file);
1930 my $uo = URI::file->new($destination_file,$source)->abs;
1931 return $uo->rel->as_string;
1932}
1933
1934
1935#
54310121
PP
1936# finish_list - finish off any pending HTML lists. this should be called
1937# after the entire pod file has been read and converted.
1938#
1939sub finish_list {
7b8d334a 1940 while ($listlevel > 0) {
59ecbafa 1941 print HTML "</dl>\n";
54310121
PP
1942 $listlevel--;
1943 }
1944}
1945
1946#
1947# htmlify - converts a pod section specification to a suitable section
2a28b791
GS
1948# specification for HTML. Note that we keep spaces and special characters
1949# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121
PP
1950#
1951sub htmlify {
2a28b791
GS
1952 my( $heading) = @_;
1953 $heading =~ s/(\s+)/ /g;
1954 $heading =~ s/\s+\Z//;
1955 $heading =~ s/\A\s+//;
1956 # The hyphen is a disgrace to the English language.
1957 $heading =~ s/[-"?]//g;
1958 $heading = lc( $heading );
1959 return $heading;
1960}
54310121 1961
2a28b791 1962#
776d31fe 1963# similar to htmlify, but turns non-alphanumerics into underscores
0d396dd4
SB
1964#
1965sub anchorify {
1966 my ($anchor) = @_;
1967 $anchor = htmlify($anchor);
776d31fe 1968 $anchor =~ s/\W/_/g;
0d396dd4
SB
1969 return $anchor;
1970}
1971
1972#
2a28b791
GS
1973# depod - convert text by eliminating all interior sequences
1974# Note: can be called with copy or modify semantics
1975#
1976my %E2c;
67398a75
GS
1977$E2c{lt} = '<';
1978$E2c{gt} = '>';
1979$E2c{sol} = '/';
2a28b791 1980$E2c{verbar} = '|';
67398a75 1981$E2c{amp} = '&'; # in Tk's pods
2a28b791 1982
c68ea5d1 1983sub depod1($;$$);
7ba65c74 1984
2a28b791
GS
1985sub depod($){
1986 my $string;
1987 if( ref( $_[0] ) ){
1988 $string = ${$_[0]};
1989 ${$_[0]} = depod1( \$string );
1990 } else {
1991 $string = $_[0];
1992 depod1( \$string );
59ecbafa 1993 }
2a28b791 1994}
54310121 1995
c68ea5d1
GS
1996sub depod1($;$$){
1997 my( $rstr, $func, $closing ) = @_;
2a28b791 1998 my $res = '';
228a48a5 1999 return $res unless defined $$rstr;
2a28b791
GS
2000 if( ! defined( $func ) ){
2001 # skip to next begin of an interior sequence
c68ea5d1 2002 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 2003 # recurse into its text
c68ea5d1 2004 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791
GS
2005 }
2006 $res .= $$rstr;
2007 } elsif( $func eq 'E' ){
2008 # E<x> - convert to character
be3174d2 2009 $$rstr =~ s/^([^>]*)>//;
228a48a5 2010 $res .= $E2c{$1} || "";
2a28b791
GS
2011 } elsif( $func eq 'X' ){
2012 # X<> - ignore
2013 $$rstr =~ s/^[^>]*>//;
2014 } elsif( $func eq 'Z' ){
59ecbafa 2015 # Z<> - empty
2a28b791
GS
2016 $$rstr =~ s/^>//;
2017 } else {
2018 # all others: either recurse into new function or
2019 # terminate at closing angle bracket
c68ea5d1
GS
2020 my $term = pattern $closing;
2021 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 2022 $res .= $1;
c68ea5d1
GS
2023 last unless $3;
2024 $res .= depod1( $rstr, $3, closing $4 );
2a28b791
GS
2025 }
2026 ## If we're here and $2 ne '>': undelimited interior sequence.
2027 ## Ignored, as this is called without proper indication of where we are.
2028 ## Rely on process_text to produce diagnostics.
2029 }
2030 return $res;
2031}
54310121 2032
2a28b791
GS
2033#
2034# fragment_id - construct a fragment identifier from:
2035# a) =item text
2036# b) contents of C<...>
2037#
2038my @hc;
2039sub fragment_id {
2040 my $text = shift();
2041 $text =~ s/\s+\Z//s;
2042 if( $text ){
2043 # a method or function?
2044 return $1 if $text =~ /(\w+)\s*\(/;
2045 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2046
2047 # a variable name?
2048 return $1 if $text =~ /^([$@%*]\S+)/;
2049
2050 # some pattern matching operator?
2051 return $1 if $text =~ m|^(\w+/).*/\w*$|;
2052
2053 # fancy stuff... like "do { }"
2054 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2055
2056 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2057 # and some funnies with ... Module ...
2058 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
2059 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2060
2061 # text? normalize!
2062 $text =~ s/\s+/_/sg;
2063 $text =~ s{(\W)}{
2064 defined( $hc[ord($1)] ) ? $hc[ord($1)]
2065 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2066 $text = substr( $text, 0, 50 );
2067 } else {
2068 return undef();
2069 }
54310121
PP
2070}
2071
2a28b791
GS
2072#
2073# make_URL_href - generate HTML href from URL
2074# Special treatment for CGI queries.
2075#
2076sub make_URL_href($){
2077 my( $url ) = @_;
59ecbafa
VS
2078 if( $url !~
2079 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2080 $url = "<a href=\"$url\">$url</a>";
2a28b791
GS
2081 }
2082 return $url;
54310121
PP
2083}
2084
20851;