This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[inseparable changes from changes to perl-5.004_01-mt2]
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121
PP
1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
5require Exporter;
6@ISA = Exporter;
7@EXPORT = qw(pod2html htmlify);
8use Cwd;
9
10use Carp;
11
12use strict;
13
14=head1 NAME
15
16Pod::HTML - module to convert pod files to HTML
17
18=head1 SYNOPSIS
19
20 use Pod::HTML;
21 pod2html([options]);
22
23=head1 DESCRIPTION
24
25Converts files from pod format (see L<perlpod>) to HTML format. It
26can automatically generate indexes and cross-references, and it keeps
27a cache of things it knows how to cross-reference.
28
29=head1 ARGUMENTS
30
31Pod::Html takes the following arguments:
32
33=over 4
34
35=item help
36
37 --help
38
39Displays the usage message.
40
41=item htmlroot
42
43 --htmlroot=name
44
45Sets the base URL for the HTML files. When cross-references are made,
46the HTML root is prepended to the URL.
47
48=item infile
49
50 --infile=name
51
52Specify the pod file to convert. Input is taken from STDIN if no
53infile is specified.
54
55=item outfile
56
57 --outfile=name
58
59Specify the HTML file to create. Output goes to STDOUT if no outfile
60is specified.
61
62=item podroot
63
64 --podroot=name
65
66Specify the base directory for finding library pods.
67
68=item podpath
69
70 --podpath=name:...:name
71
72Specify which subdirectories of the podroot contain pod files whose
73HTML converted forms can be linked-to in cross-references.
74
75=item libpods
76
77 --libpods=name:...:name
78
79List of page names (eg, "perlfunc") which contain linkable C<=item>s.
80
81=item netscape
82
83 --netscape
84
85Use Netscape HTML directives when applicable.
86
87=item nonetscape
88
89 --nonetscape
90
91Do not use Netscape HTML directives (default).
92
93=item index
94
95 --index
96
97Generate an index at the top of the HTML file (default behaviour).
98
99=item noindex
100
101 --noindex
102
103Do not generate an index at the top of the HTML file.
104
105
106=item recurse
107
108 --recurse
109
110Recurse into subdirectories specified in podpath (default behaviour).
111
112=item norecurse
113
114 --norecurse
115
116Do not recurse into subdirectories specified in podpath.
117
118=item title
119
120 --title=title
121
122Specify the title of the resulting HTML file.
123
124=item verbose
125
126 --verbose
127
128Display progress messages.
129
130=back
131
132=head1 EXAMPLE
133
134 pod2html("pod2html",
135 "--podpath=lib:ext:pod:vms",
136 "--podroot=/usr/src/perl",
137 "--htmlroot=/perl/nmanual",
138 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
139 "--recurse",
140 "--infile=foo.pod",
141 "--outfile=/perl/nmanual/foo.html");
142
143=head1 AUTHOR
144
145Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
146
147=head1 BUGS
148
149Has trouble with C<> etc in = commands.
150
151=head1 SEE ALSO
152
153L<perlpod>
154
155=head1 COPYRIGHT
156
157This program is distributed under the Artistic License.
158
159=cut
160
161my $dircache = "pod2html-dircache";
162my $itemcache = "pod2html-itemcache";
163
164my @begin_stack = (); # begin/end stack
165
166my @libpods = (); # files to search for links from C<> directives
167my $htmlroot = "/"; # http-server base directory from which all
168 # relative paths in $podpath stem.
169my $htmlfile = ""; # write to stdout by default
170my $podfile = ""; # read from stdin by default
171my @podpath = (); # list of directories containing library pods.
172my $podroot = "."; # filesystem base directory from which all
173 # relative paths in $podpath stem.
174my $recurse = 1; # recurse on subdirectories in $podpath.
175my $verbose = 0; # not verbose by default
176my $doindex = 1; # non-zero if we should generate an index
177my $listlevel = 0; # current list depth
178my @listitem = (); # stack of HTML commands to use when a =item is
179 # encountered. the top of the stack is the
180 # current list.
181my @listdata = (); # similar to @listitem, but for the text after
182 # an =item
183my @listend = (); # similar to @listitem, but the text to use to
184 # end the list.
185my $ignore = 1; # whether or not to format text. we don't
186 # format text until we hit our first pod
187 # directive.
188
189my %items_named = (); # for the multiples of the same item in perlfunc
190my @items_seen = ();
191my $netscape = 0; # whether or not to use netscape directives.
192my $title; # title to give the pod(s)
193my $top = 1; # true if we are at the top of the doc. used
194 # to prevent the first <HR> directive.
195my $paragraph; # which paragraph we're processing (used
196 # for error messages)
197my %pages = (); # associative array used to find the location
198 # of pages referenced by L<> links.
199my %sections = (); # sections within this page
200my %items = (); # associative array used to find the location
201 # of =item directives referenced by C<> links
202sub init_globals {
203$dircache = "pod2html-dircache";
204$itemcache = "pod2html-itemcache";
205
206@begin_stack = (); # begin/end stack
207
208@libpods = (); # files to search for links from C<> directives
209$htmlroot = "/"; # http-server base directory from which all
210 # relative paths in $podpath stem.
211$htmlfile = ""; # write to stdout by default
212$podfile = ""; # read from stdin by default
213@podpath = (); # list of directories containing library pods.
214$podroot = "."; # filesystem base directory from which all
215 # relative paths in $podpath stem.
216$recurse = 1; # recurse on subdirectories in $podpath.
217$verbose = 0; # not verbose by default
218$doindex = 1; # non-zero if we should generate an index
219$listlevel = 0; # current list depth
220@listitem = (); # stack of HTML commands to use when a =item is
221 # encountered. the top of the stack is the
222 # current list.
223@listdata = (); # similar to @listitem, but for the text after
224 # an =item
225@listend = (); # similar to @listitem, but the text to use to
226 # end the list.
227$ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
229 # directive.
230
231@items_seen = ();
232%items_named = ();
233$netscape = 0; # whether or not to use netscape directives.
234$title = ''; # title to give the pod(s)
235$top = 1; # true if we are at the top of the doc. used
236 # to prevent the first <HR> directive.
237$paragraph = ''; # which paragraph we're processing (used
238 # for error messages)
239%pages = (); # associative array used to find the location
240 # of pages referenced by L<> links.
241%sections = (); # sections within this page
242%items = (); # associative array used to find the location
243 # of =item directives referenced by C<> links
244
245}
246
247sub pod2html {
248 local(@ARGV) = @_;
249 local($/);
250 local $_;
251
252 init_globals();
253
254 # cache of %pages and %items from last time we ran pod2html
255 my $podpath = '';
256
257 #undef $opt_help if defined $opt_help;
258
259 # parse the command-line parameters
260 parse_command_line();
261
262 # set some variables to their default values if necessary
263 local *POD;
264 unless (@ARGV && $ARGV[0]) {
265 $podfile = "-" unless $podfile; # stdin
266 open(POD, "<$podfile")
267 || die "$0: cannot open $podfile file for input: $!\n";
268 } else {
269 $podfile = $ARGV[0]; # XXX: might be more filenames
270 *POD = *ARGV;
271 }
272 $htmlfile = "-" unless $htmlfile; # stdout
273 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
274
275 # read the pod a paragraph at a time
276 warn "Scanning for sections in input file(s)\n" if $verbose;
277 $/ = "";
278 my @poddata = <POD>;
279 close(POD);
280
281 # scan the pod for =head[1-6] directives and build an index
282 my $index = scan_headings(\%sections, @poddata);
283
284 # open the output file
285 open(HTML, ">$htmlfile")
286 || die "$0: cannot open $htmlfile file for output: $!\n";
287
288 # put a title in the HTML file
289 $title = '';
290 TITLE_SEARCH: {
291 for (my $i = 0; $i < @poddata; $i++) {
292 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
293 for my $para ( @poddata[$i, $i+1] ) {
294 last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+\s*.*)/s;
295 }
296 }
297
298 }
299 }
300 unless ($title) {
301 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
302 $title = ($podfile eq "-" ? 'No Title' : $1);
303 warn "found $title" if $verbose;
304 }
305 if ($title =~ /\.pm/) {
306 warn "$0: no title for $podfile";
307 $title = $podfile;
308 }
309 print HTML <<END_OF_HEAD;
310 <HTML>
311 <HEAD>
312 <TITLE>$title</TITLE>
313 </HEAD>
314
315 <BODY>
316
317END_OF_HEAD
318
319 # load a cache of %pages and %items if possible. $tests will be
320 # non-zero if successful.
321 my $tests = 0;
322 if (-f $dircache && -f $itemcache) {
323 warn "scanning for item cache\n" if $verbose;
324 $tests = find_cache($dircache, $itemcache, $podpath, $podroot);
325 }
326
327 # if we didn't succeed in loading the cache then we must (re)build
328 # %pages and %items.
329 if (!$tests) {
330 warn "scanning directories in pod-path\n" if $verbose;
331 scan_podpath($podroot, $recurse);
332 }
333
334 # scan the pod for =item directives
335 scan_items("", \%items, @poddata);
336
337 # put an index at the top of the file. note, if $doindex is 0 we
338 # still generate an index, but surround it with an html comment.
339 # that way some other program can extract it if desired.
340 $index =~ s/--+/-/g;
341 print HTML "<!-- INDEX BEGIN -->\n";
342 print HTML "<!--\n" unless $doindex;
343 print HTML $index;
344 print HTML "-->\n" unless $doindex;
345 print HTML "<!-- INDEX END -->\n\n";
346 print HTML "<HR>\n" if $doindex;
347
348 # now convert this file
349 warn "Converting input file\n" if $verbose;
350 foreach my $i (0..$#poddata) {
351 $_ = $poddata[$i];
352 $paragraph = $i+1;
353 if (/^(=.*)/s) { # is it a pod directive?
354 $ignore = 0;
355 $_ = $1;
356 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
357 process_begin($1, $2);
358 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
359 process_end($1, $2);
360 } elsif (/^=cut/) { # =cut
361 process_cut();
362 } elsif (/^=pod/) { # =pod
363 process_pod();
364 } else {
365 next if @begin_stack && $begin_stack[-1] ne 'html';
366
367 if (/^=(head[1-6])\s+(.*)/s) { # =head[1-6] heading
368 process_head($1, $2);
369 } elsif (/^=item\s*(.*)/sm) { # =item text
370 process_item($1);
371 } elsif (/^=over\s*(.*)/) { # =over N
372 process_over();
373 } elsif (/^=back/) { # =back
374 process_back();
375 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
376 process_for($1,$2);
377 } else {
378 /^=(\S*)\s*/;
379 warn "$0: $podfile: unknown pod directive '$1' in "
380 . "paragraph $paragraph. ignoring.\n";
381 }
382 }
383 $top = 0;
384 }
385 else {
386 next if $ignore;
387 next if @begin_stack && $begin_stack[-1] ne 'html';
388 my $text = $_;
389 process_text(\$text, 1);
390 print HTML "$text\n<P>\n\n";
391 }
392 }
393
394 # finish off any pending directives
395 finish_list();
396 print HTML <<END_OF_TAIL;
397 </BODY>
398
399 </HTML>
400END_OF_TAIL
401
402 # close the html file
403 close(HTML);
404
405 warn "Finished\n" if $verbose;
406}
407
408##############################################################################
409
410my $usage; # see below
411sub usage {
412 my $podfile = shift;
413 warn "$0: $podfile: @_\n" if @_;
414 die $usage;
415}
416
417$usage =<<END_OF_USAGE;
418Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
419 --podpath=<name>:...:<name> --podroot=<name>
420 --libpods=<name>:...:<name> --recurse --verbose --index
421 --netscape --norecurse --noindex
422
423 --flush - flushes the item and directory caches.
424 --help - prints this message.
425 --htmlroot - http-server base directory from which all relative paths
426 in podpath stem (default is /).
427 --index - generate an index at the top of the resulting html
428 (default).
429 --infile - filename for the pod to convert (input taken from stdin
430 by default).
431 --libpods - colon-separated list of pages to search for =item pod
432 directives in as targets of C<> and implicit links (empty
433 by default). note, these are not filenames, but rather
434 page names like those that appear in L<> links.
435 --netscape - will use netscape html directives when applicable.
436 --nonetscape - will not use netscape directives (default).
437 --outfile - filename for the resulting html file (output sent to
438 stdout by default).
439 --podpath - colon-separated list of directories containing library
440 pods. empty by default.
441 --podroot - filesystem base directory from which all relative paths
442 in podpath stem (default is .).
443 --noindex - don't generate an index at the top of the resulting html.
444 --norecurse - don't recurse on those subdirectories listed in podpath.
445 --recurse - recurse on those subdirectories listed in podpath
446 (default behavior).
447 --title - title that will appear in resulting html file.
448 --verbose - self-explanatory
449
450END_OF_USAGE
451
452sub parse_command_line {
453 my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
454 my $result = GetOptions(
455 'flush' => \$opt_flush,
456 'help' => \$opt_help,
457 'htmlroot=s' => \$opt_htmlroot,
458 'index!' => \$opt_index,
459 'infile=s' => \$opt_infile,
460 'libpods=s' => \$opt_libpods,
461 'netscape!' => \$opt_netscape,
462 'outfile=s' => \$opt_outfile,
463 'podpath=s' => \$opt_podpath,
464 'podroot=s' => \$opt_podroot,
465 'norecurse' => \$opt_norecurse,
466 'recurse!' => \$opt_recurse,
467 'title=s' => \$opt_title,
468 'verbose' => \$opt_verbose,
469 );
470 usage("-", "invalid parameters") if not $result;
471
472 usage("-") if defined $opt_help; # see if the user asked for help
473 $opt_help = ""; # just to make -w shut-up.
474
475 $podfile = $opt_infile if defined $opt_infile;
476 $htmlfile = $opt_outfile if defined $opt_outfile;
477
478 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
479 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
480
481 warn "Flushing item and directory caches\n"
482 if $opt_verbose && defined $opt_flush;
483 unlink($dircache, $itemcache) if defined $opt_flush;
484
485 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
486 $podroot = $opt_podroot if defined $opt_podroot;
487
488 $doindex = $opt_index if defined $opt_index;
489 $recurse = $opt_recurse if defined $opt_recurse;
490 $title = $opt_title if defined $opt_title;
491 $verbose = defined $opt_verbose ? 1 : 0;
492 $netscape = $opt_netscape if defined $opt_netscape;
493}
494
495#
496# find_cache - tries to find if the caches stored in $dircache and $itemcache
497# are valid caches of %pages and %items. if they are valid then it loads
498# them and returns a non-zero value.
499#
500sub find_cache {
501 my($dircache, $itemcache, $podpath, $podroot) = @_;
502 my($tests);
503 local $_;
504
505 $tests = 0;
506
507 open(CACHE, "<$itemcache") ||
508 die "$0: error opening $itemcache for reading: $!\n";
509 $/ = "\n";
510
511 # is it the same podpath?
512 $_ = <CACHE>;
513 chomp($_);
514 $tests++ if (join(":", @podpath) eq $_);
515
516 # is it the same podroot?
517 $_ = <CACHE>;
518 chomp($_);
519 $tests++ if ($podroot eq $_);
520
521 # load the cache if its good
522 if ($tests != 2) {
523 close(CACHE);
524
525 %items = ();
526 return 0;
527 }
528
529 warn "loading item cache\n" if $verbose;
530 while (<CACHE>) {
531 /(.*?) (.*)$/;
532 $items{$1} = $2;
533 }
534 close(CACHE);
535
536 warn "scanning for directory cache\n" if $verbose;
537 open(CACHE, "<$dircache") ||
538 die "$0: error opening $dircache for reading: $!\n";
539 $/ = "\n";
540 $tests = 0;
541
542 # is it the same podpath?
543 $_ = <CACHE>;
544 chomp($_);
545 $tests++ if (join(":", @podpath) eq $_);
546
547 # is it the same podroot?
548 $_ = <CACHE>;
549 chomp($_);
550 $tests++ if ($podroot eq $_);
551
552 # load the cache if its good
553 if ($tests != 2) {
554 close(CACHE);
555
556 %pages = ();
557 %items = ();
558 return 0;
559 }
560
561 warn "loading directory cache\n" if $verbose;
562 while (<CACHE>) {
563 /(.*?) (.*)$/;
564 $pages{$1} = $2;
565 }
566
567 close(CACHE);
568
569 return 1;
570}
571
572#
573# scan_podpath - scans the directories specified in @podpath for directories,
574# .pod files, and .pm files. it also scans the pod files specified in
575# @libpods for =item directives.
576#
577sub scan_podpath {
578 my($podroot, $recurse) = @_;
579 my($pwd, $dir);
580 my($libpod, $dirname, $pod, @files, @poddata);
581
582 # scan each directory listed in @podpath
583 $pwd = getcwd();
584 chdir($podroot)
585 || die "$0: error changing to directory $podroot: $!\n";
586 foreach $dir (@podpath) {
587 scan_dir($dir, $recurse);
588 }
589
590 # scan the pods listed in @libpods for =item directives
591 foreach $libpod (@libpods) {
592 # if the page isn't defined then we won't know where to find it
593 # on the system.
594 next unless defined $pages{$libpod} && $pages{$libpod};
595
596 # if there is a directory then use the .pod and .pm files within it.
597 if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
598 # find all the .pod and .pm files within the directory
599 $dirname = $1;
600 opendir(DIR, $dirname) ||
601 die "$0: error opening directory $dirname: $!\n";
602 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
603 closedir(DIR);
604
605 # scan each .pod and .pm file for =item directives
606 foreach $pod (@files) {
607 open(POD, "<$dirname/$pod") ||
608 die "$0: error opening $dirname/$pod for input: $!\n";
609 @poddata = <POD>;
610 close(POD);
611
612 scan_items("$dirname/$pod", @poddata);
613 }
614
615 # use the names of files as =item directives too.
616 foreach $pod (@files) {
617 $pod =~ /^(.*)(\.pod|\.pm)$/;
618 $items{$1} = "$dirname/$1.html" if $1;
619 }
620 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
621 $pages{$libpod} =~ /([^:]*\.pm):/) {
622 # scan the .pod or .pm file for =item directives
623 $pod = $1;
624 open(POD, "<$pod") ||
625 die "$0: error opening $pod for input: $!\n";
626 @poddata = <POD>;
627 close(POD);
628
629 scan_items("$pod", @poddata);
630 } else {
631 warn "$0: shouldn't be here (line ".__LINE__."\n";
632 }
633 }
634 @poddata = (); # clean-up a bit
635
636 chdir($pwd)
637 || die "$0: error changing to directory $pwd: $!\n";
638
639 # cache the item list for later use
640 warn "caching items for later use\n" if $verbose;
641 open(CACHE, ">$itemcache") ||
642 die "$0: error open $itemcache for writing: $!\n";
643
644 print CACHE join(":", @podpath) . "\n$podroot\n";
645 foreach my $key (keys %items) {
646 print CACHE "$key $items{$key}\n";
647 }
648
649 close(CACHE);
650
651 # cache the directory list for later use
652 warn "caching directories for later use\n" if $verbose;
653 open(CACHE, ">$dircache") ||
654 die "$0: error open $dircache for writing: $!\n";
655
656 print CACHE join(":", @podpath) . "\n$podroot\n";
657 foreach my $key (keys %pages) {
658 print CACHE "$key $pages{$key}\n";
659 }
660
661 close(CACHE);
662}
663
664#
665# scan_dir - scans the directory specified in $dir for subdirectories, .pod
666# files, and .pm files. notes those that it finds. this information will
667# be used later in order to figure out where the pages specified in L<>
668# links are on the filesystem.
669#
670sub scan_dir {
671 my($dir, $recurse) = @_;
672 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
673 local $_;
674
675 @subdirs = ();
676 @pods = ();
677
678 opendir(DIR, $dir) ||
679 die "$0: error opening directory $dir: $!\n";
680 while (defined($_ = readdir(DIR))) {
681 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
682 $pages{$_} = "" unless defined $pages{$_};
683 $pages{$_} .= "$dir/$_:";
684 push(@subdirs, $_);
685 } elsif (/\.pod$/) { # .pod
686 s/\.pod$//;
687 $pages{$_} = "" unless defined $pages{$_};
688 $pages{$_} .= "$dir/$_.pod:";
689 push(@pods, "$dir/$_.pod");
690 } elsif (/\.pm$/) { # .pm
691 s/\.pm$//;
692 $pages{$_} = "" unless defined $pages{$_};
693 $pages{$_} .= "$dir/$_.pm:";
694 push(@pods, "$dir/$_.pm");
695 }
696 }
697 closedir(DIR);
698
699 # recurse on the subdirectories if necessary
700 if ($recurse) {
701 foreach my $subdir (@subdirs) {
702 scan_dir("$dir/$subdir", $recurse);
703 }
704 }
705}
706
707#
708# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
709# build an index.
710#
711sub scan_headings {
712 my($sections, @data) = @_;
713 my($tag, $which_head, $title, $listdepth, $index);
714
be173d55
UC
715 # here we need local $ignore = 0;
716 # unfortunately, we can't have it, because $ignore is lexical
717 $ignore = 0;
718
54310121
PP
719 $listdepth = 0;
720 $index = "";
721
722 # scan for =head directives, note their name, and build an index
723 # pointing to each of them.
724 foreach my $line (@data) {
725 if ($line =~ /^\s*=(head)([1-6])\s+(.*)/) {
726 ($tag,$which_head, $title) = ($1,$2,$3);
727 chomp($title);
728 $$sections{htmlify(0,$title)} = 1;
729
730 if ($which_head > $listdepth) {
731 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
732 } elsif ($which_head < $listdepth) {
733 $listdepth--;
734 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
735 }
736 $listdepth = $which_head;
737
738 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
be173d55
UC
739 "<A HREF=\"#" . htmlify(0,$title) . "\">" .
740 process_text(\$title, 0) . "</A>";
54310121
PP
741 }
742 }
743
744 # finish off the lists
745 while ($listdepth--) {
746 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
747 }
748
749 # get rid of bogus lists
750 $index =~ s,\t*<UL>\s*</UL>\n,,g;
751
be173d55
UC
752 $ignore = 1; # retore old value;
753
54310121
PP
754 return $index;
755}
756
757#
758# scan_items - scans the pod specified by $pod for =item directives. we
759# will use this information later on in resolving C<> links.
760#
761sub scan_items {
762 my($pod, @poddata) = @_;
763 my($i, $item);
764 local $_;
765
766 $pod =~ s/\.pod$//;
767 $pod .= ".html" if $pod;
768
769 foreach $i (0..$#poddata) {
770 $_ = $poddata[$i];
771
772 # remove any formatting instructions
773 s,[A-Z]<([^<>]*)>,$1,g;
774
775 # figure out what kind of item it is and get the first word of
776 # it's name.
777 if (/^=item\s+(\w*)\s*.*$/s) {
778 if ($1 eq "*") { # bullet list
779 /\A=item\s+\*\s*(.*?)\s*\Z/s;
780 $item = $1;
781 } elsif ($1 =~ /^[0-9]+/) { # numbered list
782 /\A=item\s+[0-9]+\.?(.*?)\s*\Z/s;
783 $item = $1;
784 } else {
785# /\A=item\s+(.*?)\s*\Z/s;
786 /\A=item\s+(\w*)/s;
787 $item = $1;
788 }
789
790 $items{$item} = "$pod" if $item;
791 }
792 }
793}
794
795#
796# process_head - convert a pod head[1-6] tag and convert it to HTML format.
797#
798sub process_head {
799 my($tag, $heading) = @_;
800 my $firstword;
801
802 # figure out the level of the =head
803 $tag =~ /head([1-6])/;
804 my $level = $1;
805
806 # can't have a heading full of spaces and speechmarks and so on
807 $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
808
809 print HTML "<P>\n" unless $listlevel;
810 print HTML "<HR>\n" unless $listlevel || $top;
811 print HTML "<H$level>"; # unless $listlevel;
812 #print HTML "<H$level>" unless $listlevel;
be173d55 813 my $convert = $heading; process_text(\$convert, 0);
54310121
PP
814 print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
815 print HTML "</H$level>"; # unless $listlevel;
816 print HTML "\n";
817}
818
819#
820# process_item - convert a pod item tag and convert it to HTML format.
821#
822sub process_item {
823 my $text = $_[0];
824 my($i, $quote, $name);
825
826 my $need_preamble = 0;
827 my $this_entry;
828
829
830 # lots of documents start a list without doing an =over. this is
831 # bad! but, the proper thing to do seems to be to just assume
832 # they did do an =over. so warn them once and then continue.
833 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
834 unless $listlevel;
835 process_over() unless $listlevel;
836
837 return unless $listlevel;
838
839 # remove formatting instructions from the text
840 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
841 pre_escape(\$text);
842
843 $need_preamble = $items_seen[$listlevel]++ == 0;
844
845 # check if this is the first =item after an =over
846 $i = $listlevel - 1;
847 my $need_new = $listlevel >= @listitem;
848
849 if ($text =~ /\A\*/) { # bullet
850
851 if ($need_preamble) {
852 push(@listend, "</UL>");
853 print HTML "<UL>\n";
854 }
855
856 print HTML "<LI><STRONG>";
857 $text =~ /\A\*\s*(.*)\Z/s;
858 print HTML "<A NAME=\"item_" . htmlify(1,$1) . "\">" if $1 && !$items_named{$1}++;
859 $quote = 1;
860 #print HTML process_puretext($1, \$quote);
861 print HTML $1;
862 print HTML "</A>" if $1;
863 print HTML "</STRONG>";
864
865 } elsif ($text =~ /\A[0-9#]+/) { # numbered list
866
867 if ($need_preamble) {
868 push(@listend, "</OL>");
869 print HTML "<OL>\n";
870 }
871
872 print HTML "<LI><STRONG>";
873 $text =~ /\A[0-9]+\.?(.*)\Z/s;
874 print HTML "<A NAME=\"item_" . htmlify(0,$1) . "\">" if $1;
875 $quote = 1;
876 #print HTML process_puretext($1, \$quote);
877 print HTML $1 if $1;
878 print HTML "</A>" if $1;
879 print HTML "</STRONG>";
880
881 } else { # all others
882
883 if ($need_preamble) {
884 push(@listend, '</DL>');
885 print HTML "<DL>\n";
886 }
887
888 print HTML "<DT><STRONG>";
889 print HTML "<A NAME=\"item_" . htmlify(1,$text) . "\">"
890 if $text && !$items_named{($text =~ /(\S+)/)[0]}++;
891 # preceding craziness so that the duplicate leading bits in
892 # perlfunc work to find just the first one. otherwise
893 # open etc would have many names
894 $quote = 1;
895 #print HTML process_puretext($text, \$quote);
896 print HTML $text;
897 print HTML "</A>" if $text;
898 print HTML "</STRONG>";
899
900 print HTML '<DD>';
901 }
902
903 print HTML "\n";
904}
905
906#
907# process_over - process a pod over tag and start a corresponding HTML
908# list.
909#
910sub process_over {
911 # start a new list
912 $listlevel++;
913}
914
915#
916# process_back - process a pod back tag and convert it to HTML format.
917#
918sub process_back {
2ceaccd7 919 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
54310121
PP
920 unless $listlevel;
921 return unless $listlevel;
922
923 # close off the list. note, I check to see if $listend[$listlevel] is
924 # defined because an =item directive may have never appeared and thus
925 # $listend[$listlevel] may have never been initialized.
926 $listlevel--;
927 print HTML $listend[$listlevel] if defined $listend[$listlevel];
928 print HTML "\n";
929
930 # don't need the corresponding perl code anymore
931 pop(@listitem);
932 pop(@listdata);
933 pop(@listend);
934
935 pop(@items_seen);
936}
937
938#
939# process_cut - process a pod cut tag, thus stop ignoring pod directives.
940#
941sub process_cut {
942 $ignore = 1;
943}
944
945#
946# process_pod - process a pod pod tag, thus ignore pod directives until we see a
947# corresponding cut.
948#
949sub process_pod {
950 # no need to set $ignore to 0 cause the main loop did it
951}
952
953#
954# process_for - process a =for pod tag. if it's for html, split
955# it out verbatim, otherwise ignore it.
956#
957sub process_for {
958 my($whom, $text) = @_;
959 if ( $whom =~ /^(pod2)?html$/i) {
960 print HTML $text;
961 }
962}
963
964#
965# process_begin - process a =begin pod tag. this pushes
966# whom we're beginning on the begin stack. if there's a
967# begin stack, we only print if it us.
968#
969sub process_begin {
970 my($whom, $text) = @_;
971 $whom = lc($whom);
972 push (@begin_stack, $whom);
973 if ( $whom =~ /^(pod2)?html$/) {
974 print HTML $text if $text;
975 }
976}
977
978#
979# process_end - process a =end pod tag. pop the
980# begin stack. die if we're mismatched.
981#
982sub process_end {
983 my($whom, $text) = @_;
984 $whom = lc($whom);
985 if ($begin_stack[-1] ne $whom ) {
986 die "Unmatched begin/end at chunk $paragraph\n"
987 }
988 pop @begin_stack;
989}
990
991#
992# process_text - handles plaintext that appears in the input pod file.
993# there may be pod commands embedded within the text so those must be
994# converted to html commands.
995#
996sub process_text {
997 my($text, $escapeQuotes) = @_;
998 my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
999 my($podcommand, $params, $tag, $quote);
1000
1001 return if $ignore;
1002
1003 $quote = 0; # status of double-quote conversion
1004 $result = "";
1005 $rest = $$text;
1006
1007 if ($rest =~ /^\s+/) { # preformatted text, no pod directives
be173d55
UC
1008 $rest =~ s/\n+\Z//;
1009 $rest =~ s#.*#
1010 my $line = $&;
1011 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1012 $line;
1013 #eg;
54310121
PP
1014
1015 $rest =~ s/&/&amp;/g;
1016 $rest =~ s/</&lt;/g;
1017 $rest =~ s/>/&gt;/g;
1018 $rest =~ s/"/&quot;/g;
1019
1020 # try and create links for all occurrences of perl.* within
1021 # the preformatted text.
1022 $rest =~ s{
1023 (\s*)(perl\w+)
1024 }{
1025 if (defined $pages{$2}) { # is a link
1026 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1027 } else {
1028 "$1$2";
1029 }
1030 }xeg;
1031 $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1032
1033 my $urls = '(' . join ('|', qw{
1034 http
1035 telnet
1036 mailto
1037 news
1038 gopher
1039 file
1040 wais
1041 ftp
1042 } )
1043 . ')';
1044
1045 my $ltrs = '\w';
1046 my $gunk = '/#~:.?+=&%@!\-';
1047 my $punc = '.:?\-';
1048 my $any = "${ltrs}${gunk}${punc}";
1049
1050 $rest =~ s{
1051 \b # start at word boundary
1052 ( # begin $1 {
1053 $urls : # need resource and a colon
1054 [$any] +? # followed by on or more
1055 # of any valid character, but
1056 # be conservative and take only
1057 # what you need to....
1058 ) # end $1 }
1059 (?= # look-ahead non-consumptive assertion
1060 [$punc]* # either 0 or more puntuation
1061 [^$any] # followed by a non-url char
1062 | # or else
1063 $ # then end of the string
1064 )
1065 }{<A HREF="$1">$1</A>}igox;
1066
1067 $result = "<PRE>" # text should be as it is (verbatim)
1068 . "$rest\n"
1069 . "</PRE>\n";
1070 } else { # formatted text
1071 # parse through the string, stopping each time we find a
1072 # pod-escape. once the string has been throughly processed
1073 # we can output it.
1074 while ($rest) {
1075 # check to see if there are any possible pod directives in
1076 # the remaining part of the text.
1077 if ($rest =~ m/[BCEIFLSZ]</) {
1078 warn "\$rest\t= $rest\n" unless
1079 $rest =~ /\A
1080 ([^<]*?)
1081 ([BCEIFLSZ]?)
1082 <
1083 (.*)\Z/xs;
1084
1085 $s1 = $1; # pure text
1086 $s2 = $2; # the type of pod-escape that follows
1087 $s3 = '<'; # '<'
1088 $s4 = $3; # the rest of the string
1089 } else {
1090 $s1 = $rest;
1091 $s2 = "";
1092 $s3 = "";
1093 $s4 = "";
1094 }
1095
1096 if ($s3 eq '<' && $s2) { # a pod-escape
1097 $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1098 $podcommand = "$s2<";
1099 $rest = $s4;
1100
1101 # find the matching '>'
1102 $match = 1;
1103 $bf = 0;
1104 while ($match && !$bf) {
1105 $bf = 1;
1106 if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1107 $bf = 0;
1108 $match++;
1109 $podcommand .= $1;
1110 $rest = $2;
1111 } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1112 $bf = 0;
1113 $match--;
1114 $podcommand .= $1;
1115 $rest = $2;
1116 }
1117 }
1118
1119 if ($match != 0) {
1120 warn <<WARN;
1121$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1122WARN
1123 $result .= substr $podcommand, 0, 2;
1124 $rest = substr($podcommand, 2) . $rest;
1125 next;
1126 }
1127
1128 # pull out the parameters to the pod-escape
1129 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1130 $tag = $1;
1131 $params = $2;
1132
1133 # process the text within the pod-escape so that any escapes
1134 # which must occur do.
1135 process_text(\$params, 0) unless $tag eq 'L';
1136
1137 $s1 = $params;
1138 if (!$tag || $tag eq " ") { # <> : no tag
1139 $s1 = "&lt;$params&gt;";
1140 } elsif ($tag eq "L") { # L<> : link
1141 $s1 = process_L($params);
1142 } elsif ($tag eq "I" || # I<> : italicize text
1143 $tag eq "B" || # B<> : bold text
1144 $tag eq "F") { # F<> : file specification
1145 $s1 = process_BFI($tag, $params);
1146 } elsif ($tag eq "C") { # C<> : literal code
1147 $s1 = process_C($params, 1);
1148 } elsif ($tag eq "E") { # E<> : escape
1149 $s1 = process_E($params);
1150 } elsif ($tag eq "Z") { # Z<> : zero-width character
1151 $s1 = process_Z($params);
1152 } elsif ($tag eq "S") { # S<> : non-breaking space
1153 $s1 = process_S($params);
1154 } elsif ($tag eq "X") { # S<> : non-breaking space
1155 $s1 = process_X($params);
1156 } else {
1157 warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1158 }
1159
1160 $result .= "$s1";
1161 } else {
1162 # for pure text we must deal with implicit links and
1163 # double-quotes among other things.
1164 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1165 $rest = $s4;
1166 }
1167 }
1168 }
1169 $$text = $result;
1170}
1171
1172sub html_escape {
1173 my $rest = $_[0];
1174 $rest =~ s/&/&amp;/g;
1175 $rest =~ s/</&lt;/g;
1176 $rest =~ s/>/&gt;/g;
1177 $rest =~ s/"/&quot;/g;
1178 return $rest;
1179}
1180
1181#
1182# process_puretext - process pure text (without pod-escapes) converting
1183# double-quotes and handling implicit C<> links.
1184#
1185sub process_puretext {
1186 my($text, $quote) = @_;
1187 my(@words, $result, $rest, $lead, $trail);
1188
1189 # convert double-quotes to single-quotes
1190 $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1191 while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1192
1193 $$quote = ($text =~ m/"/ ? 1 : 0);
1194 $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1195
1196 # keep track of leading and trailing white-space
1197 $lead = ($text =~ /\A(\s*)/s ? $1 : "");
1198 $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1199
1200 # collapse all white space into a single space
1201 $text =~ s/\s+/ /g;
1202 @words = split(" ", $text);
1203
1204 # process each word individually
1205 foreach my $word (@words) {
1206 # see if we can infer a link
1207 if ($word =~ /^\w+\(/) {
1208 # has parenthesis so should have been a C<> ref
1209 $word = process_C($word);
1210# $word =~ /^[^()]*]\(/;
1211# if (defined $items{$1} && $items{$1}) {
1212# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1213# . htmlify(0,$word)
1214# . "\">$word</A></CODE>";
1215# } elsif (defined $items{$word} && $items{$word}) {
1216# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1217# . htmlify(0,$word)
1218# . "\">$word</A></CODE>";
1219# } else {
1220# $word = "\n<CODE><A HREF=\"#item_"
1221# . htmlify(0,$word)
1222# . "\">$word</A></CODE>";
1223# }
1224 } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1225 # perl variables, should be a C<> ref
1226 $word = process_C($word, 1);
1227 } elsif ($word =~ m,^\w+://\w,) {
1228 # looks like a URL
1229 $word = qq(<A HREF="$word">$word</A>);
1230 } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1231 # looks like an e-mail address
1232 $word = qq(<A HREF="MAILTO:$word">$word</A>);
1233 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
1234 $word = html_escape($word) if $word =~ /[&<>]/;
1235 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1236 } else {
1237 $word = html_escape($word) if $word =~ /[&<>]/;
1238 }
1239 }
1240
1241 # build a new string based upon our conversion
1242 $result = "";
1243 $rest = join(" ", @words);
1244 while (length($rest) > 75) {
1245 if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1246 $rest =~ m/^(\S*)\s(.*?)$/o) {
1247
1248 $result .= "$1\n";
1249 $rest = $2;
1250 } else {
1251 $result .= "$rest\n";
1252 $rest = "";
1253 }
1254 }
1255 $result .= $rest if $rest;
1256
1257 # restore the leading and trailing white-space
1258 $result = "$lead$result$trail";
1259
1260 return $result;
1261}
1262
1263#
1264# pre_escape - convert & in text to $amp;
1265#
1266sub pre_escape {
1267 my($str) = @_;
1268
1269 $$str =~ s,&,&amp;,g;
1270}
1271
1272#
1273# process_L - convert a pod L<> directive to a corresponding HTML link.
1274# most of the links made are inferred rather than known about directly
1275# (i.e it's not known whether the =head\d section exists in the target file,
1276# or whether a .pod file exists in the case of split files). however, the
1277# guessing usually works.
1278#
1279# Unlike the other directives, this should be called with an unprocessed
1280# string, else tags in the link won't be matched.
1281#
1282sub process_L {
1283 my($str) = @_;
1284 my($s1, $s2, $linktext, $page, $section, $link); # work strings
1285
1286 $str =~ s/\n/ /g; # undo word-wrapped tags
1287 $s1 = $str;
1288 for ($s1) {
1289 # a :: acts like a /
1290 s,::,/,;
1291
1292 # make sure sections start with a /
1293 s,^",/",g;
1294 s,^,/,g if (!m,/, && / /);
1295
1296 # check if there's a section specified
1297 if (m,^(.*?)/"?(.*?)"?$,) { # yes
1298 ($page, $section) = ($1, $2);
1299 } else { # no
1300 ($page, $section) = ($str, "");
1301 }
1302
1303 # check if we know that this is a section in this page
1304 if (!defined $pages{$page} && defined $sections{$page}) {
1305 $section = $page;
1306 $page = "";
1307 }
1308 }
1309
1310 if ($page eq "") {
1311 $link = "#" . htmlify(0,$section);
1312 $linktext = $section;
1313 } elsif (!defined $pages{$page}) {
1314 warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1315 $link = "";
1316 $linktext = $page;
1317 } else {
1318 $linktext = ($section ? "$section" : "the $page manpage");
1319 $section = htmlify(0,$section) if $section ne "";
1320
1321 # if there is a directory by the name of the page, then assume that an
1322 # appropriate section will exist in the subdirectory
1323 if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1324 $link = "$htmlroot/$1/$section.html";
1325
1326 # since there is no directory by the name of the page, the section will
1327 # have to exist within a .html of the same name. thus, make sure there
1328 # is a .pod or .pm that might become that .html
1329 } else {
1330 $section = "#$section";
1331 # check if there is a .pod with the page name
1332 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1333 $link = "$htmlroot/$1.html$section";
1334 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1335 $link = "$htmlroot/$1.html$section";
1336 } else {
1337 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1338 "no .pod or .pm found\n";
1339 $link = "";
1340 $linktext = $section;
1341 }
1342 }
1343 }
1344
1345 process_text(\$linktext, 0);
1346 if ($link) {
1347 $s1 = "<A HREF=\"$link\">$linktext</A>";
1348 } else {
1349 $s1 = "<EM>$linktext</EM>";
1350 }
1351 return $s1;
1352}
1353
1354#
1355# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1356# convert them to corresponding HTML directives.
1357#
1358sub process_BFI {
1359 my($tag, $str) = @_;
1360 my($s1); # work string
1361 my(%repltext) = ( 'B' => 'STRONG',
1362 'F' => 'EM',
1363 'I' => 'EM');
1364
1365 # extract the modified text and convert to HTML
1366 $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1367 return $s1;
1368}
1369
1370#
1371# process_C - process the C<> pod-escape.
1372#
1373sub process_C {
1374 my($str, $doref) = @_;
1375 my($s1, $s2);
1376
1377 $s1 = $str;
1378 $s1 =~ s/\([^()]*\)//g; # delete parentheses
be173d55 1379 $s2 = $s1;
54310121
PP
1380 $s1 =~ s/\W//g; # delete bogus characters
1381
1382 # if there was a pod file that we found earlier with an appropriate
1383 # =item directive, then create a link to that page.
1384 if ($doref && defined $items{$s1}) {
1385 $s1 = ($items{$s1} ?
1386 "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" :
1387 "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>");
1388 $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1389 confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1390 } else {
1391 $s1 = "<CODE>$str</CODE>";
1392 # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1393 }
1394
1395
1396 return $s1;
1397}
1398
1399#
1400# process_E - process the E<> pod directive which seems to escape a character.
1401#
1402sub process_E {
1403 my($str) = @_;
1404
1405 for ($str) {
1406 s,([^/].*),\&$1\;,g;
1407 }
1408
1409 return $str;
1410}
1411
1412#
1413# process_Z - process the Z<> pod directive which really just amounts to
1414# ignoring it. this allows someone to start a paragraph with an =
1415#
1416sub process_Z {
1417 my($str) = @_;
1418
1419 # there is no equivalent in HTML for this so just ignore it.
1420 $str = "";
1421 return $str;
1422}
1423
1424#
1425# process_S - process the S<> pod directive which means to convert all
1426# spaces in the string to non-breaking spaces (in HTML-eze).
1427#
1428sub process_S {
1429 my($str) = @_;
1430
1431 # convert all spaces in the text to non-breaking spaces in HTML.
1432 $str =~ s/ /&nbsp;/g;
1433 return $str;
1434}
1435
1436#
1437# process_X - this is supposed to make an index entry. we'll just
1438# ignore it.
1439#
1440sub process_X {
1441 return '';
1442}
1443
1444
1445#
1446# finish_list - finish off any pending HTML lists. this should be called
1447# after the entire pod file has been read and converted.
1448#
1449sub finish_list {
1450 while ($listlevel >= 0) {
1451 print HTML "</DL>\n";
1452 $listlevel--;
1453 }
1454}
1455
1456#
1457# htmlify - converts a pod section specification to a suitable section
1458# specification for HTML. if first arg is 1, only takes 1st word.
1459#
1460sub htmlify {
1461 my($compact, $heading) = @_;
1462
1463 if ($compact) {
1464 $heading =~ /^(\w+)/;
1465 $heading = $1;
1466 }
1467
1468 # $heading = lc($heading);
1469 $heading =~ s/[^\w\s]/_/g;
1470 $heading =~ s/(\s+)/ /g;
1471 $heading =~ s/^\s*(.*?)\s*$/$1/s;
1472 $heading =~ s/ /_/g;
1473 $heading =~ s/\A(.{32}).*\Z/$1/s;
1474 $heading =~ s/\s+\Z//;
1475 $heading =~ s/_{2,}/_/g;
1476
1477 return $heading;
1478}
1479
1480BEGIN {
1481}
1482
14831;
1484