This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Show value of $fastslow on failure
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5
GS
4
5use vars qw($VERSION @ISA @EXPORT);
d6a466d7 6$VERSION = 1.04;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
54310121 9
10use Carp;
0e4548d5
GS
11use Config;
12use Cwd;
af75be11 13use File::Spec;
0e4548d5
GS
14use File::Spec::Unix;
15use Getopt::Long;
54310121 16
3ec07288
FK
17use locale; # make \w work right in non-ASCII lands
18
54310121 19=head1 NAME
20
7b8d334a 21Pod::Html - module to convert pod files to HTML
54310121 22
23=head1 SYNOPSIS
24
7b8d334a 25 use Pod::Html;
54310121 26 pod2html([options]);
27
28=head1 DESCRIPTION
29
30Converts files from pod format (see L<perlpod>) to HTML format. It
31can automatically generate indexes and cross-references, and it keeps
32a cache of things it knows how to cross-reference.
33
34=head1 ARGUMENTS
35
36Pod::Html takes the following arguments:
37
38=over 4
39
0e4548d5
GS
40=item backlink
41
42 --backlink="Back to Top"
43
44Adds "Back to Top" links in front of every HEAD1 heading (except for
45the first). By default, no backlink are being generated.
46
b42210d7
RV
47=item cachedir
48
49 --cachedir=name
50
51Creates the item and directory caches in the given directory.
52
0e4548d5
GS
53=item css
54
55 --css=stylesheet
56
57Specify the URL of a cascading style sheet.
58
59=item flush
60
61 --flush
62
63Flushes the item and directory caches.
64
65=item header
66
67 --header
68 --noheader
69
70Creates header and footer blocks containing the text of the NAME
71section. By default, no headers are being generated.
72
54310121 73=item help
74
75 --help
76
77Displays the usage message.
78
5a039dd3
BS
79=item htmldir
80
81 --htmldir=name
82
83Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
84is used to generate relative links to other files. Not passing this
85causes all links to be absolute, since this is the value that tells
86Pod::Html the root of the documentation tree.
5a039dd3 87
54310121 88=item htmlroot
89
90 --htmlroot=name
91
92Sets the base URL for the HTML files. When cross-references are made,
93the HTML root is prepended to the URL.
94
0e4548d5
GS
95=item index
96
97 --index
98 --noindex
99
100Generate an index at the top of the HTML file. This is the default
101behaviour.
102
54310121 103=item infile
104
105 --infile=name
106
107Specify the pod file to convert. Input is taken from STDIN if no
108infile is specified.
109
54310121 110=item libpods
111
112 --libpods=name:...:name
113
114List of page names (eg, "perlfunc") which contain linkable C<=item>s.
115
116=item netscape
117
118 --netscape
0e4548d5 119 --nonetscape
54310121 120
0e4548d5
GS
121Use Netscape HTML directives when applicable. By default, they will
122B<not> be used.
54310121 123
0e4548d5 124=item outfile
54310121 125
0e4548d5 126 --outfile=name
54310121 127
0e4548d5
GS
128Specify the HTML file to create. Output goes to STDOUT if no outfile
129is specified.
54310121 130
0e4548d5 131=item podpath
54310121 132
0e4548d5 133 --podpath=name:...:name
54310121 134
0e4548d5
GS
135Specify which subdirectories of the podroot contain pod files whose
136HTML converted forms can be linked-to in cross-references.
54310121 137
0e4548d5 138=item podroot
54310121 139
0e4548d5 140 --podroot=name
54310121 141
0e4548d5 142Specify the base directory for finding library pods.
54310121 143
0e4548d5 144=item quiet
54310121 145
0e4548d5
GS
146 --quiet
147 --noquiet
54310121 148
0e4548d5
GS
149Don't display I<mostly harmless> warning messages. These messages
150will be displayed by default. But this is not the same as C<verbose>
151mode.
54310121 152
0e4548d5 153=item recurse
54310121 154
0e4548d5 155 --recurse
54310121 156 --norecurse
157
0e4548d5 158Recurse into subdirectories specified in podpath (default behaviour).
54310121 159
160=item title
161
162 --title=title
163
164Specify the title of the resulting HTML file.
165
166=item verbose
167
168 --verbose
0e4548d5 169 --noverbose
54310121 170
0e4548d5 171Display progress messages. By default, they won't be displayed.
34db337b 172
54310121 173=back
174
175=head1 EXAMPLE
176
177 pod2html("pod2html",
178 "--podpath=lib:ext:pod:vms",
179 "--podroot=/usr/src/perl",
180 "--htmlroot=/perl/nmanual",
181 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
182 "--recurse",
183 "--infile=foo.pod",
184 "--outfile=/perl/nmanual/foo.html");
185
34db337b
JD
186=head1 ENVIRONMENT
187
188Uses $Config{pod2html} to setup default options.
189
54310121 190=head1 AUTHOR
191
192Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
193
54310121 194=head1 SEE ALSO
195
196L<perlpod>
197
198=head1 COPYRIGHT
199
200This program is distributed under the Artistic License.
201
202=cut
203
b42210d7
RV
204my $cachedir = "."; # The directory to which item and directory
205 # caches will be written.
1f763251
GS
206my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
207my $dircache = "pod2htmd$cache_ext";
208my $itemcache = "pod2htmi$cache_ext";
54310121 209
210my @begin_stack = (); # begin/end stack
211
29f227c9
BS
212my @libpods = (); # files to search for links from C<> directives
213my $htmlroot = "/"; # http-server base directory from which all
54310121 214 # relative paths in $podpath stem.
5a039dd3
BS
215my $htmldir = ""; # The directory to which the html pages
216 # will (eventually) be written.
54310121 217my $htmlfile = ""; # write to stdout by default
29f227c9 218my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
219 # refer to this file. This is only used
220 # to make relative urls that point to
221 # other files.
54310121 222my $podfile = ""; # read from stdin by default
223my @podpath = (); # list of directories containing library pods.
af75be11 224my $podroot = File::Spec->curdir; # filesystem base directory from which all
54310121 225 # relative paths in $podpath stem.
34db337b 226my $css = ''; # Cascading style sheet
54310121 227my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 228my $quiet = 0; # not quiet by default
54310121 229my $verbose = 0; # not verbose by default
230my $doindex = 1; # non-zero if we should generate an index
0e4548d5 231my $backlink = ''; # text for "back to top" links
54310121 232my $listlevel = 0; # current list depth
2a28b791
GS
233my @listend = (); # the text to use to end the list.
234my $after_lpar = 0; # set to true after a par in an =item
54310121 235my $ignore = 1; # whether or not to format text. we don't
236 # format text until we hit our first pod
237 # directive.
238
239my %items_named = (); # for the multiples of the same item in perlfunc
240my @items_seen = ();
241my $netscape = 0; # whether or not to use netscape directives.
242my $title; # title to give the pod(s)
34db337b 243my $header = 0; # produce block header/footer
54310121 244my $top = 1; # true if we are at the top of the doc. used
245 # to prevent the first <HR> directive.
246my $paragraph; # which paragraph we're processing (used
247 # for error messages)
2a28b791 248my $ptQuote = 0; # status of double-quote conversion
54310121 249my %pages = (); # associative array used to find the location
250 # of pages referenced by L<> links.
251my %sections = (); # sections within this page
252my %items = (); # associative array used to find the location
253 # of =item directives referenced by C<> links
2a28b791 254my %local_items = (); # local items - avoid destruction of %items
39e571d4
LM
255my $Is83; # is dos with short filenames (8.3)
256
54310121 257sub init_globals {
04f720db
JD
258$dircache = "pod2htmd$cache_ext";
259$itemcache = "pod2htmi$cache_ext";
54310121 260
261@begin_stack = (); # begin/end stack
262
263@libpods = (); # files to search for links from C<> directives
264$htmlroot = "/"; # http-server base directory from which all
265 # relative paths in $podpath stem.
67398a75
GS
266$htmldir = ""; # The directory to which the html pages
267 # will (eventually) be written.
54310121 268$htmlfile = ""; # write to stdout by default
269$podfile = ""; # read from stdin by default
270@podpath = (); # list of directories containing library pods.
af75be11 271$podroot = File::Spec->curdir; # filesystem base directory from which all
54310121 272 # relative paths in $podpath stem.
34db337b 273$css = ''; # Cascading style sheet
54310121 274$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 275$quiet = 0; # not quiet by default
54310121 276$verbose = 0; # not verbose by default
277$doindex = 1; # non-zero if we should generate an index
0e4548d5 278$backlink = ''; # text for "back to top" links
54310121 279$listlevel = 0; # current list depth
2a28b791
GS
280@listend = (); # the text to use to end the list.
281$after_lpar = 0; # set to true after a par in an =item
54310121 282$ignore = 1; # whether or not to format text. we don't
283 # format text until we hit our first pod
284 # directive.
285
286@items_seen = ();
287%items_named = ();
288$netscape = 0; # whether or not to use netscape directives.
34db337b 289$header = 0; # produce block header/footer
54310121 290$title = ''; # title to give the pod(s)
291$top = 1; # true if we are at the top of the doc. used
292 # to prevent the first <HR> directive.
293$paragraph = ''; # which paragraph we're processing (used
294 # for error messages)
54310121 295%sections = (); # sections within this page
3e3baf6d
TB
296
297# These are not reinitialised here but are kept as a cache.
298# See get_cache and related cache management code.
299#%pages = (); # associative array used to find the location
300 # of pages referenced by L<> links.
301#%items = (); # associative array used to find the location
54310121 302 # of =item directives referenced by C<> links
2a28b791 303%local_items = ();
39e571d4 304$Is83=$^O eq 'dos';
54310121 305}
306
2a28b791
GS
307#
308# clean_data: global clean-up of pod data
309#
310sub clean_data($){
311 my( $dataref ) = @_;
bf202ccd 312 for my $i ( 0..$#{$dataref} ) {
2a28b791
GS
313 ${$dataref}[$i] =~ s/\s+\Z//;
314
315 # have a look for all-space lines
4135c0a0 316 if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
2a28b791
GS
317 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
318 splice( @$dataref, $i, 1, @chunks );
319 }
320 }
321}
322
323
54310121 324sub pod2html {
325 local(@ARGV) = @_;
326 local($/);
327 local $_;
328
329 init_globals();
330
39e571d4
LM
331 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
332
54310121 333 # cache of %pages and %items from last time we ran pod2html
54310121 334
335 #undef $opt_help if defined $opt_help;
336
337 # parse the command-line parameters
338 parse_command_line();
339
340 # set some variables to their default values if necessary
341 local *POD;
342 unless (@ARGV && $ARGV[0]) {
343 $podfile = "-" unless $podfile; # stdin
344 open(POD, "<$podfile")
345 || die "$0: cannot open $podfile file for input: $!\n";
346 } else {
347 $podfile = $ARGV[0]; # XXX: might be more filenames
348 *POD = *ARGV;
349 }
350 $htmlfile = "-" unless $htmlfile; # stdout
351 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 352 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9
BS
353 if ( $htmlroot eq ''
354 && defined( $htmldir )
355 && $htmldir ne ''
356 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
357 )
5a039dd3 358 {
29f227c9
BS
359 # Set the 'base' url for this file, so that we can use it
360 # as the location from which to calculate relative links
361 # to other files. If this is '', then absolute links will
362 # be used throughout.
363 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 364 }
54310121 365
366 # read the pod a paragraph at a time
367 warn "Scanning for sections in input file(s)\n" if $verbose;
368 $/ = "";
369 my @poddata = <POD>;
370 close(POD);
2a28b791 371 clean_data( \@poddata );
54310121 372
373 # scan the pod for =head[1-6] directives and build an index
374 my $index = scan_headings(\%sections, @poddata);
375
3e3baf6d 376 unless($index) {
31e56455 377 warn "No headings in $podfile\n" if $verbose;
3e3baf6d
TB
378 }
379
54310121 380 # open the output file
381 open(HTML, ">$htmlfile")
382 || die "$0: cannot open $htmlfile file for output: $!\n";
383
d011ffae
GS
384 # put a title in the HTML file if one wasn't specified
385 if ($title eq '') {
386 TITLE_SEARCH: {
bf202ccd 387 for my $i ( 0..$#poddata ) {
d011ffae
GS
388 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
389 for my $para ( @poddata[$i, $i+1] ) {
390 last TITLE_SEARCH
391 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
392 }
393 }
54310121 394
d011ffae
GS
395 }
396 }
397 }
fe6f1558 398 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d 399 # probably a split pod so take first =head[12] as title
bf202ccd 400 for my $i ( 0..$#poddata ) {
3e3baf6d
TB
401 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
402 }
403 warn "adopted '$title' as title for $podfile\n"
404 if $verbose and $title;
405 }
7b8d334a
GS
406 if ($title) {
407 $title =~ s/\s*\(.*\)//;
408 } else {
34db337b 409 warn "$0: no title for $podfile" unless $quiet;
fe6f1558 410 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 411 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 412 warn "using $title" if $verbose;
54310121 413 }
34db337b
JD
414 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
415 $csslink =~ s,\\,/,g;
416 $csslink =~ s,(/.):,$1|,;
417
418 my $block = $header ? <<END_OF_BLOCK : '';
419<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
420<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
421<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
422</TD></TR>
423</TABLE>
424END_OF_BLOCK
425
54310121 426 print HTML <<END_OF_HEAD;
7b8d334a
GS
427<HTML>
428<HEAD>
34db337b 429<TITLE>$title</TITLE>$csslink
7b8d334a 430</HEAD>
54310121 431
7b8d334a 432<BODY>
34db337b 433$block
54310121 434END_OF_HEAD
435
3e3baf6d
TB
436 # load/reload/validate/cache %pages and %items
437 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 438
439 # scan the pod for =item directives
2a28b791 440 scan_items( \%local_items, "", @poddata);
54310121 441
442 # put an index at the top of the file. note, if $doindex is 0 we
443 # still generate an index, but surround it with an html comment.
444 # that way some other program can extract it if desired.
445 $index =~ s/--+/-/g;
2a28b791 446 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 447 print HTML "<!-- INDEX BEGIN -->\n";
448 print HTML "<!--\n" unless $doindex;
449 print HTML $index;
450 print HTML "-->\n" unless $doindex;
451 print HTML "<!-- INDEX END -->\n\n";
31e56455 452 print HTML "<HR>\n" if $doindex and $index;
54310121 453
454 # now convert this file
2a28b791
GS
455 my $after_item; # set to true after an =item
456 warn "Converting input file $podfile\n" if $verbose;
457 foreach my $i (0..$#poddata){
458 $ptQuote = 0; # status of quote conversion
459
54310121 460 $_ = $poddata[$i];
461 $paragraph = $i+1;
462 if (/^(=.*)/s) { # is it a pod directive?
463 $ignore = 0;
2a28b791 464 $after_item = 0;
54310121 465 $_ = $1;
466 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
467 process_begin($1, $2);
468 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
469 process_end($1, $2);
470 } elsif (/^=cut/) { # =cut
471 process_cut();
472 } elsif (/^=pod/) { # =pod
473 process_pod();
474 } else {
475 next if @begin_stack && $begin_stack[-1] ne 'html';
476
7b8d334a 477 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791
GS
478 process_head( $1, $2, $doindex && $index );
479 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
480 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 481 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791
GS
482 process_item( $1 );
483 $after_item = 1;
54310121 484 } elsif (/^=over\s*(.*)/) { # =over N
485 process_over();
486 } elsif (/^=back/) { # =back
487 process_back();
a45bd81d 488 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 489 process_for($1,$2);
490 } else {
491 /^=(\S*)\s*/;
492 warn "$0: $podfile: unknown pod directive '$1' in "
493 . "paragraph $paragraph. ignoring.\n";
494 }
495 }
496 $top = 0;
497 }
498 else {
499 next if $ignore;
500 next if @begin_stack && $begin_stack[-1] ne 'html';
adbecb01 501 print HTML and next if @begin_stack && $begin_stack[-1] eq 'html';
54310121 502 my $text = $_;
2a28b791
GS
503 if( $text =~ /\A\s+/ ){
504 process_pre( \$text );
505 print HTML "<PRE>\n$text</PRE>\n";
506
507 } else {
508 process_text( \$text );
509
510 # experimental: check for a paragraph where all lines
511 # have some ...\t...\t...\n pattern
512 if( $text =~ /\t/ ){
513 my @lines = split( "\n", $text );
514 if( @lines > 1 ){
515 my $all = 2;
516 foreach my $line ( @lines ){
517 if( $line =~ /\S/ && $line !~ /\t/ ){
518 $all--;
519 last if $all == 0;
520 }
521 }
522 if( $all > 0 ){
523 $text =~ s/\t+/<TD>/g;
524 $text =~ s/^/<TR><TD>/gm;
525 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
526 $text . '</TABLE>';
527 }
528 }
529 }
530 ## end of experimental
531
532 if( $after_item ){
533 print HTML "$text\n";
534 $after_lpar = 1;
535 } else {
536 print HTML "<P>$text</P>\n";
537 }
538 }
539 $after_item = 0;
54310121 540 }
541 }
542
543 # finish off any pending directives
544 finish_list();
2a28b791
GS
545
546 # link to page index
0e4548d5
GS
547 print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
548 if $doindex and $index and $backlink;
2a28b791 549
54310121 550 print HTML <<END_OF_TAIL;
34db337b 551$block
7b8d334a 552</BODY>
54310121 553
7b8d334a 554</HTML>
54310121 555END_OF_TAIL
556
557 # close the html file
558 close(HTML);
559
560 warn "Finished\n" if $verbose;
561}
562
563##############################################################################
564
565my $usage; # see below
566sub usage {
567 my $podfile = shift;
568 warn "$0: $podfile: @_\n" if @_;
569 die $usage;
570}
571
572$usage =<<END_OF_USAGE;
573Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
574 --podpath=<name>:...:<name> --podroot=<name>
575 --libpods=<name>:...:<name> --recurse --verbose --index
b42210d7 576 --netscape --norecurse --noindex --cachedir=<name>
54310121 577
0e4548d5 578 --backlink - set text for "back to top" links (default: none).
b42210d7 579 --cachedir - directory for the item and directory cache files.
0e4548d5
GS
580 --css - stylesheet URL
581 --flush - flushes the item and directory caches.
582 --[no]header - produce block header/footer (default is no headers).
583 --help - prints this message.
584 --htmldir - directory for resulting HTML files.
585 --htmlroot - http-server base directory from which all relative paths
586 in podpath stem (default is /).
587 --[no]index - generate an index at the top of the resulting html
588 (default behaviour).
589 --infile - filename for the pod to convert (input taken from stdin
590 by default).
591 --libpods - colon-separated list of pages to search for =item pod
592 directives in as targets of C<> and implicit links (empty
593 by default). note, these are not filenames, but rather
594 page names like those that appear in L<> links.
595 --[no]netscape - will use netscape html directives when applicable.
596 (default is not to use them).
597 --outfile - filename for the resulting html file (output sent to
598 stdout by default).
599 --podpath - colon-separated list of directories containing library
600 pods (empty by default).
601 --podroot - filesystem base directory from which all relative paths
602 in podpath stem (default is .).
603 --[no]quiet - supress some benign warning messages (default is off).
604 --[no]recurse - recurse on those subdirectories listed in podpath
605 (default behaviour).
606 --title - title that will appear in resulting html file.
607 --[no]verbose - self-explanatory (off by default).
54310121 608
609END_OF_USAGE
610
611sub parse_command_line {
b42210d7
RV
612 my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
613 $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
614 $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
615 $opt_recurse,$opt_title,$opt_verbose);
0e4548d5 616
34db337b 617 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 618 my $result = GetOptions(
0e4548d5 619 'backlink=s' => \$opt_backlink,
b42210d7 620 'cachedir=s' => \$opt_cachedir,
0e4548d5 621 'css=s' => \$opt_css,
29f227c9 622 'flush' => \$opt_flush,
0e4548d5 623 'header!' => \$opt_header,
29f227c9
BS
624 'help' => \$opt_help,
625 'htmldir=s' => \$opt_htmldir,
54310121 626 'htmlroot=s' => \$opt_htmlroot,
29f227c9 627 'index!' => \$opt_index,
54310121 628 'infile=s' => \$opt_infile,
629 'libpods=s' => \$opt_libpods,
630 'netscape!' => \$opt_netscape,
631 'outfile=s' => \$opt_outfile,
632 'podpath=s' => \$opt_podpath,
633 'podroot=s' => \$opt_podroot,
0e4548d5 634 'quiet!' => \$opt_quiet,
54310121 635 'recurse!' => \$opt_recurse,
636 'title=s' => \$opt_title,
0e4548d5 637 'verbose!' => \$opt_verbose,
54310121 638 );
639 usage("-", "invalid parameters") if not $result;
640
641 usage("-") if defined $opt_help; # see if the user asked for help
642 $opt_help = ""; # just to make -w shut-up.
643
54310121 644 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
645 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
646
0e4548d5 647 $backlink = $opt_backlink if defined $opt_backlink;
b42210d7 648 $cachedir = $opt_cachedir if defined $opt_cachedir;
0e4548d5
GS
649 $css = $opt_css if defined $opt_css;
650 $header = $opt_header if defined $opt_header;
651 $htmldir = $opt_htmldir if defined $opt_htmldir;
652 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
653 $doindex = $opt_index if defined $opt_index;
654 $podfile = $opt_infile if defined $opt_infile;
655 $netscape = $opt_netscape if defined $opt_netscape;
656 $htmlfile = $opt_outfile if defined $opt_outfile;
657 $podroot = $opt_podroot if defined $opt_podroot;
658 $quiet = $opt_quiet if defined $opt_quiet;
659 $recurse = $opt_recurse if defined $opt_recurse;
660 $title = $opt_title if defined $opt_title;
661 $verbose = $opt_verbose if defined $opt_verbose;
662
54310121 663 warn "Flushing item and directory caches\n"
664 if $opt_verbose && defined $opt_flush;
b42210d7
RV
665 $dircache = "$cachedir/pod2htmd$cache_ext";
666 $itemcache = "$cachedir/pod2htmi$cache_ext";
54310121 667 unlink($dircache, $itemcache) if defined $opt_flush;
54310121 668}
669
3e3baf6d
TB
670
671my $saved_cache_key;
672
673sub get_cache {
674 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
675 my @cache_key_args = @_;
676
677 # A first-level cache:
678 # Don't bother reading the cache files if they still apply
679 # and haven't changed since we last read them.
680
681 my $this_cache_key = cache_key(@cache_key_args);
682
683 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
684
685 # load the cache of %pages and %items if possible. $tests will be
686 # non-zero if successful.
687 my $tests = 0;
688 if (-f $dircache && -f $itemcache) {
689 warn "scanning for item cache\n" if $verbose;
690 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
691 }
692
693 # if we didn't succeed in loading the cache then we must (re)build
694 # %pages and %items.
695 if (!$tests) {
696 warn "scanning directories in pod-path\n" if $verbose;
697 scan_podpath($podroot, $recurse, 0);
698 }
699 $saved_cache_key = cache_key(@cache_key_args);
700}
701
702sub cache_key {
703 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
704 return join('!', $dircache, $itemcache, $recurse,
29f227c9 705 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
706}
707
54310121 708#
3e3baf6d 709# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 710# are valid caches of %pages and %items. if they are valid then it loads
711# them and returns a non-zero value.
712#
3e3baf6d 713sub load_cache {
54310121 714 my($dircache, $itemcache, $podpath, $podroot) = @_;
715 my($tests);
716 local $_;
717
718 $tests = 0;
719
720 open(CACHE, "<$itemcache") ||
721 die "$0: error opening $itemcache for reading: $!\n";
722 $/ = "\n";
723
724 # is it the same podpath?
725 $_ = <CACHE>;
726 chomp($_);
3e3baf6d 727 $tests++ if (join(":", @$podpath) eq $_);
54310121 728
729 # is it the same podroot?
730 $_ = <CACHE>;
731 chomp($_);
732 $tests++ if ($podroot eq $_);
733
734 # load the cache if its good
735 if ($tests != 2) {
736 close(CACHE);
54310121 737 return 0;
738 }
739
740 warn "loading item cache\n" if $verbose;
741 while (<CACHE>) {
742 /(.*?) (.*)$/;
743 $items{$1} = $2;
744 }
745 close(CACHE);
746
747 warn "scanning for directory cache\n" if $verbose;
748 open(CACHE, "<$dircache") ||
749 die "$0: error opening $dircache for reading: $!\n";
750 $/ = "\n";
751 $tests = 0;
752
753 # is it the same podpath?
754 $_ = <CACHE>;
755 chomp($_);
3e3baf6d 756 $tests++ if (join(":", @$podpath) eq $_);
54310121 757
758 # is it the same podroot?
759 $_ = <CACHE>;
760 chomp($_);
761 $tests++ if ($podroot eq $_);
762
763 # load the cache if its good
764 if ($tests != 2) {
765 close(CACHE);
54310121 766 return 0;
767 }
768
769 warn "loading directory cache\n" if $verbose;
770 while (<CACHE>) {
771 /(.*?) (.*)$/;
772 $pages{$1} = $2;
773 }
774
775 close(CACHE);
776
777 return 1;
778}
779
780#
781# scan_podpath - scans the directories specified in @podpath for directories,
782# .pod files, and .pm files. it also scans the pod files specified in
783# @libpods for =item directives.
784#
785sub scan_podpath {
3e3baf6d 786 my($podroot, $recurse, $append) = @_;
54310121 787 my($pwd, $dir);
788 my($libpod, $dirname, $pod, @files, @poddata);
789
3e3baf6d
TB
790 unless($append) {
791 %items = ();
792 %pages = ();
793 }
794
54310121 795 # scan each directory listed in @podpath
796 $pwd = getcwd();
797 chdir($podroot)
798 || die "$0: error changing to directory $podroot: $!\n";
799 foreach $dir (@podpath) {
800 scan_dir($dir, $recurse);
801 }
802
803 # scan the pods listed in @libpods for =item directives
804 foreach $libpod (@libpods) {
805 # if the page isn't defined then we won't know where to find it
806 # on the system.
807 next unless defined $pages{$libpod} && $pages{$libpod};
808
809 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
810 # NOTE: Only finds the first so-named directory in the tree.
811# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
812 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 813 # find all the .pod and .pm files within the directory
814 $dirname = $1;
815 opendir(DIR, $dirname) ||
816 die "$0: error opening directory $dirname: $!\n";
fe6f1558 817 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121 818 closedir(DIR);
819
820 # scan each .pod and .pm file for =item directives
821 foreach $pod (@files) {
822 open(POD, "<$dirname/$pod") ||
823 die "$0: error opening $dirname/$pod for input: $!\n";
824 @poddata = <POD>;
825 close(POD);
2a28b791 826 clean_data( \@poddata );
54310121 827
2a28b791 828 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 829 }
830
831 # use the names of files as =item directives too.
2a28b791
GS
832### Don't think this should be done this way - confuses issues.(WL)
833### foreach $pod (@files) {
834### $pod =~ /^(.*)(\.pod|\.pm)$/;
835### $items{$1} = "$dirname/$1.html" if $1;
836### }
54310121 837 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
838 $pages{$libpod} =~ /([^:]*\.pm):/) {
839 # scan the .pod or .pm file for =item directives
840 $pod = $1;
841 open(POD, "<$pod") ||
842 die "$0: error opening $pod for input: $!\n";
843 @poddata = <POD>;
844 close(POD);
2a28b791 845 clean_data( \@poddata );
54310121 846
2a28b791 847 scan_items( \%items, "$pod", @poddata);
54310121 848 } else {
849 warn "$0: shouldn't be here (line ".__LINE__."\n";
850 }
851 }
852 @poddata = (); # clean-up a bit
853
854 chdir($pwd)
855 || die "$0: error changing to directory $pwd: $!\n";
856
857 # cache the item list for later use
858 warn "caching items for later use\n" if $verbose;
859 open(CACHE, ">$itemcache") ||
860 die "$0: error open $itemcache for writing: $!\n";
861
862 print CACHE join(":", @podpath) . "\n$podroot\n";
863 foreach my $key (keys %items) {
864 print CACHE "$key $items{$key}\n";
865 }
866
867 close(CACHE);
868
869 # cache the directory list for later use
870 warn "caching directories for later use\n" if $verbose;
871 open(CACHE, ">$dircache") ||
872 die "$0: error open $dircache for writing: $!\n";
873
874 print CACHE join(":", @podpath) . "\n$podroot\n";
875 foreach my $key (keys %pages) {
876 print CACHE "$key $pages{$key}\n";
877 }
878
879 close(CACHE);
880}
881
882#
883# scan_dir - scans the directory specified in $dir for subdirectories, .pod
884# files, and .pm files. notes those that it finds. this information will
885# be used later in order to figure out where the pages specified in L<>
886# links are on the filesystem.
887#
888sub scan_dir {
889 my($dir, $recurse) = @_;
890 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
891 local $_;
892
893 @subdirs = ();
894 @pods = ();
895
896 opendir(DIR, $dir) ||
897 die "$0: error opening directory $dir: $!\n";
898 while (defined($_ = readdir(DIR))) {
899 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
900 $pages{$_} = "" unless defined $pages{$_};
901 $pages{$_} .= "$dir/$_:";
902 push(@subdirs, $_);
fe6f1558
GS
903 } elsif (/\.pod\z/) { # .pod
904 s/\.pod\z//;
54310121 905 $pages{$_} = "" unless defined $pages{$_};
906 $pages{$_} .= "$dir/$_.pod:";
907 push(@pods, "$dir/$_.pod");
d3f9d0fb
SB
908 } elsif (/\.html\z/) { # .html
909 s/\.html\z//;
910 $pages{$_} = "" unless defined $pages{$_};
911 $pages{$_} .= "$dir/$_.pod:";
fe6f1558
GS
912 } elsif (/\.pm\z/) { # .pm
913 s/\.pm\z//;
54310121 914 $pages{$_} = "" unless defined $pages{$_};
915 $pages{$_} .= "$dir/$_.pm:";
916 push(@pods, "$dir/$_.pm");
917 }
918 }
919 closedir(DIR);
920
921 # recurse on the subdirectories if necessary
922 if ($recurse) {
923 foreach my $subdir (@subdirs) {
924 scan_dir("$dir/$subdir", $recurse);
925 }
926 }
927}
928
929#
930# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
931# build an index.
932#
933sub scan_headings {
934 my($sections, @data) = @_;
2a28b791 935 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 936
be173d55
HM
937 # here we need local $ignore = 0;
938 # unfortunately, we can't have it, because $ignore is lexical
939 $ignore = 0;
940
54310121 941 $listdepth = 0;
942 $index = "";
943
944 # scan for =head directives, note their name, and build an index
945 # pointing to each of them.
946 foreach my $line (@data) {
bb9460ed 947 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791
GS
948 ($tag, $which_head, $otitle) = ($1,$2,$3);
949
950 my $title = depod( $otitle );
951 my $name = htmlify( $title );
952 $$sections{$name} = 1;
953 $title = process_text( \$otitle );
54310121 954
102c538a
DS
955 while ($which_head != $listdepth) {
956 if ($which_head > $listdepth) {
957 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
958 $listdepth++;
959 } elsif ($which_head < $listdepth) {
960 $listdepth--;
961 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
962 }
54310121 963 }
54310121 964
965 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791
GS
966 "<A HREF=\"#" . $name . "\">" .
967 $title . "</A></LI>";
54310121 968 }
969 }
970
971 # finish off the lists
972 while ($listdepth--) {
973 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
974 }
975
976 # get rid of bogus lists
977 $index =~ s,\t*<UL>\s*</UL>\n,,g;
978
bb9460ed 979 $ignore = 1; # restore old value;
be173d55 980
54310121 981 return $index;
982}
983
984#
985# scan_items - scans the pod specified by $pod for =item directives. we
986# will use this information later on in resolving C<> links.
987#
988sub scan_items {
2a28b791 989 my( $itemref, $pod, @poddata ) = @_;
54310121 990 my($i, $item);
991 local $_;
992
fe6f1558 993 $pod =~ s/\.pod\z//;
54310121 994 $pod .= ".html" if $pod;
995
996 foreach $i (0..$#poddata) {
2a28b791
GS
997 my $txt = depod( $poddata[$i] );
998
999 # figure out what kind of item it is.
1000 # Build string for referencing this item.
1001 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1002 next unless $1;
1003 $item = $1;
1004 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1005 $item = $1;
1006 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1007 $item = $1;
1008 } else {
1009 next;
54310121 1010 }
2a28b791
GS
1011 my $fid = fragment_id( $item );
1012 $$itemref{$fid} = "$pod" if $fid;
54310121 1013 }
1014}
1015
1016#
1017# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1018#
1019sub process_head {
2a28b791 1020 my($tag, $heading, $hasindex) = @_;
54310121 1021
1022 # figure out the level of the =head
1023 $tag =~ /head([1-6])/;
1024 my $level = $1;
1025
2a28b791
GS
1026 if( $listlevel ){
1027 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1028 while( $listlevel ){
1029 process_back();
1030 }
1031 }
1032
1033 print HTML "<P>\n";
1034 if( $level == 1 && ! $top ){
0e4548d5
GS
1035 print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
1036 if $hasindex and $backlink;
2a28b791
GS
1037 print HTML "<HR>\n"
1038 }
1039
1040 my $name = htmlify( depod( $heading ) );
1041 my $convert = process_text( \$heading );
1042 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1043}
1044
2a28b791 1045
54310121 1046#
2a28b791
GS
1047# emit_item_tag - print an =item's text
1048# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1049#
2a28b791
GS
1050my $EmittedItem;
1051
1052sub emit_item_tag($$$){
1053 my( $otext, $text, $compact ) = @_;
1054 my $item = fragment_id( $text );
54310121 1055
2a28b791
GS
1056 $EmittedItem = $item;
1057 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1058
2a28b791
GS
1059 print HTML '<STRONG>';
1060 if ($items_named{$item}++) {
1061 print HTML process_text( \$otext );
1062 } else {
1063 my $name = 'item_' . $item;
1064 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1065 }
1066 print HTML "</STRONG><BR>\n";
1067 undef( $EmittedItem );
1068}
1069
1070sub emit_li {
1071 my( $tag ) = @_;
1072 if( $items_seen[$listlevel]++ == 0 ){
1073 push( @listend, "</$tag>" );
1074 print HTML "<$tag>\n";
1075 }
1076 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1077}
1078
1079#
1080# process_item - convert a pod item tag and convert it to HTML format.
1081#
1082sub process_item {
1083 my( $otext ) = @_;
54310121 1084
1085 # lots of documents start a list without doing an =over. this is
1086 # bad! but, the proper thing to do seems to be to just assume
1087 # they did do an =over. so warn them once and then continue.
2a28b791
GS
1088 if( $listlevel == 0 ){
1089 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1090 process_over();
1091 }
54310121 1092
2a28b791
GS
1093 # formatting: insert a paragraph if preceding item has >1 paragraph
1094 if( $after_lpar ){
1095 print HTML "<P></P>\n";
1096 $after_lpar = 0;
1097 }
54310121 1098
1099 # remove formatting instructions from the text
2a28b791
GS
1100 my $text = depod( $otext );
1101
1102 # all the list variants:
1103 if( $text =~ /\A\*/ ){ # bullet
1104 emit_li( 'UL' );
1105 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1106 my $tag = $1;
1107 $otext =~ s/\A\*\s+//;
1108 emit_item_tag( $otext, $tag, 1 );
54310121 1109 }
1110
2a28b791
GS
1111 } elsif( $text =~ /\A\d+/ ){ # numbered list
1112 emit_li( 'OL' );
1113 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1114 my $tag = $1;
1115 $otext =~ s/\A\d+\.?\s*//;
1116 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1117 }
54310121 1118
2a28b791
GS
1119 } else { # definition list
1120 emit_li( 'DL' );
1121 if ($text =~ /\A(.+)\Z/s ){ # should have text
1122 emit_item_tag( $otext, $text, 1 );
7b8d334a 1123 }
54310121 1124 print HTML '<DD>';
1125 }
54310121 1126 print HTML "\n";
1127}
1128
1129#
2a28b791 1130# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1131#
1132sub process_over {
1133 # start a new list
1134 $listlevel++;
2a28b791
GS
1135 push( @items_seen, 0 );
1136 $after_lpar = 0;
54310121 1137}
1138
1139#
1140# process_back - process a pod back tag and convert it to HTML format.
1141#
1142sub process_back {
2a28b791
GS
1143 if( $listlevel == 0 ){
1144 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1145 return;
1146 }
54310121 1147
1148 # close off the list. note, I check to see if $listend[$listlevel] is
1149 # defined because an =item directive may have never appeared and thus
1150 # $listend[$listlevel] may have never been initialized.
1151 $listlevel--;
2a28b791
GS
1152 if( defined $listend[$listlevel] ){
1153 print HTML '<P></P>' if $after_lpar;
1154 print HTML $listend[$listlevel];
1155 print HTML "\n";
1156 pop( @listend );
1157 }
1158 $after_lpar = 0;
54310121 1159
2a28b791
GS
1160 # clean up item count
1161 pop( @items_seen );
54310121 1162}
1163
1164#
2a28b791 1165# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1166#
1167sub process_cut {
1168 $ignore = 1;
1169}
1170
1171#
d1be9408 1172# process_pod - process a pod tag, thus stop ignoring pod directives
2a28b791 1173# until we see a corresponding cut.
54310121 1174#
1175sub process_pod {
1176 # no need to set $ignore to 0 cause the main loop did it
1177}
1178
1179#
2a28b791 1180# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1181# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1182#
1183sub process_for {
1184 my($whom, $text) = @_;
1185 if ( $whom =~ /^(pod2)?html$/i) {
1186 print HTML $text;
c4d9b39d
IZ
1187 } elsif ($whom =~ /^illustration$/i) {
1188 1 while chomp $text;
1189 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1190 $text .= $ext, last if -r "$text$ext";
1191 }
1192 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1193 }
54310121 1194}
1195
1196#
1197# process_begin - process a =begin pod tag. this pushes
1198# whom we're beginning on the begin stack. if there's a
1199# begin stack, we only print if it us.
1200#
1201sub process_begin {
1202 my($whom, $text) = @_;
1203 $whom = lc($whom);
1204 push (@begin_stack, $whom);
1205 if ( $whom =~ /^(pod2)?html$/) {
1206 print HTML $text if $text;
1207 }
1208}
1209
1210#
1211# process_end - process a =end pod tag. pop the
1212# begin stack. die if we're mismatched.
1213#
1214sub process_end {
1215 my($whom, $text) = @_;
1216 $whom = lc($whom);
1217 if ($begin_stack[-1] ne $whom ) {
1218 die "Unmatched begin/end at chunk $paragraph\n"
1219 }
2a28b791 1220 pop( @begin_stack );
54310121 1221}
1222
1223#
2a28b791 1224# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1225#
2a28b791
GS
1226sub process_pre {
1227 my( $text ) = @_;
1228 my( $rest );
54310121 1229 return if $ignore;
1230
54310121 1231 $rest = $$text;
1232
2a28b791
GS
1233 # insert spaces in place of tabs
1234 $rest =~ s#.*#
be173d55
HM
1235 my $line = $&;
1236 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1237 $line;
1238 #eg;
54310121 1239
2a28b791
GS
1240 # convert some special chars to HTML escapes
1241 $rest =~ s/&/&amp;/g;
1242 $rest =~ s/</&lt;/g;
1243 $rest =~ s/>/&gt;/g;
1244 $rest =~ s/"/&quot;/g;
1245
1246 # try and create links for all occurrences of perl.* within
1247 # the preformatted text.
1248 $rest =~ s{
1249 (\s*)(perl\w+)
1250 }{
1251 if ( defined $pages{$2} ){ # is a link
1252 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1253 } elsif (defined $pages{dosify($2)}) { # is a link
1254 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1255 } else {
1256 "$1$2";
1257 }
1258 }xeg;
1259 $rest =~ s{
1260 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1261 }{
1262 my $url ;
1263 if ( $htmlfileurl ne '' ){
1264 # Here, we take advantage of the knowledge
1265 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1266 # Since $htmlroot eq '', we need to prepend $htmldir
1267 # on the fron of the link to get the absolute path
1268 # of the link's target. We check for a leading '/'
1269 # to avoid corrupting links that are #, file:, etc.
1270 my $old_url = $3 ;
1271 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1272 $url = relativize_url( "$old_url.html", $htmlfileurl );
1273 } else {
1274 $url = "$3.html" ;
1275 }
1276 "$1$url" ;
1277 }xeg;
1278
1279 # Look for embedded URLs and make them into links. We don't
1280 # relativize them since they are best left as the author intended.
1281
1282 my $urls = '(' . join ('|', qw{
54310121 1283 http
1284 telnet
1285 mailto
1286 news
1287 gopher
1288 file
1289 wais
1290 ftp
1291 } )
1292 . ')';
1293
2a28b791
GS
1294 my $ltrs = '\w';
1295 my $gunk = '/#~:.?+=&%@!\-';
1296 my $punc = '.:?\-';
1297 my $any = "${ltrs}${gunk}${punc}";
54310121 1298
2a28b791 1299 $rest =~ s{
54310121 1300 \b # start at word boundary
1301 ( # begin $1 {
29f227c9
BS
1302 $urls : # need resource and a colon
1303 (?!:) # Ignore File::, among others.
54310121 1304 [$any] +? # followed by on or more
1305 # of any valid character, but
1306 # be conservative and take only
1307 # what you need to....
1308 ) # end $1 }
1309 (?= # look-ahead non-consumptive assertion
1310 [$punc]* # either 0 or more puntuation
1311 [^$any] # followed by a non-url char
1312 | # or else
1313 $ # then end of the string
1314 )
1315 }{<A HREF="$1">$1</A>}igox;
1316
2a28b791
GS
1317 # text should be as it is (verbatim)
1318 $$text = $rest;
1319}
54310121 1320
54310121 1321
2a28b791
GS
1322#
1323# pure text processing
1324#
1325# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1326# we don't want this to happen within IS
1327#
1328sub pure_text($){
1329 my $text = shift();
1330 process_puretext( $text, \$ptQuote, 1 );
54310121 1331}
1332
2a28b791
GS
1333sub inIS_text($){
1334 my $text = shift();
1335 process_puretext( $text, \$ptQuote, 0 );
1336}
54310121 1337
1338#
1339# process_puretext - process pure text (without pod-escapes) converting
1340# double-quotes and handling implicit C<> links.
1341#
1342sub process_puretext {
2a28b791 1343 my($text, $quote, $notinIS) = @_;
54310121 1344
2a28b791
GS
1345 ## Guessing at func() or [$@%&]*var references in plain text is destined
1346 ## to produce some strange looking ref's. uncomment to disable:
1347 ## $notinIS = 0;
1348
1349 my(@words, $lead, $trail);
54310121 1350
2a28b791
GS
1351 # convert double-quotes to single-quotes
1352 if( $$quote && $text =~ s/"/''/s ){
1353 $$quote = 0;
1354 }
1355 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1356 $$quote = 1 if $text =~ s/"/``/s;
54310121 1357
1358 # keep track of leading and trailing white-space
2a28b791
GS
1359 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1360 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1361
2a28b791
GS
1362 # split at space/non-space boundaries
1363 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1364
1365 # process each word individually
1366 foreach my $word (@words) {
2a28b791
GS
1367 # skip space runs
1368 next if $word =~ /^\s*$/;
54310121 1369 # see if we can infer a link
02369fa5 1370 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1371 # has parenthesis so should have been a C<> ref
2a28b791 1372 ## try for a pagename (perlXXX(1))?
02369fa5
GS
1373 my( $func, $args ) = ( $1, $2 );
1374 if( $args =~ /^\d+$/ ){
2a28b791
GS
1375 my $url = page_sect( $word, '' );
1376 if( defined $url ){
1377 $word = "<A HREF=\"$url\">the $word manpage</A>";
1378 next;
1379 }
1380 }
02369fa5
GS
1381 ## try function name for a link, append tt'ed argument list
1382 $word = emit_C( $func, '', "($args)");
2a28b791
GS
1383
1384#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1385## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1386## # perl variables, should be a C<> ref
1387## $word = emit_C( $word );
1388
54310121 1389 } elsif ($word =~ m,^\w+://\w,) {
1390 # looks like a URL
5a039dd3 1391 # Don't relativize it: leave it as the author intended
54310121 1392 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1393 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1394 # looks like an e-mail address
7b8d334a
GS
1395 my ($w1, $w2, $w3) = ("", $word, "");
1396 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1397 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1398 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1399 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1400 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1401 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1402 } else {
7b8d334a 1403 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1404 }
1405 }
1406
2a28b791
GS
1407 # put everything back together
1408 return $lead . join( '', @words ) . $trail;
1409}
1410
54310121 1411
2a28b791
GS
1412#
1413# process_text - handles plaintext that appears in the input pod file.
1414# there may be pod commands embedded within the text so those must be
1415# converted to html commands.
1416#
7ba65c74 1417
c68ea5d1
GS
1418sub process_text1($$;$$);
1419sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1420sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1421
2a28b791
GS
1422sub process_text {
1423 return if $ignore;
1424 my( $tref ) = @_;
1425 my $res = process_text1( 0, $tref );
1426 $$tref = $res;
1427}
1428
c68ea5d1
GS
1429sub process_text1($$;$$){
1430 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791
GS
1431 my $res = '';
1432
60a48b2d
GS
1433 unless (defined $func) {
1434 $func = '';
1435 $lev++;
1436 }
1437
2a28b791
GS
1438 if( $func eq 'B' ){
1439 # B<text> - boldface
1440 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1441
1442 } elsif( $func eq 'C' ){
1443 # C<code> - can be a ref or <CODE></CODE>
1444 # need to extract text
c68ea5d1 1445 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791
GS
1446
1447 ## clean-up of the link target
1448 my $text = depod( $par );
1449
1450 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1451 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1452
1453 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1454
1455 } elsif( $func eq 'E' ){
1456 # E<x> - convert to character
be3174d2
GS
1457 $$rstr =~ s/^([^>]*)>//;
1458 my $escape = $1;
1459 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1460 $res = "&$escape;";
2a28b791
GS
1461
1462 } elsif( $func eq 'F' ){
1463 # F<filename> - italizice
1464 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1465
1466 } elsif( $func eq 'I' ){
1467 # I<text> - italizice
1468 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1469
1470 } elsif( $func eq 'L' ){
1471 # L<link> - link
1472 ## L<text|cross-ref> => produce text, use cross-ref for linking
1473 ## L<cross-ref> => make text from cross-ref
1474 ## need to extract text
c68ea5d1 1475 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791
GS
1476
1477 # some L<>'s that shouldn't be:
1478 # a) full-blown URL's are emitted as-is
1479 if( $par =~ m{^\w+://}s ){
1480 return make_URL_href( $par );
1481 }
1482 # b) C<...> is stripped and treated as C<>
1483 if( $par =~ /^C<(.*)>$/ ){
1484 my $text = depod( $1 );
1485 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1486 }
1487
1488 # analyze the contents
1489 $par =~ s/\n/ /g; # undo word-wrapped tags
1490 my $opar = $par;
1491 my $linktext;
1492 if( $par =~ s{^([^|]+)\|}{} ){
1493 $linktext = $1;
1494 }
1495
1496 # make sure sections start with a /
1497 $par =~ s{^"}{/"};
1498
1499 my( $page, $section, $ident );
1500
1501 # check for link patterns
1502 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1503 # we've got a name/ident (no quotes)
1504 ( $page, $ident ) = ( $1, $2 );
1505 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1506
1507 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1508 # even though this should be a "section", we go for ident first
1509 ( $page, $ident ) = ( $1, $2 );
1510 ### print STDERR "--> L<$par> to page $page, section $section\n";
1511
1512 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1513 ( $page, $section ) = ( '', $par );
1514 ### print STDERR "--> L<$par> to void page, section $section\n";
1515
1516 } else {
1517 ( $page, $section ) = ( $par, '' );
1518 ### print STDERR "--> L<$par> to page $par, void section\n";
1519 }
1520
1521 # now, either $section or $ident is defined. the convoluted logic
1522 # below tries to resolve L<> according to what the user specified.
1523 # failing this, we try to find the next best thing...
1524 my( $url, $ltext, $fid );
1525
1526 RESOLVE: {
1527 if( defined $ident ){
1528 ## try to resolve $ident as an item
1529 ( $url, $fid ) = coderef( $page, $ident );
1530 if( $url ){
1531 if( ! defined( $linktext ) ){
1532 $linktext = $ident;
1533 $linktext .= " in " if $ident && $page;
1534 $linktext .= "the $page manpage" if $page;
1535 }
1536 ### print STDERR "got coderef url=$url\n";
1537 last RESOLVE;
1538 }
1539 ## no luck: go for a section (auto-quoting!)
1540 $section = $ident;
1541 }
1542 ## now go for a section
1543 my $htmlsection = htmlify( $section );
1544 $url = page_sect( $page, $htmlsection );
1545 if( $url ){
1546 if( ! defined( $linktext ) ){
1547 $linktext = $section;
1548 $linktext .= " in " if $section && $page;
1549 $linktext .= "the $page manpage" if $page;
1550 }
1551 ### print STDERR "got page/section url=$url\n";
1552 last RESOLVE;
1553 }
1554 ## no luck: go for an ident
1555 if( $section ){
1556 $ident = $section;
1557 } else {
1558 $ident = $page;
1559 $page = undef();
1560 }
1561 ( $url, $fid ) = coderef( $page, $ident );
1562 if( $url ){
1563 if( ! defined( $linktext ) ){
1564 $linktext = $ident;
1565 $linktext .= " in " if $ident && $page;
1566 $linktext .= "the $page manpage" if $page;
1567 }
1568 ### print STDERR "got section=>coderef url=$url\n";
1569 last RESOLVE;
1570 }
1571
1572 # warning; show some text.
1573 $linktext = $opar unless defined $linktext;
1574 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1575 }
1576
d1be9408 1577 # now we have a URL or just plain code
2a28b791
GS
1578 $$rstr = $linktext . '>' . $$rstr;
1579 if( defined( $url ) ){
1580 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1581 } else {
1582 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1583 }
1584
1585 } elsif( $func eq 'S' ){
1586 # S<text> - non-breaking spaces
1587 $res = process_text1( $lev, $rstr );
1588 $res =~ s/ /&nbsp;/g;
1589
1590 } elsif( $func eq 'X' ){
1591 # X<> - ignore
1592 $$rstr =~ s/^[^>]*>//;
1593
1594 } elsif( $func eq 'Z' ){
1595 # Z<> - empty
1596 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1597 unless $$rstr =~ s/^>//;
1598
1599 } else {
c68ea5d1
GS
1600 my $term = pattern $closing;
1601 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1602 # all others: either recurse into new function or
c68ea5d1 1603 # terminate at closing angle bracket(s)
2a28b791 1604 my $pt = $1;
c68ea5d1 1605 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1606 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1
GS
1607 return $res if !$3 && $lev > 1;
1608 if( $3 ){
1609 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1610 }
2a28b791
GS
1611 }
1612 if( $lev == 1 ){
1613 $res .= pure_text( $$rstr );
54310121 1614 } else {
2a28b791 1615 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1616 }
1617 }
2a28b791
GS
1618 return $res;
1619}
54310121 1620
2a28b791
GS
1621#
1622# go_ahead: extract text of an IS (can be nested)
1623#
c68ea5d1
GS
1624sub go_ahead($$$){
1625 my( $rstr, $func, $closing ) = @_;
2a28b791 1626 my $res = '';
c68ea5d1
GS
1627 my @closing = ($closing);
1628 while( $$rstr =~
1629 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1630 $res .= $1;
c68ea5d1
GS
1631 unless( $3 ){
1632 shift @closing;
1633 return $res unless @closing;
2a28b791 1634 } else {
c68ea5d1 1635 unshift @closing, closing $4;
2a28b791
GS
1636 }
1637 $res .= $2;
1638 }
1639 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1640 return $res;
54310121 1641}
1642
1643#
2a28b791
GS
1644# emit_C - output result of C<text>
1645# $text is the depod-ed text
54310121 1646#
02369fa5
GS
1647sub emit_C($;$$){
1648 my( $text, $nocode, $args ) = @_;
60a48b2d 1649 $args = '' unless defined $args;
2a28b791
GS
1650 my $res;
1651 my( $url, $fid ) = coderef( undef(), $text );
1652
1653 # need HTML-safe text
02369fa5 1654 my $linktext = html_escape( "$text$args" );
2a28b791
GS
1655
1656 if( defined( $url ) &&
1657 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1658 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1659 } elsif( 0 && $nocode ){
1660 $res = $linktext;
1661 } else {
1662 $res = "<CODE>$linktext</CODE>";
1663 }
1664 return $res;
54310121 1665}
1666
1667#
2a28b791
GS
1668# html_escape: make text safe for HTML
1669#
1670sub html_escape {
1671 my $rest = $_[0];
1672 $rest =~ s/&/&amp;/g;
1673 $rest =~ s/</&lt;/g;
1674 $rest =~ s/>/&gt;/g;
1675 $rest =~ s/"/&quot;/g;
1676 return $rest;
1677}
1678
1679
1680#
39e571d4
LM
1681# dosify - convert filenames to 8.3
1682#
1683sub dosify {
1684 my($str) = @_;
fe4c6be1 1685 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4
LM
1686 if ($Is83) {
1687 $str = lc $str;
1688 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1689 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1690 }
1691 return $str;
1692}
1693
1694#
d1be9408 1695# page_sect - make a URL from the text of a L<>
54310121 1696#
2a28b791
GS
1697sub page_sect($$) {
1698 my( $page, $section ) = @_;
1699 my( $linktext, $page83, $link); # work strings
1700
1701 # check if we know that this is a section in this page
1702 if (!defined $pages{$page} && defined $sections{$page}) {
1703 $section = $page;
1704 $page = "";
1705 ### print STDERR "reset page='', section=$section\n";
54310121 1706 }
1707
39e571d4
LM
1708 $page83=dosify($page);
1709 $page=$page83 if (defined $pages{$page83});
54310121 1710 if ($page eq "") {
2a28b791 1711 $link = "#" . htmlify( $section );
350ccacd 1712 } elsif ( $page =~ /::/ ) {
350ccacd 1713 $page =~ s,::,/,g;
29f227c9
BS
1714 # Search page cache for an entry keyed under the html page name,
1715 # then look to see what directory that page might be in. NOTE:
1716 # this will only find one page. A better solution might be to produce
1717 # an intermediate page that is an index to all such pages.
1718 my $page_name = $page ;
fe6f1558 1719 $page_name =~ s,^.*/,,s ;
29f227c9
BS
1720 if ( defined( $pages{ $page_name } ) &&
1721 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1722 ) {
1723 $page = $1 ;
1724 }
1725 else {
1726 # NOTE: This branch assumes that all A::B pages are located in
1727 # $htmlroot/A/B.html . This is often incorrect, since they are
1728 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1729 # analyze the contents of %pages and figure out where any
1730 # cousins of A::B are, then assume that. So, if A::B isn't found,
1731 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1732 # lib/A/B.pm. This is also limited, but it's an improvement.
1733 # Maybe a hints file so that the links point to the correct places
2a28b791
GS
1734 # nonetheless?
1735
29f227c9 1736 }
350ccacd 1737 $link = "$htmlroot/$page.html";
2a28b791 1738 $link .= "#" . htmlify( $section ) if ($section);
54310121 1739 } elsif (!defined $pages{$page}) {
54310121 1740 $link = "";
54310121 1741 } else {
2a28b791
GS
1742 $section = htmlify( $section ) if $section ne "";
1743 ### print STDERR "...section=$section\n";
54310121 1744
1745 # if there is a directory by the name of the page, then assume that an
1746 # appropriate section will exist in the subdirectory
29f227c9
BS
1747# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1748 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1749 $link = "$htmlroot/$1/$section.html";
2a28b791 1750 ### print STDERR "...link=$link\n";
54310121 1751
1752 # since there is no directory by the name of the page, the section will
1753 # have to exist within a .html of the same name. thus, make sure there
1754 # is a .pod or .pm that might become that .html
1755 } else {
2a28b791
GS
1756 $section = "#$section" if $section;
1757 ### print STDERR "...section=$section\n";
1758
54310121 1759 # check if there is a .pod with the page name
1760 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1761 $link = "$htmlroot/$1.html$section";
1762 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1763 $link = "$htmlroot/$1.html$section";
1764 } else {
54310121 1765 $link = "";
54310121 1766 }
1767 }
1768 }
1769
54310121 1770 if ($link) {
29f227c9
BS
1771 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1772 # implies $htmlroot eq ''. This means that the link in question
1773 # needs a prefix of $htmldir if it begins with '/'. The test for
1774 # the initial '/' is done to avoid '#'-only links, and to allow
1775 # for other kinds of links, like file:, ftp:, etc.
1776 my $url ;
1777 if ( $htmlfileurl ne '' ) {
fe6f1558 1778 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791
GS
1779 $url = relativize_url( $link, $htmlfileurl );
1780# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9
BS
1781 }
1782 else {
1783 $url = $link ;
1784 }
2a28b791 1785 return $url;
29f227c9 1786
54310121 1787 } else {
2a28b791 1788 return undef();
54310121 1789 }
54310121 1790}
1791
1792#
29f227c9
BS
1793# relativize_url - convert an absolute URL to one relative to a base URL.
1794# Assumes both end in a filename.
1795#
1796sub relativize_url {
1797 my ($dest,$source) = @_ ;
1798
1799 my ($dest_volume,$dest_directory,$dest_file) =
1800 File::Spec::Unix->splitpath( $dest ) ;
1801 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1802
1803 my ($source_volume,$source_directory,$source_file) =
1804 File::Spec::Unix->splitpath( $source ) ;
1805 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1806
1807 my $rel_path = '' ;
1808 if ( $dest ne '' ) {
1809 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1810 }
1811
1812 if ( $rel_path ne '' &&
1813 substr( $rel_path, -1 ) ne '/' &&
1814 substr( $dest_file, 0, 1 ) ne '#'
1815 ) {
1816 $rel_path .= "/$dest_file" ;
1817 }
1818 else {
1819 $rel_path .= "$dest_file" ;
1820 }
1821
1822 return $rel_path ;
1823}
1824
54310121 1825
1826#
2a28b791 1827# coderef - make URL from the text of a C<>
54310121 1828#
2a28b791
GS
1829sub coderef($$){
1830 my( $page, $item ) = @_;
1831 my( $url );
1832
1833 my $fid = fragment_id( $item );
2a28b791
GS
1834 if( defined( $page ) ){
1835 # we have been given a $page...
1836 $page =~ s{::}{/}g;
1837
1838 # Do we take it? Item could be a section!
228a48a5 1839 my $base = $items{$fid} || "";
2a28b791
GS
1840 $base =~ s{[^/]*/}{};
1841 if( $base ne "$page.html" ){
1842 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1843 $page = undef();
1844 }
54310121 1845
2a28b791
GS
1846 } else {
1847 # no page - local items precede cached items
67398a75
GS
1848 if( defined( $fid ) ){
1849 if( exists $local_items{$fid} ){
1850 $page = $local_items{$fid};
1851 } else {
1852 $page = $items{$fid};
1853 }
2a28b791
GS
1854 }
1855 }
54310121 1856
1857 # if there was a pod file that we found earlier with an appropriate
1858 # =item directive, then create a link to that page.
2a28b791
GS
1859 if( defined $page ){
1860 if( $page ){
228a48a5 1861 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1862 $page = $1 . '.html';
29f227c9 1863 }
2a28b791 1864 my $link = "$htmlroot/$page#item_$fid";
54310121 1865
2a28b791
GS
1866 # Here, we take advantage of the knowledge that $htmlfileurl
1867 # ne '' implies $htmlroot eq ''.
1868 if ( $htmlfileurl ne '' ) {
1869 $link = "$htmldir$link" ;
1870 $url = relativize_url( $link, $htmlfileurl ) ;
1871 } else {
1872 $url = $link ;
1873 }
1874 } else {
1875 $url = "#item_" . $fid;
1876 }
54310121 1877
2a28b791
GS
1878 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1879 }
1880 return( $url, $fid );
54310121 1881}
1882
54310121 1883
1884
1885#
29f227c9
BS
1886# Adapted from Nick Ing-Simmons' PodToHtml package.
1887sub relative_url {
1888 my $source_file = shift ;
1889 my $destination_file = shift;
1890
1891 my $source = URI::file->new_abs($source_file);
1892 my $uo = URI::file->new($destination_file,$source)->abs;
1893 return $uo->rel->as_string;
1894}
1895
1896
1897#
54310121 1898# finish_list - finish off any pending HTML lists. this should be called
1899# after the entire pod file has been read and converted.
1900#
1901sub finish_list {
7b8d334a 1902 while ($listlevel > 0) {
54310121 1903 print HTML "</DL>\n";
1904 $listlevel--;
1905 }
1906}
1907
1908#
1909# htmlify - converts a pod section specification to a suitable section
2a28b791
GS
1910# specification for HTML. Note that we keep spaces and special characters
1911# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1912#
1913sub htmlify {
2a28b791
GS
1914 my( $heading) = @_;
1915 $heading =~ s/(\s+)/ /g;
1916 $heading =~ s/\s+\Z//;
1917 $heading =~ s/\A\s+//;
1918 # The hyphen is a disgrace to the English language.
1919 $heading =~ s/[-"?]//g;
1920 $heading = lc( $heading );
1921 return $heading;
1922}
54310121 1923
2a28b791
GS
1924#
1925# depod - convert text by eliminating all interior sequences
1926# Note: can be called with copy or modify semantics
1927#
1928my %E2c;
67398a75
GS
1929$E2c{lt} = '<';
1930$E2c{gt} = '>';
1931$E2c{sol} = '/';
2a28b791 1932$E2c{verbar} = '|';
67398a75 1933$E2c{amp} = '&'; # in Tk's pods
2a28b791 1934
c68ea5d1 1935sub depod1($;$$);
7ba65c74 1936
2a28b791
GS
1937sub depod($){
1938 my $string;
1939 if( ref( $_[0] ) ){
1940 $string = ${$_[0]};
1941 ${$_[0]} = depod1( \$string );
1942 } else {
1943 $string = $_[0];
1944 depod1( \$string );
1945 }
1946}
54310121 1947
c68ea5d1
GS
1948sub depod1($;$$){
1949 my( $rstr, $func, $closing ) = @_;
2a28b791 1950 my $res = '';
228a48a5 1951 return $res unless defined $$rstr;
2a28b791
GS
1952 if( ! defined( $func ) ){
1953 # skip to next begin of an interior sequence
c68ea5d1 1954 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 1955 # recurse into its text
c68ea5d1 1956 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791
GS
1957 }
1958 $res .= $$rstr;
1959 } elsif( $func eq 'E' ){
1960 # E<x> - convert to character
be3174d2 1961 $$rstr =~ s/^([^>]*)>//;
228a48a5 1962 $res .= $E2c{$1} || "";
2a28b791
GS
1963 } elsif( $func eq 'X' ){
1964 # X<> - ignore
1965 $$rstr =~ s/^[^>]*>//;
1966 } elsif( $func eq 'Z' ){
1967 # Z<> - empty
1968 $$rstr =~ s/^>//;
1969 } else {
1970 # all others: either recurse into new function or
1971 # terminate at closing angle bracket
c68ea5d1
GS
1972 my $term = pattern $closing;
1973 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 1974 $res .= $1;
c68ea5d1
GS
1975 last unless $3;
1976 $res .= depod1( $rstr, $3, closing $4 );
2a28b791
GS
1977 }
1978 ## If we're here and $2 ne '>': undelimited interior sequence.
1979 ## Ignored, as this is called without proper indication of where we are.
1980 ## Rely on process_text to produce diagnostics.
1981 }
1982 return $res;
1983}
54310121 1984
2a28b791
GS
1985#
1986# fragment_id - construct a fragment identifier from:
1987# a) =item text
1988# b) contents of C<...>
1989#
1990my @hc;
1991sub fragment_id {
1992 my $text = shift();
1993 $text =~ s/\s+\Z//s;
1994 if( $text ){
1995 # a method or function?
1996 return $1 if $text =~ /(\w+)\s*\(/;
1997 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1998
1999 # a variable name?
2000 return $1 if $text =~ /^([$@%*]\S+)/;
2001
2002 # some pattern matching operator?
2003 return $1 if $text =~ m|^(\w+/).*/\w*$|;
2004
2005 # fancy stuff... like "do { }"
2006 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2007
2008 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2009 # and some funnies with ... Module ...
2010 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
2011 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2012
2013 # text? normalize!
2014 $text =~ s/\s+/_/sg;
2015 $text =~ s{(\W)}{
2016 defined( $hc[ord($1)] ) ? $hc[ord($1)]
2017 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2018 $text = substr( $text, 0, 50 );
2019 } else {
2020 return undef();
2021 }
54310121 2022}
2023
2a28b791
GS
2024#
2025# make_URL_href - generate HTML href from URL
2026# Special treatment for CGI queries.
2027#
2028sub make_URL_href($){
2029 my( $url ) = @_;
2030 if( $url !~
228a48a5 2031 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791
GS
2032 $url = "<A HREF=\"$url\">$url</A>";
2033 }
2034 return $url;
54310121 2035}
2036
20371;