This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
typo in change#5408
[perl5.git] / lib / Pod / Html.pm
CommitLineData
54310121
PP
1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
2a28b791 8$VERSION = 1.03;
54310121
PP
9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288
FK
15use locale; # make \w work right in non-ASCII lands
16
54310121
PP
17use strict;
18
7b8d334a
GS
19use Config;
20
54310121
PP
21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121
PP
24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121
PP
28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3
BS
48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9
BS
53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121
PP
57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b
JD
140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121
PP
146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b
JD
152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121
PP
158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b
JD
171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121
PP
175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
54310121
PP
179=head1 SEE ALSO
180
181L<perlpod>
182
183=head1 COPYRIGHT
184
185This program is distributed under the Artistic License.
186
187=cut
188
1f763251
GS
189my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
190my $dircache = "pod2htmd$cache_ext";
191my $itemcache = "pod2htmi$cache_ext";
54310121
PP
192
193my @begin_stack = (); # begin/end stack
194
29f227c9
BS
195my @libpods = (); # files to search for links from C<> directives
196my $htmlroot = "/"; # http-server base directory from which all
54310121 197 # relative paths in $podpath stem.
5a039dd3
BS
198my $htmldir = ""; # The directory to which the html pages
199 # will (eventually) be written.
54310121 200my $htmlfile = ""; # write to stdout by default
29f227c9 201my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3
BS
202 # refer to this file. This is only used
203 # to make relative urls that point to
204 # other files.
54310121
PP
205my $podfile = ""; # read from stdin by default
206my @podpath = (); # list of directories containing library pods.
207my $podroot = "."; # filesystem base directory from which all
208 # relative paths in $podpath stem.
34db337b 209my $css = ''; # Cascading style sheet
54310121 210my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 211my $quiet = 0; # not quiet by default
54310121
PP
212my $verbose = 0; # not verbose by default
213my $doindex = 1; # non-zero if we should generate an index
214my $listlevel = 0; # current list depth
2a28b791
GS
215my @listend = (); # the text to use to end the list.
216my $after_lpar = 0; # set to true after a par in an =item
54310121
PP
217my $ignore = 1; # whether or not to format text. we don't
218 # format text until we hit our first pod
219 # directive.
220
221my %items_named = (); # for the multiples of the same item in perlfunc
222my @items_seen = ();
223my $netscape = 0; # whether or not to use netscape directives.
224my $title; # title to give the pod(s)
34db337b 225my $header = 0; # produce block header/footer
54310121
PP
226my $top = 1; # true if we are at the top of the doc. used
227 # to prevent the first <HR> directive.
228my $paragraph; # which paragraph we're processing (used
229 # for error messages)
2a28b791 230my $ptQuote = 0; # status of double-quote conversion
54310121
PP
231my %pages = (); # associative array used to find the location
232 # of pages referenced by L<> links.
233my %sections = (); # sections within this page
234my %items = (); # associative array used to find the location
235 # of =item directives referenced by C<> links
2a28b791 236my %local_items = (); # local items - avoid destruction of %items
39e571d4
ML
237my $Is83; # is dos with short filenames (8.3)
238
54310121 239sub init_globals {
04f720db
JD
240$dircache = "pod2htmd$cache_ext";
241$itemcache = "pod2htmi$cache_ext";
54310121
PP
242
243@begin_stack = (); # begin/end stack
244
245@libpods = (); # files to search for links from C<> directives
246$htmlroot = "/"; # http-server base directory from which all
247 # relative paths in $podpath stem.
67398a75
GS
248$htmldir = ""; # The directory to which the html pages
249 # will (eventually) be written.
54310121
PP
250$htmlfile = ""; # write to stdout by default
251$podfile = ""; # read from stdin by default
252@podpath = (); # list of directories containing library pods.
253$podroot = "."; # filesystem base directory from which all
254 # relative paths in $podpath stem.
34db337b 255$css = ''; # Cascading style sheet
54310121 256$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 257$quiet = 0; # not quiet by default
54310121
PP
258$verbose = 0; # not verbose by default
259$doindex = 1; # non-zero if we should generate an index
260$listlevel = 0; # current list depth
2a28b791
GS
261@listend = (); # the text to use to end the list.
262$after_lpar = 0; # set to true after a par in an =item
54310121
PP
263$ignore = 1; # whether or not to format text. we don't
264 # format text until we hit our first pod
265 # directive.
266
267@items_seen = ();
268%items_named = ();
269$netscape = 0; # whether or not to use netscape directives.
34db337b 270$header = 0; # produce block header/footer
54310121
PP
271$title = ''; # title to give the pod(s)
272$top = 1; # true if we are at the top of the doc. used
273 # to prevent the first <HR> directive.
274$paragraph = ''; # which paragraph we're processing (used
275 # for error messages)
54310121 276%sections = (); # sections within this page
3e3baf6d
TB
277
278# These are not reinitialised here but are kept as a cache.
279# See get_cache and related cache management code.
280#%pages = (); # associative array used to find the location
281 # of pages referenced by L<> links.
282#%items = (); # associative array used to find the location
54310121 283 # of =item directives referenced by C<> links
2a28b791 284%local_items = ();
39e571d4 285$Is83=$^O eq 'dos';
54310121
PP
286}
287
2a28b791
GS
288#
289# clean_data: global clean-up of pod data
290#
291sub clean_data($){
292 my( $dataref ) = @_;
293 my $i;
294 for( $i = 0; $i <= $#$dataref; $i++ ){
295 ${$dataref}[$i] =~ s/\s+\Z//;
296
297 # have a look for all-space lines
298 if( ${$dataref}[$i] =~ /^\s+$/m ){
299 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
300 splice( @$dataref, $i, 1, @chunks );
301 }
302 }
303}
304
305
54310121
PP
306sub pod2html {
307 local(@ARGV) = @_;
308 local($/);
309 local $_;
310
311 init_globals();
312
39e571d4
ML
313 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
314
54310121 315 # cache of %pages and %items from last time we ran pod2html
54310121
PP
316
317 #undef $opt_help if defined $opt_help;
318
319 # parse the command-line parameters
320 parse_command_line();
321
322 # set some variables to their default values if necessary
323 local *POD;
324 unless (@ARGV && $ARGV[0]) {
325 $podfile = "-" unless $podfile; # stdin
326 open(POD, "<$podfile")
327 || die "$0: cannot open $podfile file for input: $!\n";
328 } else {
329 $podfile = $ARGV[0]; # XXX: might be more filenames
330 *POD = *ARGV;
331 }
332 $htmlfile = "-" unless $htmlfile; # stdout
333 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9
BS
334 $htmldir =~ s#/$## ; # so we don't get a //
335 if ( $htmlroot eq ''
336 && defined( $htmldir )
337 && $htmldir ne ''
338 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
339 )
5a039dd3 340 {
29f227c9
BS
341 # Set the 'base' url for this file, so that we can use it
342 # as the location from which to calculate relative links
343 # to other files. If this is '', then absolute links will
344 # be used throughout.
345 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 346 }
54310121
PP
347
348 # read the pod a paragraph at a time
349 warn "Scanning for sections in input file(s)\n" if $verbose;
350 $/ = "";
351 my @poddata = <POD>;
352 close(POD);
2a28b791 353 clean_data( \@poddata );
54310121
PP
354
355 # scan the pod for =head[1-6] directives and build an index
356 my $index = scan_headings(\%sections, @poddata);
357
3e3baf6d 358 unless($index) {
31e56455 359 warn "No headings in $podfile\n" if $verbose;
3e3baf6d
TB
360 }
361
54310121
PP
362 # open the output file
363 open(HTML, ">$htmlfile")
364 || die "$0: cannot open $htmlfile file for output: $!\n";
365
d011ffae
GS
366 # put a title in the HTML file if one wasn't specified
367 if ($title eq '') {
368 TITLE_SEARCH: {
369 for (my $i = 0; $i < @poddata; $i++) {
370 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
371 for my $para ( @poddata[$i, $i+1] ) {
372 last TITLE_SEARCH
373 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
374 }
375 }
54310121 376
d011ffae
GS
377 }
378 }
379 }
3e3baf6d
TB
380 if (!$title and $podfile =~ /\.pod$/) {
381 # probably a split pod so take first =head[12] as title
382 for (my $i = 0; $i < @poddata; $i++) {
383 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
384 }
385 warn "adopted '$title' as title for $podfile\n"
386 if $verbose and $title;
387 }
7b8d334a
GS
388 if ($title) {
389 $title =~ s/\s*\(.*\)//;
390 } else {
34db337b 391 warn "$0: no title for $podfile" unless $quiet;
54310121
PP
392 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
393 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 394 warn "using $title" if $verbose;
54310121 395 }
34db337b
JD
396 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
397 $csslink =~ s,\\,/,g;
398 $csslink =~ s,(/.):,$1|,;
399
400 my $block = $header ? <<END_OF_BLOCK : '';
401<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
402<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
403<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
404</TD></TR>
405</TABLE>
406END_OF_BLOCK
407
54310121 408 print HTML <<END_OF_HEAD;
7b8d334a
GS
409<HTML>
410<HEAD>
34db337b 411<TITLE>$title</TITLE>$csslink
7b8d334a
GS
412<LINK REV="made" HREF="mailto:$Config{perladmin}">
413</HEAD>
54310121 414
7b8d334a 415<BODY>
34db337b 416$block
54310121
PP
417END_OF_HEAD
418
3e3baf6d
TB
419 # load/reload/validate/cache %pages and %items
420 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121
PP
421
422 # scan the pod for =item directives
2a28b791 423 scan_items( \%local_items, "", @poddata);
54310121
PP
424
425 # put an index at the top of the file. note, if $doindex is 0 we
426 # still generate an index, but surround it with an html comment.
427 # that way some other program can extract it if desired.
428 $index =~ s/--+/-/g;
2a28b791 429 print HTML "<A NAME=\"__index__\"></A>\n";
54310121
PP
430 print HTML "<!-- INDEX BEGIN -->\n";
431 print HTML "<!--\n" unless $doindex;
432 print HTML $index;
433 print HTML "-->\n" unless $doindex;
434 print HTML "<!-- INDEX END -->\n\n";
31e56455 435 print HTML "<HR>\n" if $doindex and $index;
54310121
PP
436
437 # now convert this file
2a28b791
GS
438 my $after_item; # set to true after an =item
439 warn "Converting input file $podfile\n" if $verbose;
440 foreach my $i (0..$#poddata){
441 $ptQuote = 0; # status of quote conversion
442
54310121
PP
443 $_ = $poddata[$i];
444 $paragraph = $i+1;
445 if (/^(=.*)/s) { # is it a pod directive?
446 $ignore = 0;
2a28b791 447 $after_item = 0;
54310121
PP
448 $_ = $1;
449 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
450 process_begin($1, $2);
451 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
452 process_end($1, $2);
453 } elsif (/^=cut/) { # =cut
454 process_cut();
455 } elsif (/^=pod/) { # =pod
456 process_pod();
457 } else {
458 next if @begin_stack && $begin_stack[-1] ne 'html';
459
7b8d334a 460 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791
GS
461 process_head( $1, $2, $doindex && $index );
462 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
463 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 464 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791
GS
465 process_item( $1 );
466 $after_item = 1;
54310121
PP
467 } elsif (/^=over\s*(.*)/) { # =over N
468 process_over();
469 } elsif (/^=back/) { # =back
470 process_back();
a45bd81d 471 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121
PP
472 process_for($1,$2);
473 } else {
474 /^=(\S*)\s*/;
475 warn "$0: $podfile: unknown pod directive '$1' in "
476 . "paragraph $paragraph. ignoring.\n";
477 }
478 }
479 $top = 0;
480 }
481 else {
482 next if $ignore;
483 next if @begin_stack && $begin_stack[-1] ne 'html';
484 my $text = $_;
2a28b791
GS
485 if( $text =~ /\A\s+/ ){
486 process_pre( \$text );
487 print HTML "<PRE>\n$text</PRE>\n";
488
489 } else {
490 process_text( \$text );
491
492 # experimental: check for a paragraph where all lines
493 # have some ...\t...\t...\n pattern
494 if( $text =~ /\t/ ){
495 my @lines = split( "\n", $text );
496 if( @lines > 1 ){
497 my $all = 2;
498 foreach my $line ( @lines ){
499 if( $line =~ /\S/ && $line !~ /\t/ ){
500 $all--;
501 last if $all == 0;
502 }
503 }
504 if( $all > 0 ){
505 $text =~ s/\t+/<TD>/g;
506 $text =~ s/^/<TR><TD>/gm;
507 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
508 $text . '</TABLE>';
509 }
510 }
511 }
512 ## end of experimental
513
514 if( $after_item ){
515 print HTML "$text\n";
516 $after_lpar = 1;
517 } else {
518 print HTML "<P>$text</P>\n";
519 }
520 }
521 $after_item = 0;
54310121
PP
522 }
523 }
524
525 # finish off any pending directives
526 finish_list();
2a28b791
GS
527
528 # link to page index
529 print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
530 if $doindex and $index;
531
54310121 532 print HTML <<END_OF_TAIL;
34db337b 533$block
7b8d334a 534</BODY>
54310121 535
7b8d334a 536</HTML>
54310121
PP
537END_OF_TAIL
538
539 # close the html file
540 close(HTML);
541
542 warn "Finished\n" if $verbose;
543}
544
545##############################################################################
546
547my $usage; # see below
548sub usage {
549 my $podfile = shift;
550 warn "$0: $podfile: @_\n" if @_;
551 die $usage;
552}
553
554$usage =<<END_OF_USAGE;
555Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
556 --podpath=<name>:...:<name> --podroot=<name>
557 --libpods=<name>:...:<name> --recurse --verbose --index
558 --netscape --norecurse --noindex
559
560 --flush - flushes the item and directory caches.
561 --help - prints this message.
562 --htmlroot - http-server base directory from which all relative paths
563 in podpath stem (default is /).
564 --index - generate an index at the top of the resulting html
565 (default).
566 --infile - filename for the pod to convert (input taken from stdin
567 by default).
568 --libpods - colon-separated list of pages to search for =item pod
569 directives in as targets of C<> and implicit links (empty
570 by default). note, these are not filenames, but rather
571 page names like those that appear in L<> links.
572 --netscape - will use netscape html directives when applicable.
573 --nonetscape - will not use netscape directives (default).
574 --outfile - filename for the resulting html file (output sent to
575 stdout by default).
576 --podpath - colon-separated list of directories containing library
577 pods. empty by default.
578 --podroot - filesystem base directory from which all relative paths
579 in podpath stem (default is .).
580 --noindex - don't generate an index at the top of the resulting html.
581 --norecurse - don't recurse on those subdirectories listed in podpath.
582 --recurse - recurse on those subdirectories listed in podpath
583 (default behavior).
584 --title - title that will appear in resulting html file.
34db337b
JD
585 --header - produce block header/footer
586 --css - stylesheet URL
54310121 587 --verbose - self-explanatory
34db337b 588 --quiet - supress some benign warning messages
54310121
PP
589
590END_OF_USAGE
591
592sub parse_command_line {
2a28b791 593 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
34db337b 594 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 595 my $result = GetOptions(
29f227c9
BS
596 'flush' => \$opt_flush,
597 'help' => \$opt_help,
598 'htmldir=s' => \$opt_htmldir,
54310121 599 'htmlroot=s' => \$opt_htmlroot,
29f227c9 600 'index!' => \$opt_index,
54310121
PP
601 'infile=s' => \$opt_infile,
602 'libpods=s' => \$opt_libpods,
603 'netscape!' => \$opt_netscape,
604 'outfile=s' => \$opt_outfile,
605 'podpath=s' => \$opt_podpath,
606 'podroot=s' => \$opt_podroot,
54310121
PP
607 'recurse!' => \$opt_recurse,
608 'title=s' => \$opt_title,
34db337b
JD
609 'header' => \$opt_header,
610 'css=s' => \$opt_css,
54310121 611 'verbose' => \$opt_verbose,
34db337b 612 'quiet' => \$opt_quiet,
54310121
PP
613 );
614 usage("-", "invalid parameters") if not $result;
615
616 usage("-") if defined $opt_help; # see if the user asked for help
617 $opt_help = ""; # just to make -w shut-up.
618
619 $podfile = $opt_infile if defined $opt_infile;
620 $htmlfile = $opt_outfile if defined $opt_outfile;
67398a75 621 $htmldir = $opt_htmldir if defined $opt_htmldir;
54310121
PP
622
623 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
624 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
625
626 warn "Flushing item and directory caches\n"
627 if $opt_verbose && defined $opt_flush;
628 unlink($dircache, $itemcache) if defined $opt_flush;
629
630 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
631 $podroot = $opt_podroot if defined $opt_podroot;
632
633 $doindex = $opt_index if defined $opt_index;
634 $recurse = $opt_recurse if defined $opt_recurse;
635 $title = $opt_title if defined $opt_title;
34db337b
JD
636 $header = defined $opt_header ? 1 : 0;
637 $css = $opt_css if defined $opt_css;
54310121 638 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 639 $quiet = defined $opt_quiet ? 1 : 0;
54310121
PP
640 $netscape = $opt_netscape if defined $opt_netscape;
641}
642
3e3baf6d
TB
643
644my $saved_cache_key;
645
646sub get_cache {
647 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
648 my @cache_key_args = @_;
649
650 # A first-level cache:
651 # Don't bother reading the cache files if they still apply
652 # and haven't changed since we last read them.
653
654 my $this_cache_key = cache_key(@cache_key_args);
655
656 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
657
658 # load the cache of %pages and %items if possible. $tests will be
659 # non-zero if successful.
660 my $tests = 0;
661 if (-f $dircache && -f $itemcache) {
662 warn "scanning for item cache\n" if $verbose;
663 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
664 }
665
666 # if we didn't succeed in loading the cache then we must (re)build
667 # %pages and %items.
668 if (!$tests) {
669 warn "scanning directories in pod-path\n" if $verbose;
670 scan_podpath($podroot, $recurse, 0);
671 }
672 $saved_cache_key = cache_key(@cache_key_args);
673}
674
675sub cache_key {
676 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
677 return join('!', $dircache, $itemcache, $recurse,
29f227c9 678 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d
TB
679}
680
54310121 681#
3e3baf6d 682# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121
PP
683# are valid caches of %pages and %items. if they are valid then it loads
684# them and returns a non-zero value.
685#
3e3baf6d 686sub load_cache {
54310121
PP
687 my($dircache, $itemcache, $podpath, $podroot) = @_;
688 my($tests);
689 local $_;
690
691 $tests = 0;
692
693 open(CACHE, "<$itemcache") ||
694 die "$0: error opening $itemcache for reading: $!\n";
695 $/ = "\n";
696
697 # is it the same podpath?
698 $_ = <CACHE>;
699 chomp($_);
3e3baf6d 700 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
701
702 # is it the same podroot?
703 $_ = <CACHE>;
704 chomp($_);
705 $tests++ if ($podroot eq $_);
706
707 # load the cache if its good
708 if ($tests != 2) {
709 close(CACHE);
54310121
PP
710 return 0;
711 }
712
713 warn "loading item cache\n" if $verbose;
714 while (<CACHE>) {
715 /(.*?) (.*)$/;
716 $items{$1} = $2;
717 }
718 close(CACHE);
719
720 warn "scanning for directory cache\n" if $verbose;
721 open(CACHE, "<$dircache") ||
722 die "$0: error opening $dircache for reading: $!\n";
723 $/ = "\n";
724 $tests = 0;
725
726 # is it the same podpath?
727 $_ = <CACHE>;
728 chomp($_);
3e3baf6d 729 $tests++ if (join(":", @$podpath) eq $_);
54310121
PP
730
731 # is it the same podroot?
732 $_ = <CACHE>;
733 chomp($_);
734 $tests++ if ($podroot eq $_);
735
736 # load the cache if its good
737 if ($tests != 2) {
738 close(CACHE);
54310121
PP
739 return 0;
740 }
741
742 warn "loading directory cache\n" if $verbose;
743 while (<CACHE>) {
744 /(.*?) (.*)$/;
745 $pages{$1} = $2;
746 }
747
748 close(CACHE);
749
750 return 1;
751}
752
753#
754# scan_podpath - scans the directories specified in @podpath for directories,
755# .pod files, and .pm files. it also scans the pod files specified in
756# @libpods for =item directives.
757#
758sub scan_podpath {
3e3baf6d 759 my($podroot, $recurse, $append) = @_;
54310121
PP
760 my($pwd, $dir);
761 my($libpod, $dirname, $pod, @files, @poddata);
762
3e3baf6d
TB
763 unless($append) {
764 %items = ();
765 %pages = ();
766 }
767
54310121
PP
768 # scan each directory listed in @podpath
769 $pwd = getcwd();
770 chdir($podroot)
771 || die "$0: error changing to directory $podroot: $!\n";
772 foreach $dir (@podpath) {
773 scan_dir($dir, $recurse);
774 }
775
776 # scan the pods listed in @libpods for =item directives
777 foreach $libpod (@libpods) {
778 # if the page isn't defined then we won't know where to find it
779 # on the system.
780 next unless defined $pages{$libpod} && $pages{$libpod};
781
782 # if there is a directory then use the .pod and .pm files within it.
29f227c9
BS
783 # NOTE: Only finds the first so-named directory in the tree.
784# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
785 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121
PP
786 # find all the .pod and .pm files within the directory
787 $dirname = $1;
788 opendir(DIR, $dirname) ||
789 die "$0: error opening directory $dirname: $!\n";
790 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
791 closedir(DIR);
792
793 # scan each .pod and .pm file for =item directives
794 foreach $pod (@files) {
795 open(POD, "<$dirname/$pod") ||
796 die "$0: error opening $dirname/$pod for input: $!\n";
797 @poddata = <POD>;
798 close(POD);
2a28b791 799 clean_data( \@poddata );
54310121 800
2a28b791 801 scan_items( \%items, "$dirname/$pod", @poddata);
54310121
PP
802 }
803
804 # use the names of files as =item directives too.
2a28b791
GS
805### Don't think this should be done this way - confuses issues.(WL)
806### foreach $pod (@files) {
807### $pod =~ /^(.*)(\.pod|\.pm)$/;
808### $items{$1} = "$dirname/$1.html" if $1;
809### }
54310121
PP
810 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
811 $pages{$libpod} =~ /([^:]*\.pm):/) {
812 # scan the .pod or .pm file for =item directives
813 $pod = $1;
814 open(POD, "<$pod") ||
815 die "$0: error opening $pod for input: $!\n";
816 @poddata = <POD>;
817 close(POD);
2a28b791 818 clean_data( \@poddata );
54310121 819
2a28b791 820 scan_items( \%items, "$pod", @poddata);
54310121
PP
821 } else {
822 warn "$0: shouldn't be here (line ".__LINE__."\n";
823 }
824 }
825 @poddata = (); # clean-up a bit
826
827 chdir($pwd)
828 || die "$0: error changing to directory $pwd: $!\n";
829
830 # cache the item list for later use
831 warn "caching items for later use\n" if $verbose;
832 open(CACHE, ">$itemcache") ||
833 die "$0: error open $itemcache for writing: $!\n";
834
835 print CACHE join(":", @podpath) . "\n$podroot\n";
836 foreach my $key (keys %items) {
837 print CACHE "$key $items{$key}\n";
838 }
839
840 close(CACHE);
841
842 # cache the directory list for later use
843 warn "caching directories for later use\n" if $verbose;
844 open(CACHE, ">$dircache") ||
845 die "$0: error open $dircache for writing: $!\n";
846
847 print CACHE join(":", @podpath) . "\n$podroot\n";
848 foreach my $key (keys %pages) {
849 print CACHE "$key $pages{$key}\n";
850 }
851
852 close(CACHE);
853}
854
855#
856# scan_dir - scans the directory specified in $dir for subdirectories, .pod
857# files, and .pm files. notes those that it finds. this information will
858# be used later in order to figure out where the pages specified in L<>
859# links are on the filesystem.
860#
861sub scan_dir {
862 my($dir, $recurse) = @_;
863 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
864 local $_;
865
866 @subdirs = ();
867 @pods = ();
868
869 opendir(DIR, $dir) ||
870 die "$0: error opening directory $dir: $!\n";
871 while (defined($_ = readdir(DIR))) {
872 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
873 $pages{$_} = "" unless defined $pages{$_};
874 $pages{$_} .= "$dir/$_:";
875 push(@subdirs, $_);
876 } elsif (/\.pod$/) { # .pod
877 s/\.pod$//;
878 $pages{$_} = "" unless defined $pages{$_};
879 $pages{$_} .= "$dir/$_.pod:";
880 push(@pods, "$dir/$_.pod");
881 } elsif (/\.pm$/) { # .pm
882 s/\.pm$//;
883 $pages{$_} = "" unless defined $pages{$_};
884 $pages{$_} .= "$dir/$_.pm:";
885 push(@pods, "$dir/$_.pm");
886 }
887 }
888 closedir(DIR);
889
890 # recurse on the subdirectories if necessary
891 if ($recurse) {
892 foreach my $subdir (@subdirs) {
893 scan_dir("$dir/$subdir", $recurse);
894 }
895 }
896}
897
898#
899# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
900# build an index.
901#
902sub scan_headings {
903 my($sections, @data) = @_;
2a28b791 904 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 905
be173d55
UC
906 # here we need local $ignore = 0;
907 # unfortunately, we can't have it, because $ignore is lexical
908 $ignore = 0;
909
54310121
PP
910 $listdepth = 0;
911 $index = "";
912
913 # scan for =head directives, note their name, and build an index
914 # pointing to each of them.
915 foreach my $line (@data) {
bb9460ed 916 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791
GS
917 ($tag, $which_head, $otitle) = ($1,$2,$3);
918
919 my $title = depod( $otitle );
920 my $name = htmlify( $title );
921 $$sections{$name} = 1;
922 $title = process_text( \$otitle );
54310121 923
102c538a
DS
924 while ($which_head != $listdepth) {
925 if ($which_head > $listdepth) {
926 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
927 $listdepth++;
928 } elsif ($which_head < $listdepth) {
929 $listdepth--;
930 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
931 }
54310121 932 }
54310121
PP
933
934 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791
GS
935 "<A HREF=\"#" . $name . "\">" .
936 $title . "</A></LI>";
54310121
PP
937 }
938 }
939
940 # finish off the lists
941 while ($listdepth--) {
942 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
943 }
944
945 # get rid of bogus lists
946 $index =~ s,\t*<UL>\s*</UL>\n,,g;
947
bb9460ed 948 $ignore = 1; # restore old value;
be173d55 949
54310121
PP
950 return $index;
951}
952
953#
954# scan_items - scans the pod specified by $pod for =item directives. we
955# will use this information later on in resolving C<> links.
956#
957sub scan_items {
2a28b791 958 my( $itemref, $pod, @poddata ) = @_;
54310121
PP
959 my($i, $item);
960 local $_;
961
962 $pod =~ s/\.pod$//;
963 $pod .= ".html" if $pod;
964
965 foreach $i (0..$#poddata) {
2a28b791
GS
966 my $txt = depod( $poddata[$i] );
967
968 # figure out what kind of item it is.
969 # Build string for referencing this item.
970 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
971 next unless $1;
972 $item = $1;
973 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
974 $item = $1;
975 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
976 $item = $1;
977 } else {
978 next;
54310121 979 }
2a28b791
GS
980 my $fid = fragment_id( $item );
981 $$itemref{$fid} = "$pod" if $fid;
54310121
PP
982 }
983}
984
985#
986# process_head - convert a pod head[1-6] tag and convert it to HTML format.
987#
988sub process_head {
2a28b791 989 my($tag, $heading, $hasindex) = @_;
54310121
PP
990
991 # figure out the level of the =head
992 $tag =~ /head([1-6])/;
993 my $level = $1;
994
2a28b791
GS
995 if( $listlevel ){
996 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
997 while( $listlevel ){
998 process_back();
999 }
1000 }
1001
1002 print HTML "<P>\n";
1003 if( $level == 1 && ! $top ){
1004 print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1005 if $hasindex;
1006 print HTML "<HR>\n"
1007 }
1008
1009 my $name = htmlify( depod( $heading ) );
1010 my $convert = process_text( \$heading );
1011 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121
PP
1012}
1013
2a28b791 1014
54310121 1015#
2a28b791
GS
1016# emit_item_tag - print an =item's text
1017# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1018#
2a28b791
GS
1019my $EmittedItem;
1020
1021sub emit_item_tag($$$){
1022 my( $otext, $text, $compact ) = @_;
1023 my $item = fragment_id( $text );
54310121 1024
2a28b791
GS
1025 $EmittedItem = $item;
1026 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1027
2a28b791
GS
1028 print HTML '<STRONG>';
1029 if ($items_named{$item}++) {
1030 print HTML process_text( \$otext );
1031 } else {
1032 my $name = 'item_' . $item;
1033 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1034 }
1035 print HTML "</STRONG><BR>\n";
1036 undef( $EmittedItem );
1037}
1038
1039sub emit_li {
1040 my( $tag ) = @_;
1041 if( $items_seen[$listlevel]++ == 0 ){
1042 push( @listend, "</$tag>" );
1043 print HTML "<$tag>\n";
1044 }
1045 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1046}
1047
1048#
1049# process_item - convert a pod item tag and convert it to HTML format.
1050#
1051sub process_item {
1052 my( $otext ) = @_;
54310121
PP
1053
1054 # lots of documents start a list without doing an =over. this is
1055 # bad! but, the proper thing to do seems to be to just assume
1056 # they did do an =over. so warn them once and then continue.
2a28b791
GS
1057 if( $listlevel == 0 ){
1058 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1059 process_over();
1060 }
54310121 1061
2a28b791
GS
1062 # formatting: insert a paragraph if preceding item has >1 paragraph
1063 if( $after_lpar ){
1064 print HTML "<P></P>\n";
1065 $after_lpar = 0;
1066 }
54310121
PP
1067
1068 # remove formatting instructions from the text
2a28b791
GS
1069 my $text = depod( $otext );
1070
1071 # all the list variants:
1072 if( $text =~ /\A\*/ ){ # bullet
1073 emit_li( 'UL' );
1074 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1075 my $tag = $1;
1076 $otext =~ s/\A\*\s+//;
1077 emit_item_tag( $otext, $tag, 1 );
54310121
PP
1078 }
1079
2a28b791
GS
1080 } elsif( $text =~ /\A\d+/ ){ # numbered list
1081 emit_li( 'OL' );
1082 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1083 my $tag = $1;
1084 $otext =~ s/\A\d+\.?\s*//;
1085 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1086 }
54310121 1087
2a28b791
GS
1088 } else { # definition list
1089 emit_li( 'DL' );
1090 if ($text =~ /\A(.+)\Z/s ){ # should have text
1091 emit_item_tag( $otext, $text, 1 );
7b8d334a 1092 }
54310121
PP
1093 print HTML '<DD>';
1094 }
54310121
PP
1095 print HTML "\n";
1096}
1097
1098#
2a28b791 1099# process_over - process a pod over tag and start a corresponding HTML list.
54310121
PP
1100#
1101sub process_over {
1102 # start a new list
1103 $listlevel++;
2a28b791
GS
1104 push( @items_seen, 0 );
1105 $after_lpar = 0;
54310121
PP
1106}
1107
1108#
1109# process_back - process a pod back tag and convert it to HTML format.
1110#
1111sub process_back {
2a28b791
GS
1112 if( $listlevel == 0 ){
1113 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1114 return;
1115 }
54310121
PP
1116
1117 # close off the list. note, I check to see if $listend[$listlevel] is
1118 # defined because an =item directive may have never appeared and thus
1119 # $listend[$listlevel] may have never been initialized.
1120 $listlevel--;
2a28b791
GS
1121 if( defined $listend[$listlevel] ){
1122 print HTML '<P></P>' if $after_lpar;
1123 print HTML $listend[$listlevel];
1124 print HTML "\n";
1125 pop( @listend );
1126 }
1127 $after_lpar = 0;
54310121 1128
2a28b791
GS
1129 # clean up item count
1130 pop( @items_seen );
54310121
PP
1131}
1132
1133#
2a28b791 1134# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121
PP
1135#
1136sub process_cut {
1137 $ignore = 1;
1138}
1139
1140#
2a28b791
GS
1141# process_pod - process a pod pod tag, thus stop ignoring pod directives
1142# until we see a corresponding cut.
54310121
PP
1143#
1144sub process_pod {
1145 # no need to set $ignore to 0 cause the main loop did it
1146}
1147
1148#
2a28b791 1149# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1150# it out verbatim, if illustration, center it, otherwise ignore it.
54310121
PP
1151#
1152sub process_for {
1153 my($whom, $text) = @_;
1154 if ( $whom =~ /^(pod2)?html$/i) {
1155 print HTML $text;
c4d9b39d
IZ
1156 } elsif ($whom =~ /^illustration$/i) {
1157 1 while chomp $text;
1158 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1159 $text .= $ext, last if -r "$text$ext";
1160 }
1161 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1162 }
54310121
PP
1163}
1164
1165#
1166# process_begin - process a =begin pod tag. this pushes
1167# whom we're beginning on the begin stack. if there's a
1168# begin stack, we only print if it us.
1169#
1170sub process_begin {
1171 my($whom, $text) = @_;
1172 $whom = lc($whom);
1173 push (@begin_stack, $whom);
1174 if ( $whom =~ /^(pod2)?html$/) {
1175 print HTML $text if $text;
1176 }
1177}
1178
1179#
1180# process_end - process a =end pod tag. pop the
1181# begin stack. die if we're mismatched.
1182#
1183sub process_end {
1184 my($whom, $text) = @_;
1185 $whom = lc($whom);
1186 if ($begin_stack[-1] ne $whom ) {
1187 die "Unmatched begin/end at chunk $paragraph\n"
1188 }
2a28b791 1189 pop( @begin_stack );
54310121
PP
1190}
1191
1192#
2a28b791 1193# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1194#
2a28b791
GS
1195sub process_pre {
1196 my( $text ) = @_;
1197 my( $rest );
54310121
PP
1198 return if $ignore;
1199
54310121
PP
1200 $rest = $$text;
1201
2a28b791
GS
1202 # insert spaces in place of tabs
1203 $rest =~ s#.*#
be173d55
UC
1204 my $line = $&;
1205 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1206 $line;
1207 #eg;
54310121 1208
2a28b791
GS
1209 # convert some special chars to HTML escapes
1210 $rest =~ s/&/&amp;/g;
1211 $rest =~ s/</&lt;/g;
1212 $rest =~ s/>/&gt;/g;
1213 $rest =~ s/"/&quot;/g;
1214
1215 # try and create links for all occurrences of perl.* within
1216 # the preformatted text.
1217 $rest =~ s{
1218 (\s*)(perl\w+)
1219 }{
1220 if ( defined $pages{$2} ){ # is a link
1221 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1222 } elsif (defined $pages{dosify($2)}) { # is a link
1223 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1224 } else {
1225 "$1$2";
1226 }
1227 }xeg;
1228 $rest =~ s{
1229 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1230 }{
1231 my $url ;
1232 if ( $htmlfileurl ne '' ){
1233 # Here, we take advantage of the knowledge
1234 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1235 # Since $htmlroot eq '', we need to prepend $htmldir
1236 # on the fron of the link to get the absolute path
1237 # of the link's target. We check for a leading '/'
1238 # to avoid corrupting links that are #, file:, etc.
1239 my $old_url = $3 ;
1240 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1241 $url = relativize_url( "$old_url.html", $htmlfileurl );
1242 } else {
1243 $url = "$3.html" ;
1244 }
1245 "$1$url" ;
1246 }xeg;
1247
1248 # Look for embedded URLs and make them into links. We don't
1249 # relativize them since they are best left as the author intended.
1250
1251 my $urls = '(' . join ('|', qw{
54310121
PP
1252 http
1253 telnet
1254 mailto
1255 news
1256 gopher
1257 file
1258 wais
1259 ftp
1260 } )
1261 . ')';
1262
2a28b791
GS
1263 my $ltrs = '\w';
1264 my $gunk = '/#~:.?+=&%@!\-';
1265 my $punc = '.:?\-';
1266 my $any = "${ltrs}${gunk}${punc}";
54310121 1267
2a28b791 1268 $rest =~ s{
54310121
PP
1269 \b # start at word boundary
1270 ( # begin $1 {
29f227c9
BS
1271 $urls : # need resource and a colon
1272 (?!:) # Ignore File::, among others.
54310121
PP
1273 [$any] +? # followed by on or more
1274 # of any valid character, but
1275 # be conservative and take only
1276 # what you need to....
1277 ) # end $1 }
1278 (?= # look-ahead non-consumptive assertion
1279 [$punc]* # either 0 or more puntuation
1280 [^$any] # followed by a non-url char
1281 | # or else
1282 $ # then end of the string
1283 )
1284 }{<A HREF="$1">$1</A>}igox;
1285
2a28b791
GS
1286 # text should be as it is (verbatim)
1287 $$text = $rest;
1288}
54310121 1289
54310121 1290
2a28b791
GS
1291#
1292# pure text processing
1293#
1294# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1295# we don't want this to happen within IS
1296#
1297sub pure_text($){
1298 my $text = shift();
1299 process_puretext( $text, \$ptQuote, 1 );
54310121
PP
1300}
1301
2a28b791
GS
1302sub inIS_text($){
1303 my $text = shift();
1304 process_puretext( $text, \$ptQuote, 0 );
1305}
54310121
PP
1306
1307#
1308# process_puretext - process pure text (without pod-escapes) converting
1309# double-quotes and handling implicit C<> links.
1310#
1311sub process_puretext {
2a28b791 1312 my($text, $quote, $notinIS) = @_;
54310121 1313
2a28b791
GS
1314 ## Guessing at func() or [$@%&]*var references in plain text is destined
1315 ## to produce some strange looking ref's. uncomment to disable:
1316 ## $notinIS = 0;
1317
1318 my(@words, $lead, $trail);
54310121 1319
2a28b791
GS
1320 # convert double-quotes to single-quotes
1321 if( $$quote && $text =~ s/"/''/s ){
1322 $$quote = 0;
1323 }
1324 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1325 $$quote = 1 if $text =~ s/"/``/s;
54310121
PP
1326
1327 # keep track of leading and trailing white-space
2a28b791
GS
1328 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1329 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1330
2a28b791
GS
1331 # split at space/non-space boundaries
1332 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121
PP
1333
1334 # process each word individually
1335 foreach my $word (@words) {
2a28b791
GS
1336 # skip space runs
1337 next if $word =~ /^\s*$/;
54310121 1338 # see if we can infer a link
67398a75 1339 if( $notinIS && $word =~ /^(\w+)\((.*)\)\W*$/ ) {
54310121 1340 # has parenthesis so should have been a C<> ref
2a28b791
GS
1341 ## try for a pagename (perlXXX(1))?
1342 if( $2 =~ /^\d+$/ ){
1343 my $url = page_sect( $word, '' );
1344 if( defined $url ){
1345 $word = "<A HREF=\"$url\">the $word manpage</A>";
1346 next;
1347 }
1348 }
1349 $word = emit_C( $word );
1350
1351#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1352## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1353## # perl variables, should be a C<> ref
1354## $word = emit_C( $word );
1355
54310121
PP
1356 } elsif ($word =~ m,^\w+://\w,) {
1357 # looks like a URL
5a039dd3 1358 # Don't relativize it: leave it as the author intended
54310121 1359 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1360 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1361 # looks like an e-mail address
7b8d334a
GS
1362 my ($w1, $w2, $w3) = ("", $word, "");
1363 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1364 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1365 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1366 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1367 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1368 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1369 } else {
7b8d334a 1370 $word = html_escape($word) if $word =~ /["&<>]/;
54310121
PP
1371 }
1372 }
1373
2a28b791
GS
1374 # put everything back together
1375 return $lead . join( '', @words ) . $trail;
1376}
1377
54310121 1378
2a28b791
GS
1379#
1380# process_text - handles plaintext that appears in the input pod file.
1381# there may be pod commands embedded within the text so those must be
1382# converted to html commands.
1383#
7ba65c74 1384
35f2feb0
GS
1385sub process_text1($$;$$);
1386sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1387sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1388
2a28b791
GS
1389sub process_text {
1390 return if $ignore;
1391 my( $tref ) = @_;
1392 my $res = process_text1( 0, $tref );
1393 $$tref = $res;
1394}
1395
35f2feb0
GS
1396sub process_text1($$;$$){
1397 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791
GS
1398 $lev++ unless defined $func;
1399 my $res = '';
1400
228a48a5 1401 $func ||= '';
2a28b791
GS
1402 if( $func eq 'B' ){
1403 # B<text> - boldface
1404 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1405
1406 } elsif( $func eq 'C' ){
1407 # C<code> - can be a ref or <CODE></CODE>
1408 # need to extract text
35f2feb0 1409 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791
GS
1410
1411 ## clean-up of the link target
1412 my $text = depod( $par );
1413
1414 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1415 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1416
1417 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1418
1419 } elsif( $func eq 'E' ){
1420 # E<x> - convert to character
1421 $$rstr =~ s/^(\w+)>//;
1422 $res = "&$1;";
1423
1424 } elsif( $func eq 'F' ){
1425 # F<filename> - italizice
1426 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1427
1428 } elsif( $func eq 'I' ){
1429 # I<text> - italizice
1430 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1431
1432 } elsif( $func eq 'L' ){
1433 # L<link> - link
1434 ## L<text|cross-ref> => produce text, use cross-ref for linking
1435 ## L<cross-ref> => make text from cross-ref
1436 ## need to extract text
35f2feb0 1437 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791
GS
1438
1439 # some L<>'s that shouldn't be:
1440 # a) full-blown URL's are emitted as-is
1441 if( $par =~ m{^\w+://}s ){
1442 return make_URL_href( $par );
1443 }
1444 # b) C<...> is stripped and treated as C<>
1445 if( $par =~ /^C<(.*)>$/ ){
1446 my $text = depod( $1 );
1447 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1448 }
1449
1450 # analyze the contents
1451 $par =~ s/\n/ /g; # undo word-wrapped tags
1452 my $opar = $par;
1453 my $linktext;
1454 if( $par =~ s{^([^|]+)\|}{} ){
1455 $linktext = $1;
1456 }
1457
1458 # make sure sections start with a /
1459 $par =~ s{^"}{/"};
1460
1461 my( $page, $section, $ident );
1462
1463 # check for link patterns
1464 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1465 # we've got a name/ident (no quotes)
1466 ( $page, $ident ) = ( $1, $2 );
1467 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1468
1469 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1470 # even though this should be a "section", we go for ident first
1471 ( $page, $ident ) = ( $1, $2 );
1472 ### print STDERR "--> L<$par> to page $page, section $section\n";
1473
1474 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1475 ( $page, $section ) = ( '', $par );
1476 ### print STDERR "--> L<$par> to void page, section $section\n";
1477
1478 } else {
1479 ( $page, $section ) = ( $par, '' );
1480 ### print STDERR "--> L<$par> to page $par, void section\n";
1481 }
1482
1483 # now, either $section or $ident is defined. the convoluted logic
1484 # below tries to resolve L<> according to what the user specified.
1485 # failing this, we try to find the next best thing...
1486 my( $url, $ltext, $fid );
1487
1488 RESOLVE: {
1489 if( defined $ident ){
1490 ## try to resolve $ident as an item
1491 ( $url, $fid ) = coderef( $page, $ident );
1492 if( $url ){
1493 if( ! defined( $linktext ) ){
1494 $linktext = $ident;
1495 $linktext .= " in " if $ident && $page;
1496 $linktext .= "the $page manpage" if $page;
1497 }
1498 ### print STDERR "got coderef url=$url\n";
1499 last RESOLVE;
1500 }
1501 ## no luck: go for a section (auto-quoting!)
1502 $section = $ident;
1503 }
1504 ## now go for a section
1505 my $htmlsection = htmlify( $section );
1506 $url = page_sect( $page, $htmlsection );
1507 if( $url ){
1508 if( ! defined( $linktext ) ){
1509 $linktext = $section;
1510 $linktext .= " in " if $section && $page;
1511 $linktext .= "the $page manpage" if $page;
1512 }
1513 ### print STDERR "got page/section url=$url\n";
1514 last RESOLVE;
1515 }
1516 ## no luck: go for an ident
1517 if( $section ){
1518 $ident = $section;
1519 } else {
1520 $ident = $page;
1521 $page = undef();
1522 }
1523 ( $url, $fid ) = coderef( $page, $ident );
1524 if( $url ){
1525 if( ! defined( $linktext ) ){
1526 $linktext = $ident;
1527 $linktext .= " in " if $ident && $page;
1528 $linktext .= "the $page manpage" if $page;
1529 }
1530 ### print STDERR "got section=>coderef url=$url\n";
1531 last RESOLVE;
1532 }
1533
1534 # warning; show some text.
1535 $linktext = $opar unless defined $linktext;
1536 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1537 }
1538
1539 # now we have an URL or just plain code
1540 $$rstr = $linktext . '>' . $$rstr;
1541 if( defined( $url ) ){
1542 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1543 } else {
1544 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1545 }
1546
1547 } elsif( $func eq 'S' ){
1548 # S<text> - non-breaking spaces
1549 $res = process_text1( $lev, $rstr );
1550 $res =~ s/ /&nbsp;/g;
1551
1552 } elsif( $func eq 'X' ){
1553 # X<> - ignore
1554 $$rstr =~ s/^[^>]*>//;
1555
1556 } elsif( $func eq 'Z' ){
1557 # Z<> - empty
1558 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1559 unless $$rstr =~ s/^>//;
1560
1561 } else {
35f2feb0
GS
1562 my $term = pattern $closing;
1563 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1564 # all others: either recurse into new function or
35f2feb0 1565 # terminate at closing angle bracket(s)
2a28b791 1566 my $pt = $1;
35f2feb0 1567 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1568 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
35f2feb0
GS
1569 return $res if !$3 && $lev > 1;
1570 if( $3 ){
1571 $res .= process_text1( $lev, $rstr, $3, closing $4 );
2a28b791
GS
1572 }
1573
1574 }
1575 if( $lev == 1 ){
1576 $res .= pure_text( $$rstr );
54310121 1577 } else {
2a28b791 1578 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121
PP
1579 }
1580 }
2a28b791
GS
1581 return $res;
1582}
54310121 1583
2a28b791
GS
1584#
1585# go_ahead: extract text of an IS (can be nested)
1586#
35f2feb0
GS
1587sub go_ahead($$$){
1588 my( $rstr, $func, $closing ) = @_;
2a28b791 1589 my $res = '';
35f2feb0
GS
1590 my @closing = ($closing);
1591 while( $$rstr =~
1592 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1593 $res .= $1;
35f2feb0
GS
1594 unless( $3 ){
1595 shift @closing;
1596 return $res unless @closing;
2a28b791 1597 } else {
35f2feb0 1598 unshift @closing, closing $4;
2a28b791
GS
1599 }
1600 $res .= $2;
1601 }
1602 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1603 return $res;
54310121
PP
1604}
1605
1606#
2a28b791
GS
1607# emit_C - output result of C<text>
1608# $text is the depod-ed text
54310121 1609#
2a28b791
GS
1610sub emit_C($;$){
1611 my( $text, $nocode ) = @_;
1612 my $res;
1613 my( $url, $fid ) = coderef( undef(), $text );
1614
1615 # need HTML-safe text
1616 my $linktext = html_escape( $text );
1617
1618 if( defined( $url ) &&
1619 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1620 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1621 } elsif( 0 && $nocode ){
1622 $res = $linktext;
1623 } else {
1624 $res = "<CODE>$linktext</CODE>";
1625 }
1626 return $res;
54310121
PP
1627}
1628
1629#
2a28b791
GS
1630# html_escape: make text safe for HTML
1631#
1632sub html_escape {
1633 my $rest = $_[0];
1634 $rest =~ s/&/&amp;/g;
1635 $rest =~ s/</&lt;/g;
1636 $rest =~ s/>/&gt;/g;
1637 $rest =~ s/"/&quot;/g;
1638 return $rest;
1639}
1640
1641
1642#
39e571d4
ML
1643# dosify - convert filenames to 8.3
1644#
1645sub dosify {
1646 my($str) = @_;
fe4c6be1 1647 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4
ML
1648 if ($Is83) {
1649 $str = lc $str;
1650 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1651 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1652 }
1653 return $str;
1654}
1655
1656#
2a28b791 1657# page_sect - make an URL from the text of a L<>
54310121 1658#
2a28b791
GS
1659sub page_sect($$) {
1660 my( $page, $section ) = @_;
1661 my( $linktext, $page83, $link); # work strings
1662
1663 # check if we know that this is a section in this page
1664 if (!defined $pages{$page} && defined $sections{$page}) {
1665 $section = $page;
1666 $page = "";
1667 ### print STDERR "reset page='', section=$section\n";
54310121
PP
1668 }
1669
39e571d4
ML
1670 $page83=dosify($page);
1671 $page=$page83 if (defined $pages{$page83});
54310121 1672 if ($page eq "") {
2a28b791 1673 $link = "#" . htmlify( $section );
350ccacd 1674 } elsif ( $page =~ /::/ ) {
350ccacd 1675 $page =~ s,::,/,g;
29f227c9
BS
1676 # Search page cache for an entry keyed under the html page name,
1677 # then look to see what directory that page might be in. NOTE:
1678 # this will only find one page. A better solution might be to produce
1679 # an intermediate page that is an index to all such pages.
1680 my $page_name = $page ;
1681 $page_name =~ s,^.*/,, ;
1682 if ( defined( $pages{ $page_name } ) &&
1683 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1684 ) {
1685 $page = $1 ;
1686 }
1687 else {
1688 # NOTE: This branch assumes that all A::B pages are located in
1689 # $htmlroot/A/B.html . This is often incorrect, since they are
1690 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1691 # analyze the contents of %pages and figure out where any
1692 # cousins of A::B are, then assume that. So, if A::B isn't found,
1693 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1694 # lib/A/B.pm. This is also limited, but it's an improvement.
1695 # Maybe a hints file so that the links point to the correct places
2a28b791
GS
1696 # nonetheless?
1697
29f227c9 1698 }
350ccacd 1699 $link = "$htmlroot/$page.html";
2a28b791 1700 $link .= "#" . htmlify( $section ) if ($section);
54310121 1701 } elsif (!defined $pages{$page}) {
54310121 1702 $link = "";
54310121 1703 } else {
2a28b791
GS
1704 $section = htmlify( $section ) if $section ne "";
1705 ### print STDERR "...section=$section\n";
54310121
PP
1706
1707 # if there is a directory by the name of the page, then assume that an
1708 # appropriate section will exist in the subdirectory
29f227c9
BS
1709# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1710 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1711 $link = "$htmlroot/$1/$section.html";
2a28b791 1712 ### print STDERR "...link=$link\n";
54310121
PP
1713
1714 # since there is no directory by the name of the page, the section will
1715 # have to exist within a .html of the same name. thus, make sure there
1716 # is a .pod or .pm that might become that .html
1717 } else {
2a28b791
GS
1718 $section = "#$section" if $section;
1719 ### print STDERR "...section=$section\n";
1720
54310121
PP
1721 # check if there is a .pod with the page name
1722 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1723 $link = "$htmlroot/$1.html$section";
1724 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1725 $link = "$htmlroot/$1.html$section";
1726 } else {
54310121 1727 $link = "";
54310121
PP
1728 }
1729 }
1730 }
1731
54310121 1732 if ($link) {
29f227c9
BS
1733 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1734 # implies $htmlroot eq ''. This means that the link in question
1735 # needs a prefix of $htmldir if it begins with '/'. The test for
1736 # the initial '/' is done to avoid '#'-only links, and to allow
1737 # for other kinds of links, like file:, ftp:, etc.
1738 my $url ;
1739 if ( $htmlfileurl ne '' ) {
2a28b791
GS
1740 $link = "$htmldir$link" if $link =~ m{^/};
1741 $url = relativize_url( $link, $htmlfileurl );
1742# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9
BS
1743 }
1744 else {
1745 $url = $link ;
1746 }
2a28b791 1747 return $url;
29f227c9 1748
54310121 1749 } else {
2a28b791 1750 return undef();
54310121 1751 }
54310121
PP
1752}
1753
1754#
29f227c9
BS
1755# relativize_url - convert an absolute URL to one relative to a base URL.
1756# Assumes both end in a filename.
1757#
1758sub relativize_url {
1759 my ($dest,$source) = @_ ;
1760
1761 my ($dest_volume,$dest_directory,$dest_file) =
1762 File::Spec::Unix->splitpath( $dest ) ;
1763 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1764
1765 my ($source_volume,$source_directory,$source_file) =
1766 File::Spec::Unix->splitpath( $source ) ;
1767 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1768
1769 my $rel_path = '' ;
1770 if ( $dest ne '' ) {
1771 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1772 }
1773
1774 if ( $rel_path ne '' &&
1775 substr( $rel_path, -1 ) ne '/' &&
1776 substr( $dest_file, 0, 1 ) ne '#'
1777 ) {
1778 $rel_path .= "/$dest_file" ;
1779 }
1780 else {
1781 $rel_path .= "$dest_file" ;
1782 }
1783
1784 return $rel_path ;
1785}
1786
54310121
PP
1787
1788#
2a28b791 1789# coderef - make URL from the text of a C<>
54310121 1790#
2a28b791
GS
1791sub coderef($$){
1792 my( $page, $item ) = @_;
1793 my( $url );
1794
1795 my $fid = fragment_id( $item );
2a28b791
GS
1796 if( defined( $page ) ){
1797 # we have been given a $page...
1798 $page =~ s{::}{/}g;
1799
1800 # Do we take it? Item could be a section!
228a48a5 1801 my $base = $items{$fid} || "";
2a28b791
GS
1802 $base =~ s{[^/]*/}{};
1803 if( $base ne "$page.html" ){
1804 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1805 $page = undef();
1806 }
54310121 1807
2a28b791
GS
1808 } else {
1809 # no page - local items precede cached items
67398a75
GS
1810 if( defined( $fid ) ){
1811 if( exists $local_items{$fid} ){
1812 $page = $local_items{$fid};
1813 } else {
1814 $page = $items{$fid};
1815 }
2a28b791
GS
1816 }
1817 }
54310121
PP
1818
1819 # if there was a pod file that we found earlier with an appropriate
1820 # =item directive, then create a link to that page.
2a28b791
GS
1821 if( defined $page ){
1822 if( $page ){
228a48a5 1823 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1824 $page = $1 . '.html';
29f227c9 1825 }
2a28b791 1826 my $link = "$htmlroot/$page#item_$fid";
54310121 1827
2a28b791
GS
1828 # Here, we take advantage of the knowledge that $htmlfileurl
1829 # ne '' implies $htmlroot eq ''.
1830 if ( $htmlfileurl ne '' ) {
1831 $link = "$htmldir$link" ;
1832 $url = relativize_url( $link, $htmlfileurl ) ;
1833 } else {
1834 $url = $link ;
1835 }
1836 } else {
1837 $url = "#item_" . $fid;
1838 }
54310121 1839
2a28b791
GS
1840 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1841 }
1842 return( $url, $fid );
54310121
PP
1843}
1844
54310121
PP
1845
1846
1847#
29f227c9
BS
1848# Adapted from Nick Ing-Simmons' PodToHtml package.
1849sub relative_url {
1850 my $source_file = shift ;
1851 my $destination_file = shift;
1852
1853 my $source = URI::file->new_abs($source_file);
1854 my $uo = URI::file->new($destination_file,$source)->abs;
1855 return $uo->rel->as_string;
1856}
1857
1858
1859#
54310121
PP
1860# finish_list - finish off any pending HTML lists. this should be called
1861# after the entire pod file has been read and converted.
1862#
1863sub finish_list {
7b8d334a 1864 while ($listlevel > 0) {
54310121
PP
1865 print HTML "</DL>\n";
1866 $listlevel--;
1867 }
1868}
1869
1870#
1871# htmlify - converts a pod section specification to a suitable section
2a28b791
GS
1872# specification for HTML. Note that we keep spaces and special characters
1873# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121
PP
1874#
1875sub htmlify {
2a28b791
GS
1876 my( $heading) = @_;
1877 $heading =~ s/(\s+)/ /g;
1878 $heading =~ s/\s+\Z//;
1879 $heading =~ s/\A\s+//;
1880 # The hyphen is a disgrace to the English language.
1881 $heading =~ s/[-"?]//g;
1882 $heading = lc( $heading );
1883 return $heading;
1884}
54310121 1885
2a28b791
GS
1886#
1887# depod - convert text by eliminating all interior sequences
1888# Note: can be called with copy or modify semantics
1889#
1890my %E2c;
67398a75
GS
1891$E2c{lt} = '<';
1892$E2c{gt} = '>';
1893$E2c{sol} = '/';
2a28b791 1894$E2c{verbar} = '|';
67398a75 1895$E2c{amp} = '&'; # in Tk's pods
2a28b791 1896
35f2feb0 1897sub depod1($;$$);
7ba65c74 1898
2a28b791
GS
1899sub depod($){
1900 my $string;
1901 if( ref( $_[0] ) ){
1902 $string = ${$_[0]};
1903 ${$_[0]} = depod1( \$string );
1904 } else {
1905 $string = $_[0];
1906 depod1( \$string );
1907 }
1908}
54310121 1909
35f2feb0
GS
1910sub depod1($;$$){
1911 my( $rstr, $func, $closing ) = @_;
2a28b791 1912 my $res = '';
228a48a5 1913 return $res unless defined $$rstr;
2a28b791
GS
1914 if( ! defined( $func ) ){
1915 # skip to next begin of an interior sequence
35f2feb0 1916 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 1917 # recurse into its text
35f2feb0 1918 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791
GS
1919 }
1920 $res .= $$rstr;
1921 } elsif( $func eq 'E' ){
1922 # E<x> - convert to character
1923 $$rstr =~ s/^(\w+)>//;
228a48a5 1924 $res .= $E2c{$1} || "";
2a28b791
GS
1925 } elsif( $func eq 'X' ){
1926 # X<> - ignore
1927 $$rstr =~ s/^[^>]*>//;
1928 } elsif( $func eq 'Z' ){
1929 # Z<> - empty
1930 $$rstr =~ s/^>//;
1931 } else {
1932 # all others: either recurse into new function or
1933 # terminate at closing angle bracket
35f2feb0
GS
1934 my $term = pattern $closing;
1935 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 1936 $res .= $1;
35f2feb0
GS
1937 last unless $3;
1938 $res .= depod1( $rstr, $3, closing $4 );
2a28b791
GS
1939 }
1940 ## If we're here and $2 ne '>': undelimited interior sequence.
1941 ## Ignored, as this is called without proper indication of where we are.
1942 ## Rely on process_text to produce diagnostics.
1943 }
1944 return $res;
1945}
54310121 1946
2a28b791
GS
1947#
1948# fragment_id - construct a fragment identifier from:
1949# a) =item text
1950# b) contents of C<...>
1951#
1952my @hc;
1953sub fragment_id {
1954 my $text = shift();
1955 $text =~ s/\s+\Z//s;
1956 if( $text ){
1957 # a method or function?
1958 return $1 if $text =~ /(\w+)\s*\(/;
1959 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1960
1961 # a variable name?
1962 return $1 if $text =~ /^([$@%*]\S+)/;
1963
1964 # some pattern matching operator?
1965 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1966
1967 # fancy stuff... like "do { }"
1968 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1969
1970 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1971 # and some funnies with ... Module ...
1972 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1973 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1974
1975 # text? normalize!
1976 $text =~ s/\s+/_/sg;
1977 $text =~ s{(\W)}{
1978 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1979 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1980 $text = substr( $text, 0, 50 );
1981 } else {
1982 return undef();
1983 }
54310121
PP
1984}
1985
2a28b791
GS
1986#
1987# make_URL_href - generate HTML href from URL
1988# Special treatment for CGI queries.
1989#
1990sub make_URL_href($){
1991 my( $url ) = @_;
1992 if( $url !~
228a48a5 1993 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791
GS
1994 $url = "<A HREF=\"$url\">$url</A>";
1995 }
1996 return $url;
54310121
PP
1997}
1998
19991;