Commit | Line | Data |
---|---|---|
54310121 | 1 | package Pod::Html; |
2 | ||
3 | use Pod::Functions; | |
4 | use Getopt::Long; # package for handling command-line parameters | |
5a039dd3 | 5 | use File::PathConvert 0.84 ; # Used to do relative URLs |
54310121 | 6 | require Exporter; |
7b8d334a GS |
7 | use vars qw($VERSION); |
8 | $VERSION = 1.01; | |
54310121 | 9 | @ISA = Exporter; |
10 | @EXPORT = qw(pod2html htmlify); | |
11 | use Cwd; | |
12 | ||
13 | use Carp; | |
14 | ||
3ec07288 FK |
15 | use locale; # make \w work right in non-ASCII lands |
16 | ||
54310121 | 17 | use strict; |
18 | ||
7b8d334a GS |
19 | use Config; |
20 | ||
54310121 | 21 | =head1 NAME |
22 | ||
7b8d334a | 23 | Pod::Html - module to convert pod files to HTML |
54310121 | 24 | |
25 | =head1 SYNOPSIS | |
26 | ||
7b8d334a | 27 | use Pod::Html; |
54310121 | 28 | pod2html([options]); |
29 | ||
30 | =head1 DESCRIPTION | |
31 | ||
32 | Converts files from pod format (see L<perlpod>) to HTML format. It | |
33 | can automatically generate indexes and cross-references, and it keeps | |
34 | a cache of things it knows how to cross-reference. | |
35 | ||
36 | =head1 ARGUMENTS | |
37 | ||
38 | Pod::Html takes the following arguments: | |
39 | ||
40 | =over 4 | |
41 | ||
42 | =item help | |
43 | ||
44 | --help | |
45 | ||
46 | Displays the usage message. | |
47 | ||
5a039dd3 BS |
48 | =item htmldir |
49 | ||
50 | --htmldir=name | |
51 | ||
52 | Sets the directory in which the resulting HTML file is placed. This | |
53 | is used to generate relative links to other files. | |
54 | ||
54310121 | 55 | =item htmlroot |
56 | ||
57 | --htmlroot=name | |
58 | ||
59 | Sets the base URL for the HTML files. When cross-references are made, | |
60 | the HTML root is prepended to the URL. | |
61 | ||
62 | =item infile | |
63 | ||
64 | --infile=name | |
65 | ||
66 | Specify the pod file to convert. Input is taken from STDIN if no | |
67 | infile is specified. | |
68 | ||
69 | =item outfile | |
70 | ||
71 | --outfile=name | |
72 | ||
73 | Specify the HTML file to create. Output goes to STDOUT if no outfile | |
74 | is specified. | |
75 | ||
76 | =item podroot | |
77 | ||
78 | --podroot=name | |
79 | ||
80 | Specify the base directory for finding library pods. | |
81 | ||
82 | =item podpath | |
83 | ||
84 | --podpath=name:...:name | |
85 | ||
86 | Specify which subdirectories of the podroot contain pod files whose | |
87 | HTML converted forms can be linked-to in cross-references. | |
88 | ||
89 | =item libpods | |
90 | ||
91 | --libpods=name:...:name | |
92 | ||
93 | List of page names (eg, "perlfunc") which contain linkable C<=item>s. | |
94 | ||
95 | =item netscape | |
96 | ||
97 | --netscape | |
98 | ||
99 | Use Netscape HTML directives when applicable. | |
100 | ||
101 | =item nonetscape | |
102 | ||
103 | --nonetscape | |
104 | ||
105 | Do not use Netscape HTML directives (default). | |
106 | ||
107 | =item index | |
108 | ||
109 | --index | |
110 | ||
111 | Generate an index at the top of the HTML file (default behaviour). | |
112 | ||
113 | =item noindex | |
114 | ||
115 | --noindex | |
116 | ||
117 | Do not generate an index at the top of the HTML file. | |
118 | ||
119 | ||
120 | =item recurse | |
121 | ||
122 | --recurse | |
123 | ||
124 | Recurse into subdirectories specified in podpath (default behaviour). | |
125 | ||
126 | =item norecurse | |
127 | ||
128 | --norecurse | |
129 | ||
130 | Do not recurse into subdirectories specified in podpath. | |
131 | ||
132 | =item title | |
133 | ||
134 | --title=title | |
135 | ||
136 | Specify the title of the resulting HTML file. | |
137 | ||
138 | =item verbose | |
139 | ||
140 | --verbose | |
141 | ||
142 | Display progress messages. | |
143 | ||
144 | =back | |
145 | ||
146 | =head1 EXAMPLE | |
147 | ||
148 | pod2html("pod2html", | |
149 | "--podpath=lib:ext:pod:vms", | |
150 | "--podroot=/usr/src/perl", | |
151 | "--htmlroot=/perl/nmanual", | |
152 | "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop", | |
153 | "--recurse", | |
154 | "--infile=foo.pod", | |
155 | "--outfile=/perl/nmanual/foo.html"); | |
156 | ||
157 | =head1 AUTHOR | |
158 | ||
159 | Tom Christiansen, E<lt>tchrist@perl.comE<gt>. | |
160 | ||
161 | =head1 BUGS | |
162 | ||
163 | Has trouble with C<> etc in = commands. | |
164 | ||
165 | =head1 SEE ALSO | |
166 | ||
167 | L<perlpod> | |
168 | ||
169 | =head1 COPYRIGHT | |
170 | ||
171 | This program is distributed under the Artistic License. | |
172 | ||
173 | =cut | |
174 | ||
175 | my $dircache = "pod2html-dircache"; | |
176 | my $itemcache = "pod2html-itemcache"; | |
177 | ||
178 | my @begin_stack = (); # begin/end stack | |
179 | ||
5a039dd3 BS |
180 | my @libpods = (); # files to search for links from C<> directives |
181 | my $htmlroot = "/"; # http-server base directory from which all | |
54310121 | 182 | # relative paths in $podpath stem. |
5a039dd3 BS |
183 | my $htmldir = ""; # The directory to which the html pages |
184 | # will (eventually) be written. | |
54310121 | 185 | my $htmlfile = ""; # write to stdout by default |
5a039dd3 BS |
186 | my $htmlfileurl = ""; # The url that other files would use to |
187 | # refer to this file. This is only used | |
188 | # to make relative urls that point to | |
189 | # other files. | |
54310121 | 190 | my $podfile = ""; # read from stdin by default |
191 | my @podpath = (); # list of directories containing library pods. | |
192 | my $podroot = "."; # filesystem base directory from which all | |
193 | # relative paths in $podpath stem. | |
194 | my $recurse = 1; # recurse on subdirectories in $podpath. | |
195 | my $verbose = 0; # not verbose by default | |
196 | my $doindex = 1; # non-zero if we should generate an index | |
197 | my $listlevel = 0; # current list depth | |
198 | my @listitem = (); # stack of HTML commands to use when a =item is | |
199 | # encountered. the top of the stack is the | |
200 | # current list. | |
201 | my @listdata = (); # similar to @listitem, but for the text after | |
202 | # an =item | |
203 | my @listend = (); # similar to @listitem, but the text to use to | |
204 | # end the list. | |
205 | my $ignore = 1; # whether or not to format text. we don't | |
206 | # format text until we hit our first pod | |
207 | # directive. | |
208 | ||
209 | my %items_named = (); # for the multiples of the same item in perlfunc | |
210 | my @items_seen = (); | |
211 | my $netscape = 0; # whether or not to use netscape directives. | |
212 | my $title; # title to give the pod(s) | |
213 | my $top = 1; # true if we are at the top of the doc. used | |
214 | # to prevent the first <HR> directive. | |
215 | my $paragraph; # which paragraph we're processing (used | |
216 | # for error messages) | |
217 | my %pages = (); # associative array used to find the location | |
218 | # of pages referenced by L<> links. | |
219 | my %sections = (); # sections within this page | |
220 | my %items = (); # associative array used to find the location | |
221 | # of =item directives referenced by C<> links | |
39e571d4 LM |
222 | my $Is83; # is dos with short filenames (8.3) |
223 | ||
54310121 | 224 | sub init_globals { |
225 | $dircache = "pod2html-dircache"; | |
226 | $itemcache = "pod2html-itemcache"; | |
227 | ||
228 | @begin_stack = (); # begin/end stack | |
229 | ||
230 | @libpods = (); # files to search for links from C<> directives | |
231 | $htmlroot = "/"; # http-server base directory from which all | |
232 | # relative paths in $podpath stem. | |
233 | $htmlfile = ""; # write to stdout by default | |
234 | $podfile = ""; # read from stdin by default | |
235 | @podpath = (); # list of directories containing library pods. | |
236 | $podroot = "."; # filesystem base directory from which all | |
237 | # relative paths in $podpath stem. | |
238 | $recurse = 1; # recurse on subdirectories in $podpath. | |
239 | $verbose = 0; # not verbose by default | |
240 | $doindex = 1; # non-zero if we should generate an index | |
241 | $listlevel = 0; # current list depth | |
242 | @listitem = (); # stack of HTML commands to use when a =item is | |
243 | # encountered. the top of the stack is the | |
244 | # current list. | |
245 | @listdata = (); # similar to @listitem, but for the text after | |
246 | # an =item | |
247 | @listend = (); # similar to @listitem, but the text to use to | |
248 | # end the list. | |
249 | $ignore = 1; # whether or not to format text. we don't | |
250 | # format text until we hit our first pod | |
251 | # directive. | |
252 | ||
253 | @items_seen = (); | |
254 | %items_named = (); | |
255 | $netscape = 0; # whether or not to use netscape directives. | |
256 | $title = ''; # title to give the pod(s) | |
257 | $top = 1; # true if we are at the top of the doc. used | |
258 | # to prevent the first <HR> directive. | |
259 | $paragraph = ''; # which paragraph we're processing (used | |
260 | # for error messages) | |
54310121 | 261 | %sections = (); # sections within this page |
3e3baf6d TB |
262 | |
263 | # These are not reinitialised here but are kept as a cache. | |
264 | # See get_cache and related cache management code. | |
265 | #%pages = (); # associative array used to find the location | |
266 | # of pages referenced by L<> links. | |
267 | #%items = (); # associative array used to find the location | |
54310121 | 268 | # of =item directives referenced by C<> links |
39e571d4 | 269 | $Is83=$^O eq 'dos'; |
54310121 | 270 | } |
271 | ||
272 | sub pod2html { | |
273 | local(@ARGV) = @_; | |
274 | local($/); | |
275 | local $_; | |
276 | ||
277 | init_globals(); | |
278 | ||
39e571d4 LM |
279 | $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN()); |
280 | ||
54310121 | 281 | # cache of %pages and %items from last time we ran pod2html |
54310121 | 282 | |
283 | #undef $opt_help if defined $opt_help; | |
284 | ||
285 | # parse the command-line parameters | |
286 | parse_command_line(); | |
287 | ||
288 | # set some variables to their default values if necessary | |
289 | local *POD; | |
290 | unless (@ARGV && $ARGV[0]) { | |
291 | $podfile = "-" unless $podfile; # stdin | |
292 | open(POD, "<$podfile") | |
293 | || die "$0: cannot open $podfile file for input: $!\n"; | |
294 | } else { | |
295 | $podfile = $ARGV[0]; # XXX: might be more filenames | |
296 | *POD = *ARGV; | |
297 | } | |
298 | $htmlfile = "-" unless $htmlfile; # stdout | |
299 | $htmlroot = "" if $htmlroot eq "/"; # so we don't get a // | |
5a039dd3 BS |
300 | $htmldir =~ s#/$## ; # so we don't get a // |
301 | if ( $htmldir ne '' | |
302 | && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir | |
303 | ) | |
304 | { | |
305 | $htmlfileurl= "$htmlroot/" . substr( $htmlfile, length( $htmldir ) + 1 ); | |
306 | } | |
307 | File::PathConvert::setfstype( 'URL' ) ; | |
54310121 | 308 | |
309 | # read the pod a paragraph at a time | |
310 | warn "Scanning for sections in input file(s)\n" if $verbose; | |
311 | $/ = ""; | |
312 | my @poddata = <POD>; | |
313 | close(POD); | |
314 | ||
315 | # scan the pod for =head[1-6] directives and build an index | |
316 | my $index = scan_headings(\%sections, @poddata); | |
317 | ||
3e3baf6d TB |
318 | unless($index) { |
319 | warn "No pod in $podfile\n" if $verbose; | |
320 | return; | |
321 | } | |
322 | ||
54310121 | 323 | # open the output file |
324 | open(HTML, ">$htmlfile") | |
325 | || die "$0: cannot open $htmlfile file for output: $!\n"; | |
326 | ||
d011ffae GS |
327 | # put a title in the HTML file if one wasn't specified |
328 | if ($title eq '') { | |
329 | TITLE_SEARCH: { | |
330 | for (my $i = 0; $i < @poddata; $i++) { | |
331 | if ($poddata[$i] =~ /^=head1\s*NAME\b/m) { | |
332 | for my $para ( @poddata[$i, $i+1] ) { | |
333 | last TITLE_SEARCH | |
334 | if ($title) = $para =~ /(\S+\s+-+.*\S)/s; | |
335 | } | |
336 | } | |
54310121 | 337 | |
d011ffae GS |
338 | } |
339 | } | |
340 | } | |
3e3baf6d TB |
341 | if (!$title and $podfile =~ /\.pod$/) { |
342 | # probably a split pod so take first =head[12] as title | |
343 | for (my $i = 0; $i < @poddata; $i++) { | |
344 | last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; | |
345 | } | |
346 | warn "adopted '$title' as title for $podfile\n" | |
347 | if $verbose and $title; | |
348 | } | |
7b8d334a GS |
349 | if ($title) { |
350 | $title =~ s/\s*\(.*\)//; | |
351 | } else { | |
3e3baf6d | 352 | warn "$0: no title for $podfile"; |
54310121 | 353 | $podfile =~ /^(.*)(\.[^.\/]+)?$/; |
354 | $title = ($podfile eq "-" ? 'No Title' : $1); | |
3e3baf6d | 355 | warn "using $title" if $verbose; |
54310121 | 356 | } |
357 | print HTML <<END_OF_HEAD; | |
7b8d334a GS |
358 | <HTML> |
359 | <HEAD> | |
360 | <TITLE>$title</TITLE> | |
361 | <LINK REV="made" HREF="mailto:$Config{perladmin}"> | |
362 | </HEAD> | |
54310121 | 363 | |
7b8d334a | 364 | <BODY> |
54310121 | 365 | |
366 | END_OF_HEAD | |
367 | ||
3e3baf6d TB |
368 | # load/reload/validate/cache %pages and %items |
369 | get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse); | |
54310121 | 370 | |
371 | # scan the pod for =item directives | |
372 | scan_items("", \%items, @poddata); | |
373 | ||
374 | # put an index at the top of the file. note, if $doindex is 0 we | |
375 | # still generate an index, but surround it with an html comment. | |
376 | # that way some other program can extract it if desired. | |
377 | $index =~ s/--+/-/g; | |
378 | print HTML "<!-- INDEX BEGIN -->\n"; | |
379 | print HTML "<!--\n" unless $doindex; | |
380 | print HTML $index; | |
381 | print HTML "-->\n" unless $doindex; | |
382 | print HTML "<!-- INDEX END -->\n\n"; | |
383 | print HTML "<HR>\n" if $doindex; | |
384 | ||
385 | # now convert this file | |
386 | warn "Converting input file\n" if $verbose; | |
387 | foreach my $i (0..$#poddata) { | |
388 | $_ = $poddata[$i]; | |
389 | $paragraph = $i+1; | |
390 | if (/^(=.*)/s) { # is it a pod directive? | |
391 | $ignore = 0; | |
392 | $_ = $1; | |
393 | if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin | |
394 | process_begin($1, $2); | |
395 | } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end | |
396 | process_end($1, $2); | |
397 | } elsif (/^=cut/) { # =cut | |
398 | process_cut(); | |
399 | } elsif (/^=pod/) { # =pod | |
400 | process_pod(); | |
401 | } else { | |
402 | next if @begin_stack && $begin_stack[-1] ne 'html'; | |
403 | ||
7b8d334a | 404 | if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading |
54310121 | 405 | process_head($1, $2); |
7b8d334a | 406 | } elsif (/^=item\s*(.*\S)/sm) { # =item text |
54310121 | 407 | process_item($1); |
408 | } elsif (/^=over\s*(.*)/) { # =over N | |
409 | process_over(); | |
410 | } elsif (/^=back/) { # =back | |
411 | process_back(); | |
412 | } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for | |
413 | process_for($1,$2); | |
414 | } else { | |
415 | /^=(\S*)\s*/; | |
416 | warn "$0: $podfile: unknown pod directive '$1' in " | |
417 | . "paragraph $paragraph. ignoring.\n"; | |
418 | } | |
419 | } | |
420 | $top = 0; | |
421 | } | |
422 | else { | |
423 | next if $ignore; | |
424 | next if @begin_stack && $begin_stack[-1] ne 'html'; | |
425 | my $text = $_; | |
426 | process_text(\$text, 1); | |
7b8d334a | 427 | print HTML "<P>\n$text"; |
54310121 | 428 | } |
429 | } | |
430 | ||
431 | # finish off any pending directives | |
432 | finish_list(); | |
433 | print HTML <<END_OF_TAIL; | |
7b8d334a | 434 | </BODY> |
54310121 | 435 | |
7b8d334a | 436 | </HTML> |
54310121 | 437 | END_OF_TAIL |
438 | ||
439 | # close the html file | |
440 | close(HTML); | |
441 | ||
442 | warn "Finished\n" if $verbose; | |
443 | } | |
444 | ||
445 | ############################################################################## | |
446 | ||
447 | my $usage; # see below | |
448 | sub usage { | |
449 | my $podfile = shift; | |
450 | warn "$0: $podfile: @_\n" if @_; | |
451 | die $usage; | |
452 | } | |
453 | ||
454 | $usage =<<END_OF_USAGE; | |
455 | Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> | |
456 | --podpath=<name>:...:<name> --podroot=<name> | |
457 | --libpods=<name>:...:<name> --recurse --verbose --index | |
458 | --netscape --norecurse --noindex | |
459 | ||
460 | --flush - flushes the item and directory caches. | |
461 | --help - prints this message. | |
462 | --htmlroot - http-server base directory from which all relative paths | |
463 | in podpath stem (default is /). | |
464 | --index - generate an index at the top of the resulting html | |
465 | (default). | |
466 | --infile - filename for the pod to convert (input taken from stdin | |
467 | by default). | |
468 | --libpods - colon-separated list of pages to search for =item pod | |
469 | directives in as targets of C<> and implicit links (empty | |
470 | by default). note, these are not filenames, but rather | |
471 | page names like those that appear in L<> links. | |
472 | --netscape - will use netscape html directives when applicable. | |
473 | --nonetscape - will not use netscape directives (default). | |
474 | --outfile - filename for the resulting html file (output sent to | |
475 | stdout by default). | |
476 | --podpath - colon-separated list of directories containing library | |
477 | pods. empty by default. | |
478 | --podroot - filesystem base directory from which all relative paths | |
479 | in podpath stem (default is .). | |
480 | --noindex - don't generate an index at the top of the resulting html. | |
481 | --norecurse - don't recurse on those subdirectories listed in podpath. | |
482 | --recurse - recurse on those subdirectories listed in podpath | |
483 | (default behavior). | |
484 | --title - title that will appear in resulting html file. | |
485 | --verbose - self-explanatory | |
486 | ||
487 | END_OF_USAGE | |
488 | ||
489 | sub parse_command_line { | |
5a039dd3 BS |
490 | my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile |
491 | ,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecur | |
492 | se,$opt_recurse,$opt_title,$opt_verbose); | |
54310121 | 493 | my $result = GetOptions( |
5a039dd3 BS |
494 | 'flush' => \$opt_flush, |
495 | 'help' => \$opt_help, | |
496 | 'htmldir=s' => \$opt_htmldir, | |
54310121 | 497 | 'htmlroot=s' => \$opt_htmlroot, |
5a039dd3 | 498 | 'index!' => \$opt_index, |
54310121 | 499 | 'infile=s' => \$opt_infile, |
500 | 'libpods=s' => \$opt_libpods, | |
501 | 'netscape!' => \$opt_netscape, | |
502 | 'outfile=s' => \$opt_outfile, | |
503 | 'podpath=s' => \$opt_podpath, | |
504 | 'podroot=s' => \$opt_podroot, | |
505 | 'norecurse' => \$opt_norecurse, | |
506 | 'recurse!' => \$opt_recurse, | |
507 | 'title=s' => \$opt_title, | |
508 | 'verbose' => \$opt_verbose, | |
509 | ); | |
510 | usage("-", "invalid parameters") if not $result; | |
511 | ||
512 | usage("-") if defined $opt_help; # see if the user asked for help | |
513 | $opt_help = ""; # just to make -w shut-up. | |
514 | ||
515 | $podfile = $opt_infile if defined $opt_infile; | |
516 | $htmlfile = $opt_outfile if defined $opt_outfile; | |
5a039dd3 | 517 | $htmldir = $opt_htmldir if defined $opt_outfile; |
54310121 | 518 | |
519 | @podpath = split(":", $opt_podpath) if defined $opt_podpath; | |
520 | @libpods = split(":", $opt_libpods) if defined $opt_libpods; | |
521 | ||
522 | warn "Flushing item and directory caches\n" | |
523 | if $opt_verbose && defined $opt_flush; | |
524 | unlink($dircache, $itemcache) if defined $opt_flush; | |
525 | ||
526 | $htmlroot = $opt_htmlroot if defined $opt_htmlroot; | |
527 | $podroot = $opt_podroot if defined $opt_podroot; | |
528 | ||
529 | $doindex = $opt_index if defined $opt_index; | |
530 | $recurse = $opt_recurse if defined $opt_recurse; | |
531 | $title = $opt_title if defined $opt_title; | |
532 | $verbose = defined $opt_verbose ? 1 : 0; | |
533 | $netscape = $opt_netscape if defined $opt_netscape; | |
534 | } | |
535 | ||
3e3baf6d TB |
536 | |
537 | my $saved_cache_key; | |
538 | ||
539 | sub get_cache { | |
540 | my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; | |
541 | my @cache_key_args = @_; | |
542 | ||
543 | # A first-level cache: | |
544 | # Don't bother reading the cache files if they still apply | |
545 | # and haven't changed since we last read them. | |
546 | ||
547 | my $this_cache_key = cache_key(@cache_key_args); | |
548 | ||
549 | return if $saved_cache_key and $this_cache_key eq $saved_cache_key; | |
550 | ||
551 | # load the cache of %pages and %items if possible. $tests will be | |
552 | # non-zero if successful. | |
553 | my $tests = 0; | |
554 | if (-f $dircache && -f $itemcache) { | |
555 | warn "scanning for item cache\n" if $verbose; | |
556 | $tests = load_cache($dircache, $itemcache, $podpath, $podroot); | |
557 | } | |
558 | ||
559 | # if we didn't succeed in loading the cache then we must (re)build | |
560 | # %pages and %items. | |
561 | if (!$tests) { | |
562 | warn "scanning directories in pod-path\n" if $verbose; | |
563 | scan_podpath($podroot, $recurse, 0); | |
564 | } | |
565 | $saved_cache_key = cache_key(@cache_key_args); | |
566 | } | |
567 | ||
568 | sub cache_key { | |
569 | my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; | |
570 | return join('!', $dircache, $itemcache, $recurse, | |
571 | @$podpath, $podroot, stat($dircache), stat($itemcache)); | |
572 | } | |
573 | ||
54310121 | 574 | # |
3e3baf6d | 575 | # load_cache - tries to find if the caches stored in $dircache and $itemcache |
54310121 | 576 | # are valid caches of %pages and %items. if they are valid then it loads |
577 | # them and returns a non-zero value. | |
578 | # | |
3e3baf6d TB |
579 | |
580 | sub load_cache { | |
54310121 | 581 | my($dircache, $itemcache, $podpath, $podroot) = @_; |
582 | my($tests); | |
583 | local $_; | |
584 | ||
585 | $tests = 0; | |
586 | ||
587 | open(CACHE, "<$itemcache") || | |
588 | die "$0: error opening $itemcache for reading: $!\n"; | |
589 | $/ = "\n"; | |
590 | ||
591 | # is it the same podpath? | |
592 | $_ = <CACHE>; | |
593 | chomp($_); | |
3e3baf6d | 594 | $tests++ if (join(":", @$podpath) eq $_); |
54310121 | 595 | |
596 | # is it the same podroot? | |
597 | $_ = <CACHE>; | |
598 | chomp($_); | |
599 | $tests++ if ($podroot eq $_); | |
600 | ||
601 | # load the cache if its good | |
602 | if ($tests != 2) { | |
603 | close(CACHE); | |
54310121 | 604 | return 0; |
605 | } | |
606 | ||
607 | warn "loading item cache\n" if $verbose; | |
608 | while (<CACHE>) { | |
609 | /(.*?) (.*)$/; | |
610 | $items{$1} = $2; | |
611 | } | |
612 | close(CACHE); | |
613 | ||
614 | warn "scanning for directory cache\n" if $verbose; | |
615 | open(CACHE, "<$dircache") || | |
616 | die "$0: error opening $dircache for reading: $!\n"; | |
617 | $/ = "\n"; | |
618 | $tests = 0; | |
619 | ||
620 | # is it the same podpath? | |
621 | $_ = <CACHE>; | |
622 | chomp($_); | |
3e3baf6d | 623 | $tests++ if (join(":", @$podpath) eq $_); |
54310121 | 624 | |
625 | # is it the same podroot? | |
626 | $_ = <CACHE>; | |
627 | chomp($_); | |
628 | $tests++ if ($podroot eq $_); | |
629 | ||
630 | # load the cache if its good | |
631 | if ($tests != 2) { | |
632 | close(CACHE); | |
54310121 | 633 | return 0; |
634 | } | |
635 | ||
636 | warn "loading directory cache\n" if $verbose; | |
637 | while (<CACHE>) { | |
638 | /(.*?) (.*)$/; | |
639 | $pages{$1} = $2; | |
640 | } | |
641 | ||
642 | close(CACHE); | |
643 | ||
644 | return 1; | |
645 | } | |
646 | ||
647 | # | |
648 | # scan_podpath - scans the directories specified in @podpath for directories, | |
649 | # .pod files, and .pm files. it also scans the pod files specified in | |
650 | # @libpods for =item directives. | |
651 | # | |
652 | sub scan_podpath { | |
3e3baf6d | 653 | my($podroot, $recurse, $append) = @_; |
54310121 | 654 | my($pwd, $dir); |
655 | my($libpod, $dirname, $pod, @files, @poddata); | |
656 | ||
3e3baf6d TB |
657 | unless($append) { |
658 | %items = (); | |
659 | %pages = (); | |
660 | } | |
661 | ||
54310121 | 662 | # scan each directory listed in @podpath |
663 | $pwd = getcwd(); | |
664 | chdir($podroot) | |
665 | || die "$0: error changing to directory $podroot: $!\n"; | |
666 | foreach $dir (@podpath) { | |
667 | scan_dir($dir, $recurse); | |
668 | } | |
669 | ||
670 | # scan the pods listed in @libpods for =item directives | |
671 | foreach $libpod (@libpods) { | |
672 | # if the page isn't defined then we won't know where to find it | |
673 | # on the system. | |
674 | next unless defined $pages{$libpod} && $pages{$libpod}; | |
675 | ||
676 | # if there is a directory then use the .pod and .pm files within it. | |
677 | if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { | |
678 | # find all the .pod and .pm files within the directory | |
679 | $dirname = $1; | |
680 | opendir(DIR, $dirname) || | |
681 | die "$0: error opening directory $dirname: $!\n"; | |
682 | @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR)); | |
683 | closedir(DIR); | |
684 | ||
685 | # scan each .pod and .pm file for =item directives | |
686 | foreach $pod (@files) { | |
687 | open(POD, "<$dirname/$pod") || | |
688 | die "$0: error opening $dirname/$pod for input: $!\n"; | |
689 | @poddata = <POD>; | |
690 | close(POD); | |
691 | ||
692 | scan_items("$dirname/$pod", @poddata); | |
693 | } | |
694 | ||
695 | # use the names of files as =item directives too. | |
696 | foreach $pod (@files) { | |
697 | $pod =~ /^(.*)(\.pod|\.pm)$/; | |
698 | $items{$1} = "$dirname/$1.html" if $1; | |
699 | } | |
700 | } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ || | |
701 | $pages{$libpod} =~ /([^:]*\.pm):/) { | |
702 | # scan the .pod or .pm file for =item directives | |
703 | $pod = $1; | |
704 | open(POD, "<$pod") || | |
705 | die "$0: error opening $pod for input: $!\n"; | |
706 | @poddata = <POD>; | |
707 | close(POD); | |
708 | ||
709 | scan_items("$pod", @poddata); | |
710 | } else { | |
711 | warn "$0: shouldn't be here (line ".__LINE__."\n"; | |
712 | } | |
713 | } | |
714 | @poddata = (); # clean-up a bit | |
715 | ||
716 | chdir($pwd) | |
717 | || die "$0: error changing to directory $pwd: $!\n"; | |
718 | ||
719 | # cache the item list for later use | |
720 | warn "caching items for later use\n" if $verbose; | |
721 | open(CACHE, ">$itemcache") || | |
722 | die "$0: error open $itemcache for writing: $!\n"; | |
723 | ||
724 | print CACHE join(":", @podpath) . "\n$podroot\n"; | |
725 | foreach my $key (keys %items) { | |
726 | print CACHE "$key $items{$key}\n"; | |
727 | } | |
728 | ||
729 | close(CACHE); | |
730 | ||
731 | # cache the directory list for later use | |
732 | warn "caching directories for later use\n" if $verbose; | |
733 | open(CACHE, ">$dircache") || | |
734 | die "$0: error open $dircache for writing: $!\n"; | |
735 | ||
736 | print CACHE join(":", @podpath) . "\n$podroot\n"; | |
737 | foreach my $key (keys %pages) { | |
738 | print CACHE "$key $pages{$key}\n"; | |
739 | } | |
740 | ||
741 | close(CACHE); | |
742 | } | |
743 | ||
744 | # | |
745 | # scan_dir - scans the directory specified in $dir for subdirectories, .pod | |
746 | # files, and .pm files. notes those that it finds. this information will | |
747 | # be used later in order to figure out where the pages specified in L<> | |
748 | # links are on the filesystem. | |
749 | # | |
750 | sub scan_dir { | |
751 | my($dir, $recurse) = @_; | |
752 | my($t, @subdirs, @pods, $pod, $dirname, @dirs); | |
753 | local $_; | |
754 | ||
755 | @subdirs = (); | |
756 | @pods = (); | |
757 | ||
758 | opendir(DIR, $dir) || | |
759 | die "$0: error opening directory $dir: $!\n"; | |
760 | while (defined($_ = readdir(DIR))) { | |
761 | if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory | |
762 | $pages{$_} = "" unless defined $pages{$_}; | |
763 | $pages{$_} .= "$dir/$_:"; | |
764 | push(@subdirs, $_); | |
765 | } elsif (/\.pod$/) { # .pod | |
766 | s/\.pod$//; | |
767 | $pages{$_} = "" unless defined $pages{$_}; | |
768 | $pages{$_} .= "$dir/$_.pod:"; | |
769 | push(@pods, "$dir/$_.pod"); | |
770 | } elsif (/\.pm$/) { # .pm | |
771 | s/\.pm$//; | |
772 | $pages{$_} = "" unless defined $pages{$_}; | |
773 | $pages{$_} .= "$dir/$_.pm:"; | |
774 | push(@pods, "$dir/$_.pm"); | |
775 | } | |
776 | } | |
777 | closedir(DIR); | |
778 | ||
779 | # recurse on the subdirectories if necessary | |
780 | if ($recurse) { | |
781 | foreach my $subdir (@subdirs) { | |
782 | scan_dir("$dir/$subdir", $recurse); | |
783 | } | |
784 | } | |
785 | } | |
786 | ||
787 | # | |
788 | # scan_headings - scan a pod file for head[1-6] tags, note the tags, and | |
789 | # build an index. | |
790 | # | |
791 | sub scan_headings { | |
792 | my($sections, @data) = @_; | |
793 | my($tag, $which_head, $title, $listdepth, $index); | |
794 | ||
be173d55 HM |
795 | # here we need local $ignore = 0; |
796 | # unfortunately, we can't have it, because $ignore is lexical | |
797 | $ignore = 0; | |
798 | ||
54310121 | 799 | $listdepth = 0; |
800 | $index = ""; | |
801 | ||
802 | # scan for =head directives, note their name, and build an index | |
803 | # pointing to each of them. | |
804 | foreach my $line (@data) { | |
bb9460ed | 805 | if ($line =~ /^=(head)([1-6])\s+(.*)/) { |
54310121 | 806 | ($tag,$which_head, $title) = ($1,$2,$3); |
807 | chomp($title); | |
808 | $$sections{htmlify(0,$title)} = 1; | |
809 | ||
102c538a DS |
810 | while ($which_head != $listdepth) { |
811 | if ($which_head > $listdepth) { | |
812 | $index .= "\n" . ("\t" x $listdepth) . "<UL>\n"; | |
813 | $listdepth++; | |
814 | } elsif ($which_head < $listdepth) { | |
815 | $listdepth--; | |
816 | $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; | |
817 | } | |
54310121 | 818 | } |
54310121 | 819 | |
820 | $index .= "\n" . ("\t" x $listdepth) . "<LI>" . | |
be173d55 | 821 | "<A HREF=\"#" . htmlify(0,$title) . "\">" . |
7b8d334a | 822 | html_escape(process_text(\$title, 0)) . "</A>"; |
54310121 | 823 | } |
824 | } | |
825 | ||
826 | # finish off the lists | |
827 | while ($listdepth--) { | |
828 | $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; | |
829 | } | |
830 | ||
831 | # get rid of bogus lists | |
832 | $index =~ s,\t*<UL>\s*</UL>\n,,g; | |
833 | ||
bb9460ed | 834 | $ignore = 1; # restore old value; |
be173d55 | 835 | |
54310121 | 836 | return $index; |
837 | } | |
838 | ||
839 | # | |
840 | # scan_items - scans the pod specified by $pod for =item directives. we | |
841 | # will use this information later on in resolving C<> links. | |
842 | # | |
843 | sub scan_items { | |
844 | my($pod, @poddata) = @_; | |
845 | my($i, $item); | |
846 | local $_; | |
847 | ||
848 | $pod =~ s/\.pod$//; | |
849 | $pod .= ".html" if $pod; | |
850 | ||
851 | foreach $i (0..$#poddata) { | |
852 | $_ = $poddata[$i]; | |
853 | ||
854 | # remove any formatting instructions | |
855 | s,[A-Z]<([^<>]*)>,$1,g; | |
856 | ||
857 | # figure out what kind of item it is and get the first word of | |
858 | # it's name. | |
859 | if (/^=item\s+(\w*)\s*.*$/s) { | |
860 | if ($1 eq "*") { # bullet list | |
861 | /\A=item\s+\*\s*(.*?)\s*\Z/s; | |
862 | $item = $1; | |
7b8d334a GS |
863 | } elsif ($1 =~ /^\d+/) { # numbered list |
864 | /\A=item\s+\d+\.?(.*?)\s*\Z/s; | |
54310121 | 865 | $item = $1; |
866 | } else { | |
867 | # /\A=item\s+(.*?)\s*\Z/s; | |
868 | /\A=item\s+(\w*)/s; | |
869 | $item = $1; | |
870 | } | |
871 | ||
872 | $items{$item} = "$pod" if $item; | |
873 | } | |
874 | } | |
875 | } | |
876 | ||
877 | # | |
878 | # process_head - convert a pod head[1-6] tag and convert it to HTML format. | |
879 | # | |
880 | sub process_head { | |
881 | my($tag, $heading) = @_; | |
882 | my $firstword; | |
883 | ||
884 | # figure out the level of the =head | |
885 | $tag =~ /head([1-6])/; | |
886 | my $level = $1; | |
887 | ||
888 | # can't have a heading full of spaces and speechmarks and so on | |
889 | $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/; | |
890 | ||
891 | print HTML "<P>\n" unless $listlevel; | |
892 | print HTML "<HR>\n" unless $listlevel || $top; | |
893 | print HTML "<H$level>"; # unless $listlevel; | |
894 | #print HTML "<H$level>" unless $listlevel; | |
be173d55 | 895 | my $convert = $heading; process_text(\$convert, 0); |
7b8d334a | 896 | $convert = html_escape($convert); |
54310121 | 897 | print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>"; |
898 | print HTML "</H$level>"; # unless $listlevel; | |
899 | print HTML "\n"; | |
900 | } | |
901 | ||
902 | # | |
903 | # process_item - convert a pod item tag and convert it to HTML format. | |
904 | # | |
905 | sub process_item { | |
906 | my $text = $_[0]; | |
907 | my($i, $quote, $name); | |
908 | ||
909 | my $need_preamble = 0; | |
910 | my $this_entry; | |
911 | ||
912 | ||
913 | # lots of documents start a list without doing an =over. this is | |
914 | # bad! but, the proper thing to do seems to be to just assume | |
915 | # they did do an =over. so warn them once and then continue. | |
916 | warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n" | |
917 | unless $listlevel; | |
918 | process_over() unless $listlevel; | |
919 | ||
920 | return unless $listlevel; | |
921 | ||
922 | # remove formatting instructions from the text | |
923 | 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g; | |
924 | pre_escape(\$text); | |
925 | ||
926 | $need_preamble = $items_seen[$listlevel]++ == 0; | |
927 | ||
928 | # check if this is the first =item after an =over | |
929 | $i = $listlevel - 1; | |
930 | my $need_new = $listlevel >= @listitem; | |
931 | ||
932 | if ($text =~ /\A\*/) { # bullet | |
933 | ||
934 | if ($need_preamble) { | |
935 | push(@listend, "</UL>"); | |
936 | print HTML "<UL>\n"; | |
937 | } | |
938 | ||
7b8d334a GS |
939 | print HTML '<LI>'; |
940 | if ($text =~ /\A\*\s*(.+)\Z/s) { | |
941 | print HTML '<STRONG>'; | |
942 | if ($items_named{$1}++) { | |
943 | print HTML html_escape($1); | |
944 | } else { | |
945 | my $name = 'item_' . htmlify(1,$1); | |
946 | print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; | |
947 | } | |
948 | print HTML '</STRONG>'; | |
949 | } | |
54310121 | 950 | |
7b8d334a | 951 | } elsif ($text =~ /\A[\d#]+/) { # numbered list |
54310121 | 952 | |
953 | if ($need_preamble) { | |
954 | push(@listend, "</OL>"); | |
955 | print HTML "<OL>\n"; | |
956 | } | |
957 | ||
7b8d334a GS |
958 | print HTML '<LI>'; |
959 | if ($text =~ /\A\d+\.?\s*(.+)\Z/s) { | |
960 | print HTML '<STRONG>'; | |
961 | if ($items_named{$1}++) { | |
962 | print HTML html_escape($1); | |
963 | } else { | |
964 | my $name = 'item_' . htmlify(0,$1); | |
965 | print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; | |
966 | } | |
967 | print HTML '</STRONG>'; | |
968 | } | |
54310121 | 969 | |
970 | } else { # all others | |
971 | ||
972 | if ($need_preamble) { | |
973 | push(@listend, '</DL>'); | |
974 | print HTML "<DL>\n"; | |
975 | } | |
976 | ||
7b8d334a GS |
977 | print HTML '<DT>'; |
978 | if ($text =~ /(\S+)/) { | |
979 | print HTML '<STRONG>'; | |
980 | if ($items_named{$1}++) { | |
981 | print HTML html_escape($text); | |
982 | } else { | |
983 | my $name = 'item_' . htmlify(1,$text); | |
984 | print HTML qq(<A NAME="$name">), html_escape($text), '</A>'; | |
985 | } | |
986 | print HTML '</STRONG>'; | |
987 | } | |
54310121 | 988 | print HTML '<DD>'; |
989 | } | |
990 | ||
991 | print HTML "\n"; | |
992 | } | |
993 | ||
994 | # | |
995 | # process_over - process a pod over tag and start a corresponding HTML | |
996 | # list. | |
997 | # | |
998 | sub process_over { | |
999 | # start a new list | |
1000 | $listlevel++; | |
1001 | } | |
1002 | ||
1003 | # | |
1004 | # process_back - process a pod back tag and convert it to HTML format. | |
1005 | # | |
1006 | sub process_back { | |
2ceaccd7 | 1007 | warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n" |
54310121 | 1008 | unless $listlevel; |
1009 | return unless $listlevel; | |
1010 | ||
1011 | # close off the list. note, I check to see if $listend[$listlevel] is | |
1012 | # defined because an =item directive may have never appeared and thus | |
1013 | # $listend[$listlevel] may have never been initialized. | |
1014 | $listlevel--; | |
1015 | print HTML $listend[$listlevel] if defined $listend[$listlevel]; | |
1016 | print HTML "\n"; | |
1017 | ||
1018 | # don't need the corresponding perl code anymore | |
1019 | pop(@listitem); | |
1020 | pop(@listdata); | |
1021 | pop(@listend); | |
1022 | ||
1023 | pop(@items_seen); | |
1024 | } | |
1025 | ||
1026 | # | |
1027 | # process_cut - process a pod cut tag, thus stop ignoring pod directives. | |
1028 | # | |
1029 | sub process_cut { | |
1030 | $ignore = 1; | |
1031 | } | |
1032 | ||
1033 | # | |
1034 | # process_pod - process a pod pod tag, thus ignore pod directives until we see a | |
1035 | # corresponding cut. | |
1036 | # | |
1037 | sub process_pod { | |
1038 | # no need to set $ignore to 0 cause the main loop did it | |
1039 | } | |
1040 | ||
1041 | # | |
1042 | # process_for - process a =for pod tag. if it's for html, split | |
c4d9b39d | 1043 | # it out verbatim, if illustration, center it, otherwise ignore it. |
54310121 | 1044 | # |
1045 | sub process_for { | |
1046 | my($whom, $text) = @_; | |
1047 | if ( $whom =~ /^(pod2)?html$/i) { | |
1048 | print HTML $text; | |
c4d9b39d IZ |
1049 | } elsif ($whom =~ /^illustration$/i) { |
1050 | 1 while chomp $text; | |
1051 | for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { | |
1052 | $text .= $ext, last if -r "$text$ext"; | |
1053 | } | |
1054 | print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>}; | |
1055 | } | |
54310121 | 1056 | } |
1057 | ||
1058 | # | |
1059 | # process_begin - process a =begin pod tag. this pushes | |
1060 | # whom we're beginning on the begin stack. if there's a | |
1061 | # begin stack, we only print if it us. | |
1062 | # | |
1063 | sub process_begin { | |
1064 | my($whom, $text) = @_; | |
1065 | $whom = lc($whom); | |
1066 | push (@begin_stack, $whom); | |
1067 | if ( $whom =~ /^(pod2)?html$/) { | |
1068 | print HTML $text if $text; | |
1069 | } | |
1070 | } | |
1071 | ||
1072 | # | |
1073 | # process_end - process a =end pod tag. pop the | |
1074 | # begin stack. die if we're mismatched. | |
1075 | # | |
1076 | sub process_end { | |
1077 | my($whom, $text) = @_; | |
1078 | $whom = lc($whom); | |
1079 | if ($begin_stack[-1] ne $whom ) { | |
1080 | die "Unmatched begin/end at chunk $paragraph\n" | |
1081 | } | |
1082 | pop @begin_stack; | |
1083 | } | |
1084 | ||
1085 | # | |
1086 | # process_text - handles plaintext that appears in the input pod file. | |
1087 | # there may be pod commands embedded within the text so those must be | |
1088 | # converted to html commands. | |
1089 | # | |
1090 | sub process_text { | |
1091 | my($text, $escapeQuotes) = @_; | |
1092 | my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf); | |
1093 | my($podcommand, $params, $tag, $quote); | |
1094 | ||
1095 | return if $ignore; | |
1096 | ||
1097 | $quote = 0; # status of double-quote conversion | |
1098 | $result = ""; | |
1099 | $rest = $$text; | |
1100 | ||
1101 | if ($rest =~ /^\s+/) { # preformatted text, no pod directives | |
be173d55 HM |
1102 | $rest =~ s/\n+\Z//; |
1103 | $rest =~ s#.*# | |
1104 | my $line = $&; | |
1105 | 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; | |
1106 | $line; | |
1107 | #eg; | |
54310121 | 1108 | |
1109 | $rest =~ s/&/&/g; | |
1110 | $rest =~ s/</</g; | |
1111 | $rest =~ s/>/>/g; | |
1112 | $rest =~ s/"/"/g; | |
1113 | ||
1114 | # try and create links for all occurrences of perl.* within | |
1115 | # the preformatted text. | |
1116 | $rest =~ s{ | |
1117 | (\s*)(perl\w+) | |
1118 | }{ | |
1119 | if (defined $pages{$2}) { # is a link | |
1120 | qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>); | |
39e571d4 LM |
1121 | } elsif (defined $pages{dosify($2)}) { # is a link |
1122 | qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>); | |
54310121 | 1123 | } else { |
1124 | "$1$2"; | |
1125 | } | |
1126 | }xeg; | |
5a039dd3 BS |
1127 | # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g; |
1128 | $rest =~ s{ | |
1129 | (<A\ HREF="?)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)? | |
1130 | }{ | |
1131 | my $url= | |
1132 | File::PathConvert::abs2rel( "$3.html", $htmlfileurl ); | |
1133 | # print( " $htmlfileurl $3.html [$url]\n" ) ; | |
1134 | "$1$url" ; | |
1135 | }xeg; | |
54310121 | 1136 | |
5a039dd3 BS |
1137 | # Look for embedded URLs and make them in to links. We don't |
1138 | # relativize them since they are best left as the author intended. | |
54310121 | 1139 | my $urls = '(' . join ('|', qw{ |
1140 | http | |
1141 | telnet | |
1142 | mailto | |
1143 | news | |
1144 | gopher | |
1145 | file | |
1146 | wais | |
1147 | ftp | |
1148 | } ) | |
1149 | . ')'; | |
1150 | ||
1151 | my $ltrs = '\w'; | |
1152 | my $gunk = '/#~:.?+=&%@!\-'; | |
1153 | my $punc = '.:?\-'; | |
1154 | my $any = "${ltrs}${gunk}${punc}"; | |
1155 | ||
1156 | $rest =~ s{ | |
1157 | \b # start at word boundary | |
1158 | ( # begin $1 { | |
fa46748d | 1159 | $urls :[^:] # need resource and a colon |
54310121 | 1160 | [$any] +? # followed by on or more |
1161 | # of any valid character, but | |
1162 | # be conservative and take only | |
1163 | # what you need to.... | |
1164 | ) # end $1 } | |
1165 | (?= # look-ahead non-consumptive assertion | |
1166 | [$punc]* # either 0 or more puntuation | |
1167 | [^$any] # followed by a non-url char | |
1168 | | # or else | |
1169 | $ # then end of the string | |
1170 | ) | |
1171 | }{<A HREF="$1">$1</A>}igox; | |
1172 | ||
1173 | $result = "<PRE>" # text should be as it is (verbatim) | |
1174 | . "$rest\n" | |
1175 | . "</PRE>\n"; | |
1176 | } else { # formatted text | |
1177 | # parse through the string, stopping each time we find a | |
1178 | # pod-escape. once the string has been throughly processed | |
1179 | # we can output it. | |
ec978fad | 1180 | while (length $rest) { |
54310121 | 1181 | # check to see if there are any possible pod directives in |
1182 | # the remaining part of the text. | |
1183 | if ($rest =~ m/[BCEIFLSZ]</) { | |
1184 | warn "\$rest\t= $rest\n" unless | |
1185 | $rest =~ /\A | |
1186 | ([^<]*?) | |
1187 | ([BCEIFLSZ]?) | |
1188 | < | |
1189 | (.*)\Z/xs; | |
1190 | ||
1191 | $s1 = $1; # pure text | |
1192 | $s2 = $2; # the type of pod-escape that follows | |
1193 | $s3 = '<'; # '<' | |
1194 | $s4 = $3; # the rest of the string | |
1195 | } else { | |
1196 | $s1 = $rest; | |
1197 | $s2 = ""; | |
1198 | $s3 = ""; | |
1199 | $s4 = ""; | |
1200 | } | |
1201 | ||
1202 | if ($s3 eq '<' && $s2) { # a pod-escape | |
1203 | $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1); | |
1204 | $podcommand = "$s2<"; | |
1205 | $rest = $s4; | |
1206 | ||
1207 | # find the matching '>' | |
1208 | $match = 1; | |
1209 | $bf = 0; | |
1210 | while ($match && !$bf) { | |
1211 | $bf = 1; | |
1212 | if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) { | |
1213 | $bf = 0; | |
1214 | $match++; | |
1215 | $podcommand .= $1; | |
1216 | $rest = $2; | |
1217 | } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) { | |
1218 | $bf = 0; | |
1219 | $match--; | |
1220 | $podcommand .= $1; | |
1221 | $rest = $2; | |
1222 | } | |
1223 | } | |
1224 | ||
1225 | if ($match != 0) { | |
1226 | warn <<WARN; | |
1227 | $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph. | |
1228 | WARN | |
1229 | $result .= substr $podcommand, 0, 2; | |
1230 | $rest = substr($podcommand, 2) . $rest; | |
1231 | next; | |
1232 | } | |
1233 | ||
1234 | # pull out the parameters to the pod-escape | |
1235 | $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s; | |
1236 | $tag = $1; | |
1237 | $params = $2; | |
1238 | ||
1239 | # process the text within the pod-escape so that any escapes | |
1240 | # which must occur do. | |
1241 | process_text(\$params, 0) unless $tag eq 'L'; | |
1242 | ||
1243 | $s1 = $params; | |
1244 | if (!$tag || $tag eq " ") { # <> : no tag | |
1245 | $s1 = "<$params>"; | |
1246 | } elsif ($tag eq "L") { # L<> : link | |
1247 | $s1 = process_L($params); | |
1248 | } elsif ($tag eq "I" || # I<> : italicize text | |
1249 | $tag eq "B" || # B<> : bold text | |
1250 | $tag eq "F") { # F<> : file specification | |
1251 | $s1 = process_BFI($tag, $params); | |
1252 | } elsif ($tag eq "C") { # C<> : literal code | |
1253 | $s1 = process_C($params, 1); | |
1254 | } elsif ($tag eq "E") { # E<> : escape | |
1255 | $s1 = process_E($params); | |
1256 | } elsif ($tag eq "Z") { # Z<> : zero-width character | |
1257 | $s1 = process_Z($params); | |
1258 | } elsif ($tag eq "S") { # S<> : non-breaking space | |
1259 | $s1 = process_S($params); | |
1260 | } elsif ($tag eq "X") { # S<> : non-breaking space | |
1261 | $s1 = process_X($params); | |
1262 | } else { | |
1263 | warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n"; | |
1264 | } | |
1265 | ||
1266 | $result .= "$s1"; | |
1267 | } else { | |
1268 | # for pure text we must deal with implicit links and | |
1269 | # double-quotes among other things. | |
1270 | $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3"); | |
1271 | $rest = $s4; | |
1272 | } | |
1273 | } | |
1274 | } | |
1275 | $$text = $result; | |
1276 | } | |
1277 | ||
1278 | sub html_escape { | |
1279 | my $rest = $_[0]; | |
1280 | $rest =~ s/&/&/g; | |
1281 | $rest =~ s/</</g; | |
1282 | $rest =~ s/>/>/g; | |
1283 | $rest =~ s/"/"/g; | |
1284 | return $rest; | |
1285 | } | |
1286 | ||
1287 | # | |
1288 | # process_puretext - process pure text (without pod-escapes) converting | |
1289 | # double-quotes and handling implicit C<> links. | |
1290 | # | |
1291 | sub process_puretext { | |
1292 | my($text, $quote) = @_; | |
1293 | my(@words, $result, $rest, $lead, $trail); | |
1294 | ||
1295 | # convert double-quotes to single-quotes | |
1296 | $text =~ s/\A([^"]*)"/$1''/s if $$quote; | |
1297 | while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {} | |
1298 | ||
1299 | $$quote = ($text =~ m/"/ ? 1 : 0); | |
1300 | $text =~ s/\A([^"]*)"/$1``/s if $$quote; | |
1301 | ||
1302 | # keep track of leading and trailing white-space | |
1303 | $lead = ($text =~ /\A(\s*)/s ? $1 : ""); | |
1304 | $trail = ($text =~ /(\s*)\Z/s ? $1 : ""); | |
1305 | ||
1306 | # collapse all white space into a single space | |
1307 | $text =~ s/\s+/ /g; | |
1308 | @words = split(" ", $text); | |
1309 | ||
1310 | # process each word individually | |
1311 | foreach my $word (@words) { | |
1312 | # see if we can infer a link | |
1313 | if ($word =~ /^\w+\(/) { | |
1314 | # has parenthesis so should have been a C<> ref | |
1315 | $word = process_C($word); | |
1316 | # $word =~ /^[^()]*]\(/; | |
1317 | # if (defined $items{$1} && $items{$1}) { | |
1318 | # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_" | |
1319 | # . htmlify(0,$word) | |
1320 | # . "\">$word</A></CODE>"; | |
1321 | # } elsif (defined $items{$word} && $items{$word}) { | |
1322 | # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_" | |
1323 | # . htmlify(0,$word) | |
1324 | # . "\">$word</A></CODE>"; | |
1325 | # } else { | |
1326 | # $word = "\n<CODE><A HREF=\"#item_" | |
1327 | # . htmlify(0,$word) | |
1328 | # . "\">$word</A></CODE>"; | |
1329 | # } | |
1330 | } elsif ($word =~ /^[\$\@%&*]+\w+$/) { | |
1331 | # perl variables, should be a C<> ref | |
1332 | $word = process_C($word, 1); | |
1333 | } elsif ($word =~ m,^\w+://\w,) { | |
1334 | # looks like a URL | |
5a039dd3 | 1335 | # Don't relativize it: leave it as the author intended |
54310121 | 1336 | $word = qq(<A HREF="$word">$word</A>); |
af47ee55 | 1337 | } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { |
54310121 | 1338 | # looks like an e-mail address |
7b8d334a GS |
1339 | my ($w1, $w2, $w3) = ("", $word, ""); |
1340 | ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/; | |
1341 | ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/; | |
1342 | $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3); | |
54310121 | 1343 | } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase? |
7b8d334a | 1344 | $word = html_escape($word) if $word =~ /["&<>]/; |
54310121 | 1345 | $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape; |
1346 | } else { | |
7b8d334a | 1347 | $word = html_escape($word) if $word =~ /["&<>]/; |
54310121 | 1348 | } |
1349 | } | |
1350 | ||
1351 | # build a new string based upon our conversion | |
1352 | $result = ""; | |
1353 | $rest = join(" ", @words); | |
1354 | while (length($rest) > 75) { | |
1355 | if ( $rest =~ m/^(.{0,75})\s(.*?)$/o || | |
1356 | $rest =~ m/^(\S*)\s(.*?)$/o) { | |
1357 | ||
1358 | $result .= "$1\n"; | |
1359 | $rest = $2; | |
1360 | } else { | |
1361 | $result .= "$rest\n"; | |
1362 | $rest = ""; | |
1363 | } | |
1364 | } | |
1365 | $result .= $rest if $rest; | |
1366 | ||
1367 | # restore the leading and trailing white-space | |
1368 | $result = "$lead$result$trail"; | |
1369 | ||
1370 | return $result; | |
1371 | } | |
1372 | ||
1373 | # | |
1374 | # pre_escape - convert & in text to $amp; | |
1375 | # | |
1376 | sub pre_escape { | |
1377 | my($str) = @_; | |
1378 | ||
1379 | $$str =~ s,&,&,g; | |
1380 | } | |
1381 | ||
1382 | # | |
39e571d4 LM |
1383 | # dosify - convert filenames to 8.3 |
1384 | # | |
1385 | sub dosify { | |
1386 | my($str) = @_; | |
1387 | if ($Is83) { | |
1388 | $str = lc $str; | |
1389 | $str =~ s/(\.\w+)/substr ($1,0,4)/ge; | |
1390 | $str =~ s/(\w+)/substr ($1,0,8)/ge; | |
1391 | } | |
1392 | return $str; | |
1393 | } | |
1394 | ||
1395 | # | |
54310121 | 1396 | # process_L - convert a pod L<> directive to a corresponding HTML link. |
1397 | # most of the links made are inferred rather than known about directly | |
1398 | # (i.e it's not known whether the =head\d section exists in the target file, | |
1399 | # or whether a .pod file exists in the case of split files). however, the | |
1400 | # guessing usually works. | |
1401 | # | |
1402 | # Unlike the other directives, this should be called with an unprocessed | |
1403 | # string, else tags in the link won't be matched. | |
1404 | # | |
1405 | sub process_L { | |
1406 | my($str) = @_; | |
39e571d4 | 1407 | my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings |
54310121 | 1408 | |
1409 | $str =~ s/\n/ /g; # undo word-wrapped tags | |
1410 | $s1 = $str; | |
1411 | for ($s1) { | |
b74bceb9 AB |
1412 | # LREF: a la HREF L<show this text|man/section> |
1413 | $linktext = $1 if s:^([^|]+)\|::; | |
1414 | ||
54310121 | 1415 | # make sure sections start with a / |
1416 | s,^",/",g; | |
1417 | s,^,/,g if (!m,/, && / /); | |
1418 | ||
1419 | # check if there's a section specified | |
1420 | if (m,^(.*?)/"?(.*?)"?$,) { # yes | |
1421 | ($page, $section) = ($1, $2); | |
1422 | } else { # no | |
1423 | ($page, $section) = ($str, ""); | |
1424 | } | |
1425 | ||
1426 | # check if we know that this is a section in this page | |
1427 | if (!defined $pages{$page} && defined $sections{$page}) { | |
1428 | $section = $page; | |
1429 | $page = ""; | |
1430 | } | |
1431 | } | |
1432 | ||
39e571d4 LM |
1433 | $page83=dosify($page); |
1434 | $page=$page83 if (defined $pages{$page83}); | |
54310121 | 1435 | if ($page eq "") { |
1436 | $link = "#" . htmlify(0,$section); | |
b74bceb9 | 1437 | $linktext = $section unless defined($linktext); |
350ccacd GB |
1438 | } elsif ( $page =~ /::/ ) { |
1439 | $linktext = ($section ? "$section" : "$page"); | |
1440 | $page =~ s,::,/,g; | |
1441 | $link = "$htmlroot/$page.html"; | |
1442 | $link .= "#" . htmlify(0,$section) if ($section); | |
54310121 | 1443 | } elsif (!defined $pages{$page}) { |
1444 | warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n"; | |
1445 | $link = ""; | |
b74bceb9 | 1446 | $linktext = $page unless defined($linktext); |
54310121 | 1447 | } else { |
b74bceb9 | 1448 | $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext); |
54310121 | 1449 | $section = htmlify(0,$section) if $section ne ""; |
1450 | ||
1451 | # if there is a directory by the name of the page, then assume that an | |
1452 | # appropriate section will exist in the subdirectory | |
1453 | if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { | |
1454 | $link = "$htmlroot/$1/$section.html"; | |
1455 | ||
1456 | # since there is no directory by the name of the page, the section will | |
1457 | # have to exist within a .html of the same name. thus, make sure there | |
1458 | # is a .pod or .pm that might become that .html | |
1459 | } else { | |
1460 | $section = "#$section"; | |
1461 | # check if there is a .pod with the page name | |
1462 | if ($pages{$page} =~ /([^:]*)\.pod:/) { | |
1463 | $link = "$htmlroot/$1.html$section"; | |
1464 | } elsif ($pages{$page} =~ /([^:]*)\.pm:/) { | |
1465 | $link = "$htmlroot/$1.html$section"; | |
1466 | } else { | |
1467 | warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ". | |
1468 | "no .pod or .pm found\n"; | |
1469 | $link = ""; | |
b74bceb9 | 1470 | $linktext = $section unless defined($linktext); |
54310121 | 1471 | } |
1472 | } | |
1473 | } | |
1474 | ||
1475 | process_text(\$linktext, 0); | |
1476 | if ($link) { | |
5a039dd3 BS |
1477 | my $url= File::PathConvert::abs2rel( $link, $htmlfileurl ) ; |
1478 | # print( " $htmlfileurl $link [$url]\n" ) ; | |
1479 | $s1 = "<A HREF=\"$url\">$linktext</A>"; | |
54310121 | 1480 | } else { |
1481 | $s1 = "<EM>$linktext</EM>"; | |
1482 | } | |
1483 | return $s1; | |
1484 | } | |
1485 | ||
1486 | # | |
1487 | # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and | |
1488 | # convert them to corresponding HTML directives. | |
1489 | # | |
1490 | sub process_BFI { | |
1491 | my($tag, $str) = @_; | |
1492 | my($s1); # work string | |
1493 | my(%repltext) = ( 'B' => 'STRONG', | |
1494 | 'F' => 'EM', | |
1495 | 'I' => 'EM'); | |
1496 | ||
1497 | # extract the modified text and convert to HTML | |
1498 | $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>"; | |
1499 | return $s1; | |
1500 | } | |
1501 | ||
1502 | # | |
1503 | # process_C - process the C<> pod-escape. | |
1504 | # | |
1505 | sub process_C { | |
1506 | my($str, $doref) = @_; | |
1507 | my($s1, $s2); | |
1508 | ||
1509 | $s1 = $str; | |
1510 | $s1 =~ s/\([^()]*\)//g; # delete parentheses | |
be173d55 | 1511 | $s2 = $s1; |
54310121 | 1512 | $s1 =~ s/\W//g; # delete bogus characters |
7b8d334a | 1513 | $str = html_escape($str); |
54310121 | 1514 | |
1515 | # if there was a pod file that we found earlier with an appropriate | |
1516 | # =item directive, then create a link to that page. | |
1517 | if ($doref && defined $items{$s1}) { | |
5a039dd3 BS |
1518 | if ( $items{$s1} ) { |
1519 | my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ; | |
1520 | my $url = File::PathConvert::abs2rel( $link, $htmlfileurl ) ; | |
1521 | # print( " $htmlfileurl $link [$url]\n" ) ; | |
1522 | $s1 = "<A HREF=\"$url\">$str</A>" ; | |
1523 | } | |
1524 | else { | |
1525 | $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ; | |
1526 | } | |
54310121 | 1527 | $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,; |
1528 | confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/; | |
1529 | } else { | |
1530 | $s1 = "<CODE>$str</CODE>"; | |
1531 | # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose | |
1532 | } | |
1533 | ||
1534 | ||
1535 | return $s1; | |
1536 | } | |
1537 | ||
1538 | # | |
1539 | # process_E - process the E<> pod directive which seems to escape a character. | |
1540 | # | |
1541 | sub process_E { | |
1542 | my($str) = @_; | |
1543 | ||
1544 | for ($str) { | |
1545 | s,([^/].*),\&$1\;,g; | |
1546 | } | |
1547 | ||
1548 | return $str; | |
1549 | } | |
1550 | ||
1551 | # | |
1552 | # process_Z - process the Z<> pod directive which really just amounts to | |
1553 | # ignoring it. this allows someone to start a paragraph with an = | |
1554 | # | |
1555 | sub process_Z { | |
1556 | my($str) = @_; | |
1557 | ||
1558 | # there is no equivalent in HTML for this so just ignore it. | |
1559 | $str = ""; | |
1560 | return $str; | |
1561 | } | |
1562 | ||
1563 | # | |
1564 | # process_S - process the S<> pod directive which means to convert all | |
1565 | # spaces in the string to non-breaking spaces (in HTML-eze). | |
1566 | # | |
1567 | sub process_S { | |
1568 | my($str) = @_; | |
1569 | ||
1570 | # convert all spaces in the text to non-breaking spaces in HTML. | |
1571 | $str =~ s/ / /g; | |
1572 | return $str; | |
1573 | } | |
1574 | ||
1575 | # | |
1576 | # process_X - this is supposed to make an index entry. we'll just | |
1577 | # ignore it. | |
1578 | # | |
1579 | sub process_X { | |
1580 | return ''; | |
1581 | } | |
1582 | ||
1583 | ||
1584 | # | |
1585 | # finish_list - finish off any pending HTML lists. this should be called | |
1586 | # after the entire pod file has been read and converted. | |
1587 | # | |
1588 | sub finish_list { | |
7b8d334a | 1589 | while ($listlevel > 0) { |
54310121 | 1590 | print HTML "</DL>\n"; |
1591 | $listlevel--; | |
1592 | } | |
1593 | } | |
1594 | ||
1595 | # | |
1596 | # htmlify - converts a pod section specification to a suitable section | |
1597 | # specification for HTML. if first arg is 1, only takes 1st word. | |
1598 | # | |
1599 | sub htmlify { | |
1600 | my($compact, $heading) = @_; | |
1601 | ||
1602 | if ($compact) { | |
1603 | $heading =~ /^(\w+)/; | |
1604 | $heading = $1; | |
1605 | } | |
1606 | ||
1607 | # $heading = lc($heading); | |
1608 | $heading =~ s/[^\w\s]/_/g; | |
1609 | $heading =~ s/(\s+)/ /g; | |
1610 | $heading =~ s/^\s*(.*?)\s*$/$1/s; | |
1611 | $heading =~ s/ /_/g; | |
1612 | $heading =~ s/\A(.{32}).*\Z/$1/s; | |
1613 | $heading =~ s/\s+\Z//; | |
1614 | $heading =~ s/_{2,}/_/g; | |
1615 | ||
1616 | return $heading; | |
1617 | } | |
1618 | ||
1619 | BEGIN { | |
1620 | } | |
1621 | ||
1622 | 1; |