This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate with Sarathy.
[perl5.git] / lib / Pod / Man.pm
CommitLineData
9741dab0 1# Pod::Man -- Convert POD data to formatted *roff input.
2e20e14f 2# $Id: Man.pm,v 0.8 1999/10/07 09:39:37 eagle Exp $
9741dab0
GS
3#
4# Copyright 1999 by Russ Allbery <rra@stanford.edu>
5#
6# This program is free software; you can redistribute it and/or modify it
7# under the same terms as Perl itself.
8#
9# This module is intended to be a replacement for pod2man, and attempts to
10# match its output except for some specific circumstances where other
11# decisions seemed to produce better output. It uses Pod::Parser and is
12# designed to be very easy to subclass.
13
14############################################################################
15# Modules and declarations
16############################################################################
17
18package Pod::Man;
19
20require 5.004;
21
22use Carp qw(carp croak);
23use Pod::Parser ();
24
25use strict;
26use subs qw(makespace);
27use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
28
29@ISA = qw(Pod::Parser);
30
2e20e14f 31($VERSION = (split (' ', q$Revision: 0.8 $ ))[1]) =~ s/\.(\d)$/.0$1/;
9741dab0
GS
32
33
34############################################################################
35# Preamble and *roff output tables
36############################################################################
37
# The following is the static preamble which starts all *roff output we
# generate.  It's completely static except for the font to use as a
# fixed-width font, which is designated by @CFONT@.  $PREAMBLE should
# therefore be run through s/\@CFONT\@/<font>/g before output.
42$PREAMBLE = <<'----END OF PREAMBLE----';
43.de Sh \" Subsection heading
44.br
45.if t .Sp
46.ne 5
47.PP
48\fB\\$1\fR
49.PP
50..
51.de Sp \" Vertical space (when we can't use .PP)
52.if t .sp .5v
53.if n .sp
54..
55.de Ip \" List item
56.br
57.ie \\n(.$>=3 .ne \\$3
58.el .ne 3
59.IP "\\$1" \\$2
60..
61.de Vb \" Begin verbatim text
62.ft @CFONT@
63.nf
64.ne \\$1
65..
66.de Ve \" End verbatim text
67.ft R
68
69.fi
70..
71.\" Set up some character translations and predefined strings. \*(-- will
72.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
73.\" double quote, and \*(R" will give a right double quote. | will give a
74.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used
75.\" to do unbreakable dashes and therefore won't be available. \*(C` and
76.\" \*(C' expand to `' in nroff, nothing in troff, for use with C<>
77.tr \(*W-|\(bv\*(Tr
78.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
79.ie n \{\
80. ds -- \(*W-
81. ds PI pi
82. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
83. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
84. ds L" ""
85. ds R" ""
86. ds C` `
87. ds C' '
88'br\}
89.el\{\
90. ds -- \|\(em\|
91. ds PI \(*p
92. ds L" ``
93. ds R" ''
94'br\}
95.\"
96.\" If the F register is turned on, we'll generate index entries on stderr
97.\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and
98.\" index entries marked with X<> in POD. Of course, you'll have to process
99.\" the output yourself in some meaningful fashion.
100.if \nF \{\
101. de IX
102. tm Index:\\$1\t\\n%\t"\\$2"
103. .
104. nr % 0
105. rr F
106.\}
107.\"
108.\" For nroff, turn off justification. Always turn off hyphenation; it
109.\" makes way too many mistakes in technical documents.
110.hy 0
111.if n .na
112.\"
113.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
114.\" Fear. Run. Save yourself. No user-serviceable parts.
115.bd B 3
116. \" fudge factors for nroff and troff
117.if n \{\
118. ds #H 0
119. ds #V .8m
120. ds #F .3m
121. ds #[ \f1
122. ds #] \fP
123.\}
124.if t \{\
125. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
126. ds #V .6m
127. ds #F 0
128. ds #[ \&
129. ds #] \&
130.\}
131. \" simple accents for nroff and troff
132.if n \{\
133. ds ' \&
134. ds ` \&
135. ds ^ \&
136. ds , \&
137. ds ~ ~
138. ds /
139.\}
140.if t \{\
141. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
142. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
143. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
144. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
145. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
146. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
147.\}
148. \" troff and (daisy-wheel) nroff accents
149.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
150.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
151.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
152.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
153.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
154.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
155.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
156.ds ae a\h'-(\w'a'u*4/10)'e
157.ds Ae A\h'-(\w'A'u*4/10)'E
158. \" corrections for vroff
159.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
160.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
161. \" for low resolution devices (crt and lpr)
162.if \n(.H>23 .if \n(.V>19 \
163\{\
164. ds : e
165. ds 8 ss
166. ds o a
167. ds d- d\h'-1'\(ga
168. ds D- D\h'-1'\(hy
169. ds th \o'bp'
170. ds Th \o'LP'
171. ds ae ae
172. ds Ae AE
173.\}
174.rm #[ #] #H #V #F C
175----END OF PREAMBLE----
176
# Map from the names accepted inside E<> to the *roff text that produces the
# character.  This table is taken nearly verbatim from Tom Christiansen's
# pod2man.  The accent strings (\*', \*`, \*^, ...) and the ligature strings
# (\*(Ae, \*(ae, ...) are the ones defined by the standard preamble, so the
# preamble has to be printed before any of these are used.  Every value is
# written with \\ for the backslash; entries containing a single quote are
# double-quoted, everything else is single-quoted.
%ESCAPES = (
    'amp'       =>    '&',              # ampersand
    'lt'        =>    '<',              # left chevron, less-than
    'gt'        =>    '>',              # right chevron, greater-than
    'quot'      =>    '"',              # double quote

    'Aacute'    =>    "A\\*'",          # capital A, acute accent
    'aacute'    =>    "a\\*'",          # small a, acute accent
    'Acirc'     =>    'A\\*^',          # capital A, circumflex accent
    'acirc'     =>    'a\\*^',          # small a, circumflex accent
    # The preamble defines the capital ligature as "Ae", not "AE".
    'AElig'     =>    '\\*(Ae',         # capital AE diphthong (ligature)
    'aelig'     =>    '\\*(ae',         # small ae diphthong (ligature)
    'Agrave'    =>    'A\\*`',          # capital A, grave accent
    'agrave'    =>    'a\\*`',          # small a, grave accent
    'Aring'     =>    'A\\*o',          # capital A, ring
    'aring'     =>    'a\\*o',          # small a, ring
    'Atilde'    =>    'A\\*~',          # capital A, tilde
    'atilde'    =>    'a\\*~',          # small a, tilde
    'Auml'      =>    'A\\*:',          # capital A, dieresis or umlaut mark
    'auml'      =>    'a\\*:',          # small a, dieresis or umlaut mark
    'Ccedil'    =>    'C\\*,',          # capital C, cedilla
    'ccedil'    =>    'c\\*,',          # small c, cedilla
    'Eacute'    =>    "E\\*'",          # capital E, acute accent
    'eacute'    =>    "e\\*'",          # small e, acute accent
    'Ecirc'     =>    'E\\*^',          # capital E, circumflex accent
    'ecirc'     =>    'e\\*^',          # small e, circumflex accent
    'Egrave'    =>    'E\\*`',          # capital E, grave accent
    'egrave'    =>    'e\\*`',          # small e, grave accent
    'ETH'       =>    '\\*(D-',         # capital Eth, Icelandic
    'eth'       =>    '\\*(d-',         # small eth, Icelandic
    'Euml'      =>    'E\\*:',          # capital E, dieresis or umlaut mark
    'euml'      =>    'e\\*:',          # small e, dieresis or umlaut mark
    'Iacute'    =>    "I\\*'",          # capital I, acute accent
    'iacute'    =>    "i\\*'",          # small i, acute accent
    'Icirc'     =>    'I\\*^',          # capital I, circumflex accent
    'icirc'     =>    'i\\*^',          # small i, circumflex accent
    'Igrave'    =>    'I\\*`',          # capital I, grave accent
    'igrave'    =>    'i\\*`',          # small i, grave accent
    'Iuml'      =>    'I\\*:',          # capital I, dieresis or umlaut mark
    'iuml'      =>    'i\\*:',          # small i, dieresis or umlaut mark
    'Ntilde'    =>    'N\\*~',          # capital N, tilde
    'ntilde'    =>    'n\\*~',          # small n, tilde
    'Oacute'    =>    "O\\*'",          # capital O, acute accent
    'oacute'    =>    "o\\*'",          # small o, acute accent
    'Ocirc'     =>    'O\\*^',          # capital O, circumflex accent
    'ocirc'     =>    'o\\*^',          # small o, circumflex accent
    'Ograve'    =>    'O\\*`',          # capital O, grave accent
    'ograve'    =>    'o\\*`',          # small o, grave accent
    'Oslash'    =>    'O\\*/',          # capital O, slash
    'oslash'    =>    'o\\*/',          # small o, slash
    'Otilde'    =>    'O\\*~',          # capital O, tilde
    'otilde'    =>    'o\\*~',          # small o, tilde
    'Ouml'      =>    'O\\*:',          # capital O, dieresis or umlaut mark
    'ouml'      =>    'o\\*:',          # small o, dieresis or umlaut mark
    'szlig'     =>    '\\*8',           # small sharp s, German (sz ligature)
    'THORN'     =>    '\\*(Th',         # capital THORN, Icelandic
    'thorn'     =>    '\\*(th',         # small thorn, Icelandic
    'Uacute'    =>    "U\\*'",          # capital U, acute accent
    'uacute'    =>    "u\\*'",          # small u, acute accent
    'Ucirc'     =>    'U\\*^',          # capital U, circumflex accent
    'ucirc'     =>    'u\\*^',          # small u, circumflex accent
    'Ugrave'    =>    'U\\*`',          # capital U, grave accent
    'ugrave'    =>    'u\\*`',          # small u, grave accent
    'Uuml'      =>    'U\\*:',          # capital U, dieresis or umlaut mark
    'uuml'      =>    'u\\*:',          # small u, dieresis or umlaut mark
    'Yacute'    =>    "Y\\*'",          # capital Y, acute accent
    'yacute'    =>    "y\\*'",          # small y, acute accent
    'yuml'      =>    'y\\*:',          # small y, dieresis or umlaut mark
);
251
252
253############################################################################
254# Static helper functions
255############################################################################
256
# Keep *roff from reading text as a request: every line that starts with a
# period or a single quote gets a zero-width \& put in front of it.  Takes
# the text and returns the protected copy.
sub protect {
    my $text = shift;
    $text =~ s/^([.\'])/\\&$1/mg;
    return $text;
}
259
# Build a *roff request line from a command, its single text argument, and
# an optional extra trailing argument.  Text without double quotes is simply
# wrapped in double quotes.  Text with double quotes produces an .if n/.el
# pair: the nroff branch gets the text with embedded quotes doubled, and the
# troff branch gets paired quotes remapped to `` and ''.  When an extra
# argument is present the remaining double quotes are doubled once more.
sub switchquotes {
    my ($command, $text, $extra) = @_;
    my $suffix = $extra ? " $extra" : '';

    # The predefined quote strings go back to being plain double quotes.
    $text =~ s/\\\*\([LR]\"/\"/g;

    # Nothing special to do when there are no quotes at all.
    return qq($command "$text"$suffix\n) unless $text =~ /\"/;

    $text =~ s/\"/\"\"/g;
    my $troff = $text;
    $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
    if ($extra) {
        $text  =~ s/\"/\"\"/g;
        $troff =~ s/\"/\"\"/g;
    }
    return qq(.if n $command "$text"$suffix\n)
         . qq(.el $command "$troff"$suffix\n);
}
286
# Turn a *roff font name into the escape that selects it: \fX for a
# one-character name, \f(XX for anything longer.
sub toescape {
    my $font = $_[0];
    return '\f' . $font unless length ($font) > 1;
    return '\f(' . $font;
}
289
290
291############################################################################
292# Initialization
293############################################################################
294
# Initialize the object.  Here, we also process any additional options
# passed to the constructor or set up defaults if none were given.  center
# is the centered title, release is the version number, and date is the date
# for the documentation.  Note that we can't know what file name we're
# processing due to the architecture of Pod::Parser, so that *has* to either
# be passed to the constructor or set separately with Pod::Man::name().
#
# Croaks if a user-supplied font name is not one or two characters long.
sub initialize {
    my $self = shift;

    # Figure out the fixed-width font.  If user-supplied, make sure that
    # they are the right length.
    for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
        if (defined $$self{$_}) {
            if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
                croak "roff font should be 1 or 2 chars, not `$$self{$_}'";
            }
        } else {
            $$self{$_} = '';
        }
    }

    # Set the default fonts.  We can't be sure what fixed bold-italic is
    # going to be called, so default to just bold.
    $$self{fixed}           ||= 'CW';
    $$self{fixedbold}       ||= 'CB';
    $$self{fixeditalic}     ||= 'CI';
    $$self{fixedbolditalic} ||= 'CB';

    # Set up a table of font escapes.  First number is fixed-width, second
    # is bold, third is italic.
    $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
                      '010' => '\fB', '011' => '\f(BI',
                      '100' => toescape ($$self{fixed}),
                      '101' => toescape ($$self{fixeditalic}),
                      '110' => toescape ($$self{fixedbold}),
                      '111' => toescape ($$self{fixedbolditalic})};

    # Extra stuff for page titles.
    $$self{center} = 'User Contributed Perl Documentation'
        unless defined $$self{center};
    $$self{indent} = 4 unless defined $$self{indent};

    # We used to try first to get the version number from a local binary,
    # but we shouldn't need that any more.  Get the version from the running
    # Perl.
    if (!defined $$self{release}) {
        my ($rev, $ver, $sver) = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
        $sver ||= 0; $sver *= 10 ** (3-length($sver));
        $rev += 0; $ver += 0; $sver += 0;
        $$self{release} = "perl v$rev.$ver.$sver";
    }

    # Double quotes in things that will be quoted.  The date is normally
    # still undefined at this point (begin_pod fills it in later), so skip
    # anything that has no value rather than substituting on undef.
    for (qw/center date release/) {
        $$self{$_} =~ s/\"/\"\"/g if defined $$self{$_};
    }

    $$self{INDENT}  = 0;        # Current indentation level.
    $$self{INDENTS} = [];       # Stack of indentations.
    $$self{INDEX}   = [];       # Index keys waiting to be printed.

    $self->SUPER::initialize;
}
356
# For each document we process, output the preamble and the title line
# first.  The @CFONT@ marker is replaced in a private copy of $PREAMBLE, so
# each parser object gets its own fixed-width font and the shared template
# is left untouched for later documents.
#
# The page name comes from the name option or, failing that, is derived from
# the input file name; the section comes from the section option, defaulting
# to 3 for *.pm input and 1 otherwise.  The date comes from the date option
# or from the modification time of the input file (falling back to the
# current time) and is written as a zero-padded YYYY-MM-DD string.
sub begin_pod {
    my $self = shift;

    # Try to figure out the name and section from the file name.
    my $section = $$self{section} || 1;
    my $name = $$self{name};
    if (!defined $name) {
        $name = $self->input_file;
        $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
        $name =~ s/\.p(od|[lm])\z//i;
        if ($section =~ /^1/) {
            require File::Basename;
            $name = uc File::Basename::basename ($name);
        } else {
            # Lose everything up to the first of
            #     */lib/*perl*      standard or site_perl module
            #     */*perl*/lib      from -D prefix=/opt/perl
            #     */*perl*/         random module hierarchy
            # which works.  Should be fixed to use File::Spec.
            for ($name) {
                s%//+%/%g;
                if (     s%^.*?/lib/[^/]*perl[^/]*/%%is
                      or s%^.*?/[^/]*perl[^/]*/(?:lib/)?%%is) {
                    s%^site(_perl)?/%%s;        # site and site_perl
                    s%^(.*-$^O|$^O-.*)/%%os;    # arch
                    s%^\d+\.\d+%%s;             # version
                }
                s%/%::%g;
            }
        }
    }

    # Modification date header.  Try to use the modification time of our
    # input.
    if (!defined $$self{date}) {
        my $time = (stat $self->input_file)[9] || time;
        my ($day, $month, $year) = (localtime $time)[3,4,5];
        $month++;
        $year += 1900;
        $$self{date} = sprintf ('%04d-%02d-%02d', $year, $month, $day);
    }

    # Now, print out the preamble and the title.  Work on a copy so that
    # the global template keeps its @CFONT@ markers.
    my $preamble = $PREAMBLE;
    $preamble =~ s/\@CFONT\@/$$self{fixed}/g;
    chomp $preamble;
    print { $self->output_handle } <<"----END OF HEADER----";
.\\" Automatically generated by Pod::Man version $VERSION
.\\" @{[ scalar localtime ]}
.\\"
.\\" Standard preamble:
.\\" ======================================================================
$preamble
.\\" ======================================================================
.\\"
.IX Title "$name $section"
.TH $name $section "$$self{release}" "$$self{date}" "$$self{center}"
.UC
----END OF HEADER----
#"# for cperl-mode

    # Initialize a few per-file variables.
    $$self{INDENT} = 0;
    $$self{NEEDSPACE} = 0;
}
424
425
426############################################################################
427# Core overrides
428############################################################################
429
# Called for each command paragraph.  Gets the command, the associated
# paragraph, the line number, and a Pod::Paragraph object.  Dispatches the
# command to the method named cmd_<command>, passing along the remaining
# arguments.  =pod is ignored, and while EXCLUDE is set everything except
# =end is ignored.  A command with no matching cmd_ method produces a
# warning and is skipped instead of aborting the conversion with a method
# lookup failure.
sub command {
    my $self = shift;
    my $command = shift;
    return if $command eq 'pod';
    return if ($$self{EXCLUDE} && $command ne 'end');
    my $method = 'cmd_' . $command;
    unless ($self->can ($method)) {
        carp "Unknown command paragraph =$command";
        return;
    }
    $self->$method (@_);
}
442
# Handler for a verbatim paragraph; receives the paragraph text (plus a line
# number and Pod::Paragraph object that are not used here).  Whitespace-only
# paragraphs and anything inside an excluded block are dropped.  Otherwise
# trailing whitespace is reduced to one newline, tabs are expanded to
# eight-column stops, backslashes become \e, each line with content gets a
# leading zero-width \& so it can't be read as a request, and the result is
# written between .Vb <line count> and .Ve.
sub verbatim {
    my ($self, $text) = @_;
    return if $$self{EXCLUDE};
    return if $text =~ /^\s+$/;

    $text =~ s/\s+$/\n/;
    my $lines = ($text =~ tr/\n//);

    # Expand one run of tabs per pass until none are left.
    1 while $text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;

    $text =~ s/\\/\\e/g;
    $text =~ s/^(\s*\S)/'\&' . $1/gme;

    $self->makespace if $$self{NEEDSPACE};
    $self->output (".Vb $lines\n$text.Ve\n");
    $$self{NEEDSPACE} = 0;
}
461
# Called for a regular text block.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Inside an excluded block nothing is output;
# inside a =begin man/roff block the paragraph is passed through untouched.
# Otherwise runs of L</item> links are collapsed into prose, the text is
# parsed, font codes are mapped, leading periods and quotes are protected,
# and the result is output followed by any pending index entries.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->output ($_[0]), return if $$self{VERBATIM};

    # Perform a little magic to collapse multiple L<> references.  We'll
    # just rewrite the whole thing into actual text at this part, bypassing
    # the whole internal sequence parsing thing.  This has to operate on the
    # paragraph text itself, not on whatever happens to be in $_.
    my $text = shift;
    $text =~ s{
        (
          L<                    # A link of the form L</something>.
              /
              (
                  [:\w]+        # The item has to be a simple word...
                  (\(\))?       # ...or simple function.
              )
          >
          (
              ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
              L<
                  /
                  ( [:\w]+ ( \(\) )? )
              >
          )+
        )
    } {
        my $links = $1;

        # Strip the L</...> wrapper from each link, leaving just the names
        # and whatever punctuation separated them.
        $links =~ s{ L< / ([^>]+) > }{$1}gx;
        my @items = split /(?:,?\s+(?:and\s+)?)/, $links;

        # The pattern above only matches two or more links, so there is
        # always a final item to attach with "and".
        my $last = pop @items;
        my $string = 'the ' . join (', ', @items);
        $string .= ',' if @items > 1;
        $string . " and $last entries elsewhere in this document";
    }gex;

    # Parse the tree and output it.  collapse knows about references to
    # scalars as well as scalars and does the right thing with them.
    my $output = $self->parse ($text, @_);
    $output =~ s/\n\s*$/\n/;
    $self->makespace if $$self{NEEDSPACE};
    $self->output (protect ($self->mapfonts ($output)));
    $self->outindex;
    $$self{NEEDSPACE} = 1;
}
512
# Called for an interior sequence.  Takes a Pod::InteriorSequence object and
# returns a reference to a scalar blessed into Pod::Man::String holding the
# final formatted text.  It's returned as a reference so that other interior
# sequences above us know that the text has already been processed.  Empty
# content and X<> return a plain empty string instead.
#
# Unknown E<> escapes and unknown sequence letters produce a warning and are
# passed through literally (as E<name> or X<content>) so that no text is
# silently lost or replaced.
sub sequence {
    my ($self, $seq) = @_;
    my $command = $seq->cmd_name;

    # Wrap finished text in the marker class.  Going through a lexical copy
    # gives each result its own scalar to reference.
    my $done = sub {
        my $text = shift;
        return bless \$text, 'Pod::Man::String';
    };

    # Zero-width characters.
    return $done->('\&') if $command eq 'Z';

    # C<>, L<>, X<>, and E<> don't apply guesswork to their contents.
    local $_ = $self->collapse ($seq->parse_tree, $command =~ /^[CELX]$/);

    # Handle E<> escapes.  A purely numeric escape is a character code;
    # anything else has to be in %ESCAPES.
    if ($command eq 'E') {
        return $done->(chr ($_))      if /^\d+$/;
        return $done->($ESCAPES{$_}) if exists $ESCAPES{$_};
        carp "Unknown escape E<$_>";
        return $done->("E<$_>");
    }

    # For all the other sequences, empty content produces no output.
    return '' if $_ eq '';

    # Handle formatting sequences.  These emit symbolic start/end font
    # codes that mapfonts() later turns into real font escapes.  F<> is
    # rendered the same way as I<>.
    if ($command eq 'B') {
        return $done->('\f(BS' . $_ . '\f(BE');
    } elsif ($command eq 'F' || $command eq 'I') {
        return $done->('\f(IS' . $_ . '\f(IE');
    } elsif ($command eq 'C') {
        s/-/\\-/g;
        s/__/_\\|_/g;
        return $done->('\f(FS\*(C`' . $_ . "\\*(C'\\f(FE");
    }

    # Handle links.
    return $done->($self->buildlink ($_)) if $command eq 'L';

    # Whitespace protection replaces whitespace with "\ ".
    if ($command eq 'S') {
        s/\s+/\\ /g;
        return $done->($_);
    }

    # Add an index entry to the list of ones waiting to be output.
    if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' }

    # Anything else is unknown.  Warn and pass it through literally, the
    # same way an unknown escape is handled above.
    carp "Unknown sequence $command<$_>";
    return $done->("$command<$_>");
}
578
579
580############################################################################
581# Command paragraphs
582############################################################################
583
584# All command paragraphs take the paragraph and the line number.
585
# =head1: a first level heading, output as .SH.  Trailing whitespace is
# trimmed and any \s size changes are stripped from the parsed text, since
# .SH already uses small caps.  An index entry of type Header is output for
# every heading except NAME, which is skipped because of a bug in some
# versions of catman; pending X<> entries are flushed either way.
sub cmd_head1 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\\s-?\d//g;
    $self->output (switchquotes ('.SH', $self->mapfonts ($heading)));
    if ($heading eq 'NAME') {
        $self->outindex;
    } else {
        $self->outindex ('Header', $heading);
    }
    $$self{NEEDSPACE} = 0;
}
599
# =head2: a second level heading, output as .Sh together with an index
# entry of type Subsection.
sub cmd_head2 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    my $request = switchquotes ('.Sh', $self->mapfonts ($heading));
    $self->output ($request);
    $self->outindex ('Subsection', $heading);
    $$self{NEEDSPACE} = 0;
}
609
# =over: start a list.  The argument is used as the new indentation when it
# is a (possibly signed) integer followed by whitespace; anything else falls
# back to the default indent option.  For indents after the first, the
# outside indent is wrapped in .RS so that hanging paragraph tags will be
# correct.  The previous indentation is pushed onto the INDENTS stack.
sub cmd_over {
    my ($self, $width) = @_;
    $width = $$self{indent} unless $width =~ /^[-+]?\d+\s+$/;
    $self->output (".RS $$self{INDENT}\n") if @{ $$self{INDENTS} } > 0;
    push (@{ $$self{INDENTS} }, $$self{INDENT});
    $$self{INDENT} = $width + 0;
}
622
# =back: end a list and restore the indentation saved by the matching
# =over, warning about (and recovering from) an unmatched =back.  If we've
# closed an embedded indent, we've mangled the hanging paragraph indent, so
# it is temporarily replaced with .RS and WEIRDINDENT is set; that .RS gets
# closed at the next =back or =item.
sub cmd_back {
    my $self = shift;

    my $restored = pop @{ $$self{INDENTS} };
    if (!defined $restored) {
        carp "Unmatched =back";
        $restored = 0;
    }
    $$self{INDENT} = $restored;

    # Close the temporary .RS left behind by an earlier =back.
    if ($$self{WEIRDINDENT}) {
        $self->output (".RE\n");
        $$self{WEIRDINDENT} = 0;
    }

    # Still inside an outer list: swap the enclosing indent for a new one.
    if (@{ $$self{INDENTS} } > 0) {
        $self->output (".RE\n");
        $self->output (".RS $$self{INDENT}\n");
        $$self{WEIRDINDENT} = 1;
    }

    $$self{NEEDSPACE} = 1;
}
644
# =item: an individual list item, output as .Ip with the current
# indentation.  An index entry is emitted for anything interesting, but not
# for things like bare bullets and numbers.  A leading * is turned into a
# real *roff bullet (so for nice output, use * for your lists rather than o
# or . or - or some other thing).
sub cmd_item {
    my $self = shift;
    my $tag = $self->parse (@_);
    $tag =~ s/\s+$//;

    # Work out the index text before the bullet is rewritten.
    my $index;
    if ($tag =~ /\w/ && $tag !~ /^\w[.\)]\s*$/) {
        ($index = $tag) =~ s/^\s*[-*+o.]?\s*//;
    }
    $tag =~ s/^\*(\s|\Z)/\\\(bu$1/;

    # Close the temporary .RS left behind by an earlier =back.
    if ($$self{WEIRDINDENT}) {
        $self->output (".RE\n");
        $$self{WEIRDINDENT} = 0;
    }

    $tag = $self->mapfonts ($tag);
    $self->output (switchquotes ('.Ip', $tag, $$self{INDENT}));
    $self->outindex ($index ? ('Item', $index) : ());
    $$self{NEEDSPACE} = 0;
}
668
# =begin: start a block for a particular translator.  Blocks for man or
# roff set VERBATIM, which triggers special handling in textblock(); blocks
# for anything else set EXCLUDE.  A =begin without a format name is ignored.
sub cmd_begin {
    my ($self, $text) = @_;
    return unless $text =~ /^(\S+)/;
    my $kind = $1;
    my $flag = ($kind eq 'man' || $kind eq 'roff') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$flag} = 1;
}
681
# =end: close a translator block by clearing both the EXCLUDE and VERBATIM
# flags.  The format name is not checked; all =begin/=end pairs are assumed
# to be properly closed.
sub cmd_end {
    my ($self) = @_;
    $$self{EXCLUDE} = 0;
    return $$self{VERBATIM} = 0;
}
689
# =for: one paragraph for a particular translator.  Paragraphs tagged man or
# roff are output as-is with the tag (and any spaces, tabs, and one newline
# after it) removed; paragraphs for any other format are ignored.
sub cmd_for {
    my ($self, $text) = @_;
    return unless $text =~ s/^(?:man|roff)\b[ \t]*\n?//;
    $self->output ($text);
}
699
700
701############################################################################
702# Link handling
703############################################################################
704
# Handle links.  We can't actually make real hyperlinks, so this is all to
# figure out what text and formatting we print out.  Takes the content of an
# L<> sequence and returns the text to output for it:
#
#     text|anything        the explicit text
#     manpage(section)     the page name in italics followed by (section)
#     manpage              "the manpage manpage"
#     manpage/item         "the item entry in <manpage>"
#     manpage/"section"    'the section on "section" in <manpage>'
#     /item or item()      "the item entry elsewhere in this document"
#     "section"            'the section on "section"'
#
# Warns about a link that has neither a manpage nor a section.
sub buildlink {
    my $self = shift;
    local $_ = shift;

    # Smash whitespace in case we were split across multiple lines.
    s/\s+/ /g;

    # If we were given any explicit text, just output it.
    if (m{ ^ ([^|]+) \| }x) { return $1 }

    # Okay, leading and trailing whitespace isn't important.
    s/^\s+//;
    s/\s+$//;

    # Default to using the whole content of the link entry as a section
    # name.  Note that L<manpage/> forces a manpage interpretation, as does
    # something looking like L<manpage(section)>.  Do the same thing to
    # L<manpage(section)> as we would to manpage(section) without the L<>;
    # see guesswork().  If we've added italics, don't add the "manpage"
    # text; markup is sufficient.
    my ($manpage, $section) = ('', $_);
    if (/^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    } elsif (m{ ^ [-:.\w]+ (?: \( \S+ \) )? $ }x) {
        ($manpage, $section) = ($_, '');
        $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
    } elsif (m%/%) {
        ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
        if ($manpage =~ /^[-:.\w]+(?:\(\S+\))?$/) {
            # The substitution consumes the opening parenthesis of the
            # section number, so the replacement has to put it back.
            $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
        }
        $section =~ s/^\"\s*//;
        $section =~ s/\s*\"$//;
    }
    if ($manpage && $manpage !~ /\\f\(IS/) {
        $manpage = "the $manpage manpage";
    }

    # Now build the actual output text.  Only a section that consists
    # entirely of a simple word or function name counts as an "entry";
    # anything else, such as a multi-word heading, is a quoted section.
    my $text = '';
    if (!length ($section) && !length ($manpage)) {
        carp "Invalid link $_";
    } elsif (!length ($section)) {
        $text = $manpage;
    } elsif ($section =~ /^[:\w]+(?:\(\))?$/) {
        $text .= 'the ' . $section . ' entry';
        $text .= (length $manpage) ? " in $manpage"
                                   : " elsewhere in this document";
    } else {
        if ($section !~ /^".*"$/) { $section = '"' . $section . '"' }
        $text .= 'the section on ' . $section;
        $text .= " in $manpage" if length $manpage;
    }
    $text;
}
762
763
764############################################################################
765# Escaping and fontification
766############################################################################
767
# Replace the symbolic font codes embedded by sequence() with real *roff
# font escapes.  The codes have the form \f(<font>[SE], where <font> is one
# of B, I, or F and S/E mark the start or end of a sequence.  A nesting
# count is kept per font; after every code the three counts, reduced to 0 or
# 1 in the order fixed-width, bold, italic, form the key used to look up the
# escape in the FONTS table.  This way B<someI<thing> else> maps to
# \fBsome\f(BIthing\fB else\fR, going back to bold rather than roman when
# the inner sequence ends.
sub mapfonts {
    my ($self, $text) = @_;

    my ($mono, $strong, $slanted) = (0, 0, 0);
    my %counter = (F => \$mono, B => \$strong, I => \$slanted);

    $text =~ s{ \\f\((.)(.) }{
        my $step = ($2 eq 'S') ? 1 : -1;
        ${ $counter{$1} } += $step;
        my $key = ($mono ? 1 : 0) . ($strong ? 1 : 0) . ($slanted ? 1 : 0);
        $$self{FONTS}{$key};
    }gxe;

    return $text;
}
788
789
790############################################################################
791# *roff-specific parsing
792############################################################################
793
# Wrapper used instead of calling parse_text directly: passes its arguments
# on to parse_text with sequence() as the sequence expander and collapse()
# as the parse tree expander.
sub parse {
    my ($self, @args) = @_;
    my %flags = (-expand_seq => 'sequence', -expand_ptree => 'collapse');
    return $self->parse_text (\%flags, @args);
}
800
# Flatten a parse tree into a single string.  Takes the tree and a flag
# saying whether its plain text is literal.  Children that are references
# are text that has already been formatted and are used as they are.  Plain
# scalar children only get their backslashes turned into \e when the literal
# flag is true, and are run through guesswork() when it is not.  Assumes
# that everything in the parse tree is either a scalar or a reference to a
# scalar.
sub collapse {
    my ($self, $ptree, $literal) = @_;
    my @pieces;
    for my $node ($ptree->children) {
        if (ref $node) {
            push (@pieces, $$node);
        } elsif ($literal) {
            (my $text = $node) =~ s/\\/\\e/g;
            push (@pieces, $text);
        } else {
            push (@pieces, $self->guesswork ($node));
        }
    }
    return join ('', @pieces);
}
824
# Takes a text block to perform guesswork on; this is guaranteed not to
# contain any interior sequences.  Returns the text block with remapping
# done.  The steps run in a fixed order, since later patterns expect to see
# what the earlier ones produce (for instance the \s-1 inserted for words in
# capitals):
#
#   - backslashes become \e and double underbars get a thin space
#   - words in all capitals are made one point smaller
#   - PI becomes the predefined pi string
#   - func() is put in italics and func(n) is marked up as a manpage
#   - simple Perl variables are put in the fixed-width font
#   - dashes are turned into em dashes or \- as appropriate
#   - paired double quotes become the predefined quote strings
#   - C++ becomes the squinched predefined string
sub guesswork {
    my $self = shift;
    local $_ = shift;

    # rofficate backslashes.
    s/\\/\\e/g;

    # Ensure double underbars have a tiny space between them.
    s/__/_\\|_/g;

    # Make all caps a little smaller.  Be careful here, since we don't want
    # to make @ARGV into small caps, nor do we want to fix the MIME in
    # MIME-Version, since it looks weird with the full-height V.  The
    # trailing context is only a lookahead, so there are just two captures
    # to put back.
    s{
        ( ^ | [\s\(\"\'\`\[\{<>] )
        ( [A-Z] [A-Z] [/A-Z+:\d_\$&-]* )
        (?: (?= [\s>\}\]\)\'\".?!,;:] | -- ) | $ )
    } { $1 . '\s-1' . $2 . '\s0' }egx;

    # Turn PI into a pretty pi.
    s{ (?: \\s-1 | \b ) PI (?: \\s0 | \b ) } {\\*\(PI}gx;

    # Italize functions in the form func().
    s{
        \b
        (
            [:\w]+ (?:\\s-1)? \(\)
        )
    } { '\f(IS' . $1 . '\f(IE' }egx;

    # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
    s{
        \b
        (\w[-:.\w]+ (?:\\s-1)?)
        (
            \( [^\)] \)
        )
    } { '\f(IS' . $1 . '\f(IE\|' . $2 }egx;

    # Convert simple Perl variable references to a fixed-width font.
    s{
        ( \s+ )
        ( [\$\@%] [\w:]+ )
        (?! \( )
    } { $1 . '\f(FS' . $2 . '\f(FE'}egx;

    # Translate -- into a real em dash if it's used like one and fix up
    # dashes, but keep hyphens hyphens.
    s{ (\G|^|.) (-+) (\b|.) } {
        my ($pre, $dash, $post) = ($1, $2, $3);
        if (length ($dash) == 1) {
            ($pre =~ /[a-zA-Z]/) ? "$pre-$post" : "$pre\\-$post";
        } elsif (length ($dash) == 2
                 && ((!$pre && !$post)
                     || ($pre =~ /\w/ && !$post)
                     || ($pre eq ' ' && $post eq ' ')
                     || ($pre eq '=' && $post ne '=')
                     || ($pre ne '=' && $post eq '='))) {
            "$pre\\*(--$post";
        } else {
            $pre . ('\-' x length $dash) . $post;
        }
    }egxs;

    # Fix up double quotes.
    s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;

    # Make C++ into \*(C+, which is a squinched version.
    s{ \b C\+\+ } {\\*\(C+}gx;

    # All done.
    $_;
}
901
902
903############################################################################
904# Output formatting
905############################################################################
906
# Output vertical whitespace between paragraphs: .Sp when the current
# indentation is greater than zero and .PP otherwise.
sub makespace {
    my $self = shift;
    my $request = ($$self{INDENT} > 0) ? '.Sp' : '.PP';
    $self->output ("$request\n");
}
912
# Output any pending index entries, and optionally an index entry given as
# an argument.  Takes an optional entry type (such as Header, Subsection, or
# Item) and the text for that entry.  Pending X<> entries may hold several
# keys separated by slashes; they are all written as one .IX Xref line and
# the pending list is emptied.  The optional entry is written as a second
# .IX line of the given type after formatting escapes have been stripped
# from its text.  Nothing is output when there is neither a type nor a
# pending entry.
sub outindex {
    my ($self, $section, $index) = @_;
    my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
    return unless ($section || @entries);
    $$self{INDEX} = [];

    # Both parts accumulate into the same string so that the Xref line is
    # not lost when there is no section entry (or vice versa).
    my $output = '';
    if (@entries) {
        for (@entries) { s/\"/\"\"/g }
        $output .= '.IX Xref "' . join (' ', @entries) . '"' . "\n";
    }
    if ($section) {
        # Strip escapes first.  Some of them (the quote strings) contain a
        # double quote themselves, and that one must not be doubled.  Then
        # double every literal quote that is left, since the text goes
        # inside a quoted argument.
        $index =~ s/\\-/-/g;
        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
        $index =~ s/\"/\"\"/g;
        $output .= ".IX $section " . '"' . $index . '"' . "\n";
    }
    $self->output ($output);
}
935
# Write the given text to the file handle returned by the output_handle
# method of the invoking object.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print {$handle} $text;
}
938
939__END__
940
941.\" These are some extra bits of roff that I don't want to lose track of
942.\" but that have been removed from the preamble to make it a bit shorter
943.\" since they're not currently being used. They're accents and special
944.\" characters we don't currently have escapes for.
945.if n \{\
946. ds ? ?
947. ds ! !
948. ds q
949.\}
950.if t \{\
951. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
952. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
953. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
954.\}
955.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
956.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
957.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
958.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
959.ds oe o\h'-(\w'o'u*4/10)'e
960.ds Oe O\h'-(\w'O'u*4/10)'E
961.if \n(.H>23 .if \n(.V>19 \
962\{\
963. ds v \h'-1'\o'\(aa\(ga'
964. ds _ \h'-1'^
965. ds . \h'-1'.
966. ds 3 3
967. ds oe oe
968. ds Oe OE
969.\}
970
971############################################################################
972# Documentation
973############################################################################
974
975=head1 NAME
976
977Pod::Man - Convert POD data to formatted *roff input
978
979=head1 SYNOPSIS
980
981 use Pod::Man;
982 my $parser = Pod::Man->new (release => $VERSION, section => 8);
983
984 # Read POD from STDIN and write to STDOUT.
985 $parser->parse_from_filehandle;
986
987 # Read POD from file.pod and write to file.1.
988 $parser->parse_from_file ('file.pod', 'file.1');
989
990=head1 DESCRIPTION
991
992Pod::Man is a module to convert documentation in the POD format (the
993preferred language for documenting Perl) into *roff input using the man
994macro set. The resulting *roff code is suitable for display on a terminal
995using nroff(1), normally via man(1), or printing using troff(1). It is
conventionally invoked using the driver script B<pod2man>, but it can also
be used directly.
998
999As a derived class from Pod::Parser, Pod::Man supports the same methods and
1000interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
1001new parser with C<Pod::Man-E<gt>new()> and then calls either
1002parse_from_filehandle() or parse_from_file().
1003
1004new() can take options, in the form of key/value pairs that control the
1005behavior of the parser. See below for details.
1006
If no options are given, Pod::Man uses the name of the input file with any
trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, and
defaults to section 1 unless the file ended in C<.pm> (in which case it
defaults to section 3), to a centered title of "User Contributed Perl
Documentation", to a centered footer of the Perl version it is run with, and
to a left-hand footer of the modification date of its input (or the current
date if given STDIN for input).
1014
1015Pod::Man assumes that your *roff formatters have a fixed-width font named
1016CW. If yours is called something else (like CR), use the C<fixed> option to
1017specify it. This generally only matters for troff output for printing.
1018Similarly, you can set the fonts used for bold, italic, and bold italic
1019fixed-width output.
1020
1021Besides the obvious pod conversions, Pod::Man also takes care of formatting
1022func(), func(n), and simple variable references like $foo or @bar so you
1023don't have to use code escapes for them; complex expressions like
1024C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1025dashes that aren't used as hyphens into en dashes, makes long dashes--like
1026this--into proper em dashes, fixes "paired quotes," makes C++ and PI look
1027right, puts a little space between double underbars, makes ALLCAPS a teeny
1028bit smaller in troff(1), and escapes stuff that *roff treats as special so
1029that you don't have to.
1030
1031The recognized options to new() are as follows. All options take a single
1032argument.
1033
1034=over 4
1035
1036=item center
1037
1038Sets the centered page header to use instead of "User Contributed Perl
1039Documentation".
1040
1041=item date
1042
1043Sets the left-hand footer. By default, the modification date of the input
1044file will be used, or the current date if stat() can't find that file (the
1045case if the input is from STDIN), and the date will be formatted as
1046YYYY-MM-DD.
1047
1048=item fixed
1049
The fixed-width font to use for verbatim text and code. Defaults to CW.
1051Some systems may want CR instead. Only matters for troff(1) output.
1052
1053=item fixedbold
1054
1055Bold version of the fixed-width font. Defaults to CB. Only matters for
1056troff(1) output.
1057
1058=item fixeditalic
1059
1060Italic version of the fixed-width font (actually, something of a misnomer,
1061since most fixed-width fonts only have an oblique version, not an italic
1062version). Defaults to CI. Only matters for troff(1) output.
1063
1064=item fixedbolditalic
1065
1066Bold italic (probably actually oblique) version of the fixed-width font.
1067Pod::Man doesn't assume you have this, and defaults to CB. Some systems
1068(such as Solaris) have this font available as CX. Only matters for troff(1)
1069output.
1070
1071=item release
1072
1073Set the centered footer. By default, this is the version of Perl you run
Pod::Man under. Note that some system man macro sets assume that the
1075centered footer will be a modification date and will prepend something like
1076"Last modified: "; if this is the case, you may want to set C<release> to
1077the last modified date and C<date> to the version number.
1078
1079=item section
1080
1081Set the section for the C<.TH> macro. The standard section numbering
1082convention is to use 1 for user commands, 2 for system calls, 3 for
1083functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1084miscellaneous information, and 8 for administrator commands. There is a lot
1085of variation here, however; some systems (like Solaris) use 4 for file
1086formats, 5 for miscellaneous information, and 7 for devices. Still others
1087use 1m instead of 8, or some mix of both. About the only section numbers
1088that are reliably consistent are 1, 2, and 3.
1089
1090By default, section 1 will be used unless the file ends in .pm in which case
1091section 3 will be selected.
1092
1093=back
1094
1095The standard Pod::Parser method parse_from_filehandle() takes up to two
1096arguments, the first being the file handle to read POD from and the second
1097being the file handle to write the formatted output to. The first defaults
1098to STDIN if not given, and the second defaults to STDOUT. The method
1099parse_from_file() is almost identical, except that its two arguments are the
1100input and output disk files instead. See L<Pod::Parser> for the specific
1101details.
1102
1103=head1 DIAGNOSTICS
1104
1105=over 4
1106
1107=item roff font should be 1 or 2 chars, not `%s'
1108
1109(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1110wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1111longer than two characters, although some *roff extensions do (the canonical
1112versions of nroff(1) and troff(1) don't either).
1113
1114=item Invalid link %s
1115
1116(W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was
1117unable to parse. You should never see this error message; it probably
1118indicates a bug in Pod::Man.
1119
1120=item Unknown escape EE<lt>%sE<gt>
1121
1122(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1123know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1124
1125=item Unknown sequence %s
1126
1127(W) The POD source contained a non-standard interior sequence (something of
1128the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1129
1130=item Unmatched =back
1131
1132(W) Pod::Man encountered a C<=back> command that didn't correspond to an
1133C<=over> command.
1134
1135=back
1136
1137=head1 BUGS
1138
1139The lint-like features and strict POD format checking done by B<pod2man> are
1140not yet implemented and should be, along with the corresponding C<lax>
1141option.
1142
1143The NAME section should be recognized specially and index entries emitted
1144for everything in that section. This would have to be deferred until the
next section, since extraneous things in NAME tend to confuse various man
1146page processors.
1147
1148The handling of hyphens, en dashes, and em dashes is somewhat fragile, and
1149one may get the wrong one under some circumstances. This should only matter
1150for troff(1) output.
1151
1152When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1153necessarily get it right.
1154
1155Pod::Man doesn't handle font names longer than two characters. Neither do
1156most troff(1) implementations, but GNU troff does as an extension. It would
1157be nice to support as an option for those who want to use it.
1158
1159The preamble added to each output file is rather verbose, and most of it is
1160only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1161characters. It would ideally be nice if all of those definitions were only
1162output if needed, perhaps on the fly as the characters are used.
1163
1164Some of the automagic applied to file names assumes Unix directory
1165separators.
1166
1167Pod::Man is excessively slow.
1168
1169=head1 NOTES
1170
1171The intention is for this module and its driver script to eventually replace
1172B<pod2man> in Perl core.
1173
1174=head1 SEE ALSO
1175
L<Pod::Parser|Pod::Parser>, perlpod(1), pod2man(1), nroff(1), troff(1),
man(1), man(7)
1178
1179Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1180Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
1181the best documentation of standard nroff(1) and troff(1). At the time of
1182this writing, it's available at http://www.cs.bell-labs.com/cm/cs/cstr.html.
1183
1184The man page documenting the man macro set may be man(5) instead of man(7)
on your system. Also, please see pod2man(1) for extensive documentation on
writing manual pages if you've not done it before and aren't familiar with
the conventions.
1187the conventions.
1188
1189=head1 AUTHOR
1190
1191Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
1192original B<pod2man> by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>.
1193
1194=cut