This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to podlators-1.19.
[perl5.git] / lib / Pod / Man.pm
CommitLineData
9741dab0 1# Pod::Man -- Convert POD data to formatted *roff input.
2da3dd12 2# $Id: Man.pm,v 1.32 2002/01/02 09:02:24 eagle Exp $
9741dab0 3#
2da3dd12 4# Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
9741dab0 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
9741dab0
GS
7# under the same terms as Perl itself.
8#
b84d8b9e
JH
9# This module translates POD documentation into *roff markup using the man
10# macro set, and is intended for converting POD documents written as Unix
11# manual pages to manual pages that can be read by the man(1) command. It is
12# a replacement for the pod2man command distributed with versions of Perl
13# prior to 5.6.
c9abbd5d
GS
14#
15# Perl core hackers, please note that this module is also separately
16# maintained outside of the Perl core as part of the podlators. Please send
17# me any patches at the address above in addition to sending them to the
18# standard Perl mailing lists.
9741dab0 19
3c014959 20##############################################################################
9741dab0 21# Modules and declarations
3c014959 22##############################################################################
9741dab0
GS
23
24package Pod::Man;
25
b84d8b9e 26require 5.005;
9741dab0
GS
27
28use Carp qw(carp croak);
bf202ccd 29use Pod::ParseLink qw(parselink);
9741dab0
GS
30use Pod::Parser ();
31
32use strict;
33use subs qw(makespace);
34use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
35
36@ISA = qw(Pod::Parser);
37
3c014959
JH
38# Don't use the CVS revision as the version, since this module is also in Perl
39# core and too many things could munge CVS magic revision strings. This
40# number should ideally be the same as the CVS revision in podlators, however.
2da3dd12 41$VERSION = 1.32;
9741dab0
GS
42
43
3c014959 44##############################################################################
9741dab0 45# Preamble and *roff output tables
3c014959 46##############################################################################
9741dab0
GS
47
48# The following is the static preamble which starts all *roff output we
49# generate. It's completely static except for the font to use as a
ab1f1d91 50# fixed-width font, which is designated by @CFONT@, and the left and right
3c014959
JH
51# quotes to use for C<> text, designated by @LQUOTE@ and @RQUOTE@. $PREAMBLE
52# should therefore be run through s/\@CFONT\@/<font>/g before output.
9741dab0
GS
53$PREAMBLE = <<'----END OF PREAMBLE----';
54.de Sh \" Subsection heading
55.br
56.if t .Sp
57.ne 5
58.PP
59\fB\\$1\fR
60.PP
61..
62.de Sp \" Vertical space (when we can't use .PP)
63.if t .sp .5v
64.if n .sp
65..
9741dab0
GS
66.de Vb \" Begin verbatim text
67.ft @CFONT@
68.nf
69.ne \\$1
70..
71.de Ve \" End verbatim text
72.ft R
9741dab0
GS
73.fi
74..
75.\" Set up some character translations and predefined strings. \*(-- will
76.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
77.\" double quote, and \*(R" will give a right double quote. | will give a
3c014959
JH
78.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
79.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
80.\" expand to `' in nroff, nothing in troff, for use with C<>.
9741dab0
GS
81.tr \(*W-|\(bv\*(Tr
82.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
83.ie n \{\
84. ds -- \(*W-
85. ds PI pi
86. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
87. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
88. ds L" ""
89. ds R" ""
ab1f1d91
JH
90. ds C` @LQUOTE@
91. ds C' @RQUOTE@
9741dab0
GS
92'br\}
93.el\{\
94. ds -- \|\(em\|
95. ds PI \(*p
96. ds L" ``
97. ds R" ''
98'br\}
99.\"
3c014959
JH
100.\" If the F register is turned on, we'll generate index entries on stderr for
101.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
102.\" entries marked with X<> in POD. Of course, you'll have to process the
103.\" output yourself in some meaningful fashion.
9741dab0
GS
104.if \nF \{\
105. de IX
106. tm Index:\\$1\t\\n%\t"\\$2"
f3248e50 107..
9741dab0
GS
108. nr % 0
109. rr F
110.\}
111.\"
3c014959
JH
112.\" For nroff, turn off justification. Always turn off hyphenation; it makes
113.\" way too many mistakes in technical documents.
9741dab0
GS
114.hy 0
115.if n .na
116.\"
117.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
118.\" Fear. Run. Save yourself. No user-serviceable parts.
9741dab0
GS
119. \" fudge factors for nroff and troff
120.if n \{\
121. ds #H 0
122. ds #V .8m
123. ds #F .3m
124. ds #[ \f1
125. ds #] \fP
126.\}
127.if t \{\
128. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
129. ds #V .6m
130. ds #F 0
131. ds #[ \&
132. ds #] \&
133.\}
134. \" simple accents for nroff and troff
135.if n \{\
136. ds ' \&
137. ds ` \&
138. ds ^ \&
139. ds , \&
140. ds ~ ~
141. ds /
142.\}
143.if t \{\
144. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
145. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
146. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
147. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
148. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
149. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
150.\}
151. \" troff and (daisy-wheel) nroff accents
152.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
153.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
154.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
155.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
156.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
157.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
158.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
159.ds ae a\h'-(\w'a'u*4/10)'e
160.ds Ae A\h'-(\w'A'u*4/10)'E
161. \" corrections for vroff
162.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
163.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
164. \" for low resolution devices (crt and lpr)
165.if \n(.H>23 .if \n(.V>19 \
166\{\
167. ds : e
168. ds 8 ss
169. ds o a
170. ds d- d\h'-1'\(ga
171. ds D- D\h'-1'\(hy
172. ds th \o'bp'
173. ds Th \o'LP'
174. ds ae ae
175. ds Ae AE
176.\}
177.rm #[ #] #H #V #F C
178----END OF PREAMBLE----
5cdeb5a2
JH
179#`# for cperl-mode
180
9741dab0
GS
# This table is taken nearly verbatim from Tom Christiansen's pod2man. It
# assumes that the standard preamble has already been printed, since that's
# what defines all of the accent marks. Some values are quoted with double
# quotes because they contain embedded single quotes, so \\ is used uniformly
# for a backslash for readability; every value holds exactly one backslash.
#
# The *roff string names used here must match the preamble exactly, since
# *roff string names are case-sensitive: the AE ligature is defined there as
# "Ae" (not "AE"), and the lowercase one as "ae".
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'apos'      =>    "'",      # apostrophe
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote
    'sol'       =>    '/',      # solidus (forward slash)
    'verbar'    =>    '|',      # vertical bar

    'Aacute'    =>    "A\\*'",  # capital A, acute accent
    'aacute'    =>    "a\\*'",  # small a, acute accent
    'Acirc'     =>    'A\\*^',  # capital A, circumflex accent
    'acirc'     =>    'a\\*^',  # small a, circumflex accent
    'AElig'     =>    '\\*(Ae', # capital AE diphthong (ligature)
    'aelig'     =>    '\\*(ae', # small ae diphthong (ligature)
    'Agrave'    =>    "A\\*`",  # capital A, grave accent
    'agrave'    =>    "a\\*`",  # small a, grave accent
    'Aring'     =>    'A\\*o',  # capital A, ring
    'aring'     =>    'a\\*o',  # small a, ring
    'Atilde'    =>    'A\\*~',  # capital A, tilde
    'atilde'    =>    'a\\*~',  # small a, tilde
    'Auml'      =>    'A\\*:',  # capital A, dieresis or umlaut mark
    'auml'      =>    'a\\*:',  # small a, dieresis or umlaut mark
    'Ccedil'    =>    'C\\*,',  # capital C, cedilla
    'ccedil'    =>    'c\\*,',  # small c, cedilla
    'Eacute'    =>    "E\\*'",  # capital E, acute accent
    'eacute'    =>    "e\\*'",  # small e, acute accent
    'Ecirc'     =>    'E\\*^',  # capital E, circumflex accent
    'ecirc'     =>    'e\\*^',  # small e, circumflex accent
    'Egrave'    =>    'E\\*`',  # capital E, grave accent
    'egrave'    =>    'e\\*`',  # small e, grave accent
    'ETH'       =>    '\\*(D-', # capital Eth, Icelandic
    'eth'       =>    '\\*(d-', # small eth, Icelandic
    'Euml'      =>    'E\\*:',  # capital E, dieresis or umlaut mark
    'euml'      =>    'e\\*:',  # small e, dieresis or umlaut mark
    'Iacute'    =>    "I\\*'",  # capital I, acute accent
    'iacute'    =>    "i\\*'",  # small i, acute accent
    'Icirc'     =>    'I\\*^',  # capital I, circumflex accent
    'icirc'     =>    'i\\*^',  # small i, circumflex accent
    'Igrave'    =>    'I\\*`',  # capital I, grave accent
    'igrave'    =>    'i\\*`',  # small i, grave accent
    'Iuml'      =>    'I\\*:',  # capital I, dieresis or umlaut mark
    'iuml'      =>    'i\\*:',  # small i, dieresis or umlaut mark
    'Ntilde'    =>    'N\\*~',  # capital N, tilde
    'ntilde'    =>    'n\\*~',  # small n, tilde
    'Oacute'    =>    "O\\*'",  # capital O, acute accent
    'oacute'    =>    "o\\*'",  # small o, acute accent
    'Ocirc'     =>    'O\\*^',  # capital O, circumflex accent
    'ocirc'     =>    'o\\*^',  # small o, circumflex accent
    'Ograve'    =>    'O\\*`',  # capital O, grave accent
    'ograve'    =>    'o\\*`',  # small o, grave accent
    'Oslash'    =>    'O\\*/',  # capital O, slash
    'oslash'    =>    'o\\*/',  # small o, slash
    'Otilde'    =>    'O\\*~',  # capital O, tilde
    'otilde'    =>    'o\\*~',  # small o, tilde
    'Ouml'      =>    'O\\*:',  # capital O, dieresis or umlaut mark
    'ouml'      =>    'o\\*:',  # small o, dieresis or umlaut mark
    'szlig'     =>    '\\*8',   # small sharp s, German (sz ligature)
    'THORN'     =>    '\\*(Th', # capital THORN, Icelandic
    'thorn'     =>    '\\*(th', # small thorn, Icelandic
    'Uacute'    =>    "U\\*'",  # capital U, acute accent
    'uacute'    =>    "u\\*'",  # small u, acute accent
    'Ucirc'     =>    'U\\*^',  # capital U, circumflex accent
    'ucirc'     =>    'u\\*^',  # small u, circumflex accent
    'Ugrave'    =>    'U\\*`',  # capital U, grave accent
    'ugrave'    =>    'u\\*`',  # small u, grave accent
    'Uuml'      =>    'U\\*:',  # capital U, dieresis or umlaut mark
    'uuml'      =>    'u\\*:',  # small u, dieresis or umlaut mark
    'Yacute'    =>    "Y\\*'",  # capital Y, acute accent
    'yacute'    =>    "y\\*'",  # small y, acute accent
    'yuml'      =>    'y\\*:',  # small y, dieresis or umlaut mark

    'nbsp'      =>    '\\ ',    # non-breaking space
    'shy'       =>    '',       # soft (discretionary) hyphen
);
261
262
3c014959 263##############################################################################
9741dab0 264# Static helper functions
3c014959 265##############################################################################
9741dab0 266
3c014959
JH
# Guard text against being read as *roff requests. Any line that begins with
# a period, a single quote, or a backslash gets a zero-width \& placed in
# front of it; a leading backslash is included because it could expand to (or
# hide) something *roff would treat as a command. This is overkill, but far
# simpler than trying to parse *roff here. Returns the protected copy.
sub protect {
    my $text = shift;
    $text =~ s/^([.\'\\])/\\&$1/mg;
    return $text;
}
5cdeb5a2 276
9741dab0
GS
# Translate a font name into the *roff escape that selects it: names longer
# than one character need the \f( form, single-character names use plain \f.
sub toescape {
    my $font = $_[0];
    return '\f(' . $font if length ($font) > 1;
    return '\f' . $font;
}
279
5cdeb5a2 280
3c014959 281##############################################################################
9741dab0 282# Initialization
3c014959
JH
283##############################################################################
284
# Initialize the object, processing any options handed to the constructor and
# filling in defaults for the rest. center is the centered page title,
# release is the version number, and date is the date of the documentation.
# The name of the file being processed can't be known here because of the
# architecture of Pod::Parser, so it *has* to be passed to the constructor or
# set separately with Pod::Man::name().
sub initialize {
    my $self = shift;

    # Validate user-supplied fixed-width fonts: a *roff font name is one or
    # two characters. Fonts that were not supplied start out empty so that
    # the defaults below apply.
    foreach my $font (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
        if (!defined $self->{$font}) {
            $self->{$font} = '';
            next;
        }
        my $width = length ($self->{$font});
        next if $width == 1 || $width == 2;
        croak qq(roff font should be 1 or 2 chars, not "$self->{$font}");
    }

    # Default fonts. There's no telling what fixed bold-italic is called, so
    # it falls back to plain fixed bold.
    $self->{fixed}           ||= 'CW';
    $self->{fixedbold}       ||= 'CB';
    $self->{fixeditalic}     ||= 'CI';
    $self->{fixedbolditalic} ||= 'CB';

    # Font escape lookup table. The key is three flags: fixed-width, bold,
    # and italic, in that order.
    my %fonts = ('000' => '\fR', '001' => '\fI',
                 '010' => '\fB', '011' => '\f(BI');
    $fonts{'100'} = toescape ($self->{fixed});
    $fonts{'101'} = toescape ($self->{fixeditalic});
    $fonts{'110'} = toescape ($self->{fixedbold});
    $fonts{'111'} = toescape ($self->{fixedbolditalic});
    $self->{FONTS} = \%fonts;

    # Extra stuff for page titles.
    if (!defined $self->{center}) {
        $self->{center} = 'User Contributed Perl Documentation';
    }
    if (!defined $self->{indent}) {
        $self->{indent} = 4;
    }

    # Derive the release from the running Perl. $] is split into version,
    # patchlevel, and subversion; the subversion is scaled so that both the
    # pre-5.6 (5.00503) and post-5.6 (5.006001) layouts come out right, and
    # adding zero strips the leading zeroes from each component.
    if (!defined $self->{release}) {
        my @parts = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
        $parts[2] ||= 0;
        $parts[2] *= 10 ** (3 - length $parts[2]);
        $self->{release} = 'perl v' . join ('.', map { $_ + 0 } @parts);
    }

    # These values end up inside double-quoted .TH arguments, so double any
    # double quotes they contain.
    foreach my $field (qw/center date release/) {
        next unless $self->{$field};
        $self->{$field} =~ s/\"/\"\"/g;
    }

    # Work out the quotes for C<> text: "none", a single character used on
    # both sides, or a two- or four-character string split evenly into left
    # and right halves.
    $self->{quotes} ||= '"';
    my $quotes = $self->{quotes};
    if ($quotes eq 'none') {
        $self->{LQUOTE} = $self->{RQUOTE} = '';
    } elsif (length ($quotes) == 1) {
        $self->{LQUOTE} = $self->{RQUOTE} = $quotes;
    } elsif ($quotes =~ /^(.)(.)$/ || $quotes =~ /^(..)(..)$/) {
        $self->{LQUOTE} = $1;
        $self->{RQUOTE} = $2;
    } else {
        croak qq(Invalid quote specification "$self->{quotes}");
    }

    # Double only the first quote in each; this must not be s///g, because
    # two double quotes are written in *roff as three double quotes, not
    # four. Weird, but true.
    foreach my $side (qw/LQUOTE RQUOTE/) {
        $self->{$side} =~ s/\"/\"\"/;
    }

    $self->SUPER::initialize;
}
367
# Called at the start of each document. Works out the page name and section,
# picks a date, prints the preamble and the .TH title line to the output
# handle, and resets the per-document state.
sub begin_pod {
    my $self = shift;

    # Use the name and section given to us, or else guess both from the name
    # of the input file.
    my $section = $self->{section} || 1;
    my $name = $self->{name};
    unless (defined $name) {
        $name = $self->input_file;
        if (!$self->{section} && $name =~ /\.pm\z/i) {
            $section = 3;
        }
        $name =~ s/\.p(od|[lm])\z//i;
        if ($section =~ /^3/) {
            # Assume that we're dealing with a module. We want the full
            # module name from the path, but without too much of the path in
            # it. Lose everything up to the first of:
            #
            #     */lib/*perl*/         standard or site_perl module
            #     */*perl*/lib/         from -Dprefix=/opt/perl
            #     */*perl*/             random module hierarchy
            #
            # Then strip a leading site or site_perl component, any version
            # number component, any OS-specific component, and an initial
            # "lib" or "blib/lib", since that's what ExtUtils::MakeMaker
            # creates. splitdir requires at least File::Spec 0.8.
            require File::Spec;
            my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
            my @dirs = File::Spec->splitdir ($dirs);
            my $cut = 0;
            for my $i (0 .. $#dirs) {
                if ($dirs[$i] eq 'lib' && $dirs[$i + 1] =~ /perl/) {
                    $cut = $i + 2;
                    last;
                }
                if ($dirs[$i] =~ /perl/) {
                    $cut = $i + 1;
                    $cut++ if $dirs[$i + 1] eq 'lib';
                    last;
                }
            }
            if ($cut > 0) {
                splice (@dirs, 0, $cut);
                shift @dirs if ($dirs[0] =~ /^site(_perl)?$/);
                shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
                shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
            }
            shift @dirs if $dirs[0] eq 'lib';
            if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib') {
                splice (@dirs, 0, 2);
            }

            # Empty components are dropped when building the module name;
            # they occur too easily on Unix from doubled slashes.
            $name = join ('::', (grep { $_ } @dirs), $file);
        } else {
            require File::Basename;
            $name = uc (File::Basename::basename ($name));
        }
    }

    # A name containing whitespace has to be quoted; this mostly comes up
    # when the input is stdin.
    if ($name =~ /\s/) {
        $name = '"' . $name . '"';
    }

    # Modification date header. Prefer the modification time of the input,
    # falling back to the current time.
    unless (defined $self->{date}) {
        my $stamp = (stat ($self->input_file))[9] || time;
        my ($day, $month, $year) = (localtime $stamp)[3,4,5];
        $self->{date} = sprintf ('%4d-%02d-%02d',
                                 $year + 1900, $month + 1, $day);
    }

    # Fill in the preamble template and print it along with the title. The
    # meaning of the .TH arguments unfortunately varies by system; some treat
    # the fourth argument as a "source" and others as a version number.
    # Generally it's just presented as the left-side footer, so it doesn't
    # matter too much. Older versions of this module had date and release
    # reversed; this order is correct for both Solaris and Linux.
    my $preamble = $PREAMBLE;
    $preamble =~ s/\@CFONT\@/$self->{fixed}/;
    $preamble =~ s/\@LQUOTE\@/$self->{LQUOTE}/;
    $preamble =~ s/\@RQUOTE\@/$self->{RQUOTE}/;
    chomp $preamble;
    my $pversion = $Pod::Parser::VERSION;
    print { $self->output_handle } <<"----END OF HEADER----";
.\\" Automatically generated by Pod::Man v$VERSION, Pod::Parser v$pversion
.\\"
.\\" Standard preamble:
.\\" ========================================================================
$preamble
.\\" ========================================================================
.\\"
.IX Title "$name $section"
.TH $name $section "$self->{date}" "$self->{release}" "$self->{center}"
----END OF HEADER----

    # Initialize a few per-file variables.
    $self->{INDENT}    = 0;     # Current indentation level.
    $self->{INDENTS}   = [];    # Stack of indentations.
    $self->{INDEX}     = [];    # Index keys waiting to be printed.
    $self->{IN_NAME}   = 0;     # Whether processing the NAME section.
    $self->{ITEMS}     = 0;     # The number of consecutive =items.
    $self->{SHIFTWAIT} = 0;     # Whether there is a shift waiting.
    $self->{SHIFTS}    = [];    # Stack of .RS shifts.
}
477
478
3c014959 479##############################################################################
9741dab0 480# Core overrides
3c014959 481##############################################################################
9741dab0
GS
482
# Called for each command paragraph with the command, the associated
# paragraph text, the line number, and a Pod::Paragraph object. Dispatches to
# the method named cmd_<command> if there is one and warns about an unknown
# command otherwise. =pod is ignored, and while an excluded =begin block is
# open everything except =end is ignored. =cut is handled internally by
# Pod::Parser.
sub command {
    my $self = shift;
    my $command = shift;
    return if $command eq 'pod';
    return if ($self->{EXCLUDE} && $command ne 'end');

    my $handler = 'cmd_' . $command;
    return $self->$handler (@_) if $self->can ($handler);

    # No handler: report where the unknown command was seen, using the
    # position recorded in the paragraph object.
    my ($text, $paragraph) = @_[0, 2];
    my ($file, $line) = $paragraph->file_line;
    $text =~ s/\n+\z//;
    $text = " $text" if ($text =~ /^\S/);
    warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
    return;
}
505
3c014959
JH
# Called for a verbatim paragraph with the paragraph text, the line number,
# and a Pod::Paragraph object. Expands tabs, rofficates backslashes, puts a
# zero-width character at the start of each line to protect against commands,
# and wraps the result in .Vb/.Ve. Nothing is output inside an excluded block
# or for a paragraph consisting only of whitespace.
sub verbatim {
    my $self = shift;
    return if $self->{EXCLUDE};
    my $text = shift;
    return if $text =~ /^\s+$/;

    # Reduce trailing whitespace to one newline, then count the lines so
    # that .Vb can be given the line count.
    $text =~ s/\s+$/\n/;
    my $lines = ($text =~ tr/\n/\n/);

    # Expand one run of tabs at a time to eight-column tab stops.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
        # All the work happens in the substitution.
    }
    $text =~ s/\\/\\e/g;
    $text =~ s/^(\s*\S)/'\&' . $1/gme;

    $self->makespace;
    $self->output (".Vb $lines\n$text.Ve\n");
    $self->{NEEDSPACE} = 1;
}
524
3c014959
JH
# Called for a regular text block with the paragraph text, the line number,
# and a Pod::Paragraph object. Interpolates formatting codes and outputs the
# result. Inside an excluded block nothing is output, and inside a verbatim
# (=begin man/roff) block the raw text is passed straight through.
sub textblock {
    my $self = shift;
    return if $self->{EXCLUDE};
    if ($self->{VERBATIM}) {
        $self->output ($_[0]);
        return;
    }

    # Parse the paragraph and tidy up any trailing whitespace.
    my $text = $self->parse (@_);
    $text =~ s/\n\s*$/\n/;

    # Output the paragraph. An =over that is followed by text rather than
    # by an =item leaves SHIFTWAIT set; in that case open the indent with
    # .RS here and record it on the SHIFTS stack.
    $self->makespace;
    if ($self->{SHIFTWAIT}) {
        $self->output (".RS $self->{INDENT}\n");
        push (@{ $self->{SHIFTS} }, $self->{INDENT});
        $self->{SHIFTWAIT} = 0;
    }
    $self->output (protect ($self->textmapfonts ($text)));
    $self->outindex;
    $self->{NEEDSPACE} = 1;
}
553
# Called for a formatting code. Takes the formatter and a
# Pod::InteriorSequence object. Finished text is returned wrapped in a
# one-element array reference, so that formatting codes above us know it has
# already been processed; a plain string is returned when there is nothing
# formatted to hand back (the raw text of a code nested inside L<>, or the
# empty string for codes that produce no output).
sub sequence {
    my ($self, $seq) = @_;
    my $command = $seq->cmd_name;

    # Processing of the inside of an L<> formatting code has to be deferred.
    # If this code is nested inside an L<> code at any depth, return its
    # literal raw text.
    my $parent = $seq->nested;
    while (defined $parent) {
        return $seq->raw_text if ($parent->cmd_name eq 'L');
        $parent = $parent->nested;
    }

    # Zero-width characters.
    return [ '\&' ] if ($command eq 'Z');

    # C<>, L<>, X<>, and E<> are flagged as literal when collapsing their
    # contents, and C<> is additionally flagged on its own.
    my $literal = ($command =~ /^[CELX]$/);
    my $text = $self->collapse ($seq->parse_tree, $literal, $command eq 'C');

    # Handle E<> escapes: a decimal number is turned into that character
    # with chr, and a name is looked up in %ESCAPES.
    if ($command eq 'E') {
        if ($text =~ /^\d+$/) {
            return [ chr ($text) ];
        } elsif (exists $ESCAPES{$text}) {
            return [ $ESCAPES{$text} ];
        } else {
            my ($file, $line) = $seq->file_line;
            warn "$file:$line: Unknown escape E<$text>\n";
            return [ "E<$text>" ];
        }
    }

    # For all the other codes, empty content produces no output.
    return '' if $text eq '';

    # Handle simple formatting codes. The \f(<font>S and \f(<font>E markers
    # are placeholders for font start and end.
    if ($command eq 'B') {
        return [ '\f(BS' . $text . '\f(BE' ];
    } elsif ($command eq 'F' || $command eq 'I') {
        return [ '\f(IS' . $text . '\f(IE' ];
    } elsif ($command eq 'C') {
        return [ $self->quote_literal ($text) ];
    }

    # Handle links.
    if ($command eq 'L') {
        my ($linktext, $type) = (parselink ($text))[1,4];
        return '' unless $linktext;
        my ($file, $line) = $seq->file_line;
        $linktext = $self->parse ($linktext, $line);
        $linktext = '<' . $linktext . '>' if $type eq 'url';
        return [ $linktext ];
    }

    # Whitespace protection replaces whitespace with "\ ".
    if ($command eq 'S') {
        $text =~ s/\s+/\\ /g;
        return [ $text ];
    }

    # Add an index entry to the list of ones waiting to be output.
    if ($command eq 'X') {
        push (@{ $self->{INDEX} }, $text);
        return '';
    }

    # Anything else is unknown. Return the empty string explicitly;
    # otherwise the true value returned by warn would become the return
    # value of this sub and end up in the formatted text.
    my ($file, $line) = $seq->file_line;
    warn "$file:$line: Unknown formatting code $command<$text>\n";
    return '';
}
631
632
3c014959 633##############################################################################
9741dab0 634# Command paragraphs
3c014959 635##############################################################################
9741dab0
GS
636
637# All command paragraphs take the paragraph and the line number.
638
# First level heading. No .IX is output for the NAME section, because of a
# bug in some versions of catman. .SH already uses small caps, so \s size
# changes such as \s-1 and \s0 are stripped from the heading. IN_NAME is
# cleared before parse() is called, so that it doesn't override guesswork on
# section headings after NAME, and is set again at the end if this heading is
# NAME.
sub cmd_head1 {
    my $self = shift;
    $self->{IN_NAME} = 0;

    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\\s-?\d//g;
    $heading =~ s/\s*\n\s*/ /g;

    # Turn paragraph spacing back on after a run of consecutive items.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }

    my $is_name = ($heading eq 'NAME');
    $self->output ($self->switchquotes ('.SH', $self->mapfonts ($heading)));
    $self->outindex ($is_name ? () : ('Header', $heading));
    $self->{NEEDSPACE} = 0;
    $self->{IN_NAME} = $is_name;
}
660
# Second level heading: output as a .Sh request and indexed as a subsection.
sub cmd_head2 {
    my $self = shift;

    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Turn paragraph spacing back on after a run of consecutive items.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }

    my $request = $self->switchquotes ('.Sh', $self->mapfonts ($heading));
    $self->output ($request);
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 0;
}
675
50a3fd2a
RA
# Third level heading: output as an italic paragraph of its own and indexed
# as a subsection.
sub cmd_head3 {
    my $self = shift;

    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Turn paragraph spacing back on after a run of consecutive items.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }

    $self->makespace;
    my $italic = '\f(IS' . $heading . '\f(IE';
    $self->output ($self->textmapfonts ($italic) . "\n");
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 1;
}
691
# Fourth level heading: output as a plain paragraph of its own and indexed as
# a subsection.
sub cmd_head4 {
    my $self = shift;

    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Turn paragraph spacing back on after a run of consecutive items.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }

    $self->makespace;
    $self->output ($self->textmapfonts ($heading) . "\n");
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 1;
}
707
9741dab0
GS
# Start a list. The argument is the requested indent; anything that isn't a
# plain integer falls back to the default indent. When an enclosing list has
# not yet been shifted, the outside indent is wrapped in .RS first so that
# hanging paragraph tags come out right.
sub cmd_over {
    my $self = shift;
    my $indent = shift;
    $indent = $self->{indent} unless $indent =~ /^[-+]?\d+\s+$/;

    my $shifted = scalar @{ $self->{SHIFTS} };
    my $nested  = scalar @{ $self->{INDENTS} };
    if ($shifted < $nested) {
        $self->output (".RS $self->{INDENT}\n");
        push (@{ $self->{SHIFTS} }, $self->{INDENT});
    }

    # Remember the enclosing indent, switch to the new one, and note that a
    # shift is pending until an =item or a text paragraph shows up.
    push (@{ $self->{INDENTS} }, $self->{INDENT});
    $self->{INDENT} = $indent + 0;
    $self->{SHIFTWAIT} = 1;
}
722
# End a list. Restores the indentation of the enclosing list, warning about
# an =back with no matching =over. If this list was shifted with .RS, that
# shift is closed with .RE; if we are still inside an outer list, its indent
# is re-established with a fresh .RE/.RS pair.
sub cmd_back {
    my $self = shift;

    my $outer = pop @{ $self->{INDENTS} };
    $self->{INDENT} = $outer;
    if (!defined $outer) {
        # The paragraph object is the third argument after the invocant.
        my $paragraph = $_[2];
        my ($file, $line) = $paragraph->file_line;
        warn "$file:$line: Unmatched =back\n";
        $self->{INDENT} = 0;
    }

    if (scalar @{ $self->{SHIFTS} } > scalar @{ $self->{INDENTS} }) {
        $self->output (".RE\n");
        pop @{ $self->{SHIFTS} };
    }
    if (scalar @{ $self->{INDENTS} } > 0) {
        $self->output (".RE\n");
        $self->output (".RS $self->{INDENT}\n");
    }
    $self->{NEEDSPACE} = 1;
    $self->{SHIFTWAIT} = 0;
}
746
# An individual list item. Emits an index entry for anything that's
# interesting, but not for things like bullets and numbers. Bullets are
# rofficated while we're at it (so for nice output, use * for your lists
# rather than o or . or - or some other thing). Newlines in an item title
# are turned into spaces since *roff can't handle them embedded.
sub cmd_item {
    my $self = shift;
    local $_ = $self->parse (@_);
    s/\s+$//;
    s/\s*\n\s*/ /g;

    # Index the tag unless it has no word characters or is just an
    # enumerator such as "a." or "1)"; any leading bullet is stripped from
    # the index text.
    my $index;
    if (/\w/ && !/^\w[.\)]\s*$/) {
        $index = $_;
        $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
    }

    # An item with no tag at all becomes a bullet. This has to test the
    # length rather than the truth of the tag, since "=item 0" has a
    # perfectly good tag that happens to be false.
    $_ = '*' unless length ($_) > 0;
    s/^\*(\s|\Z)/\\\(bu$1/;

    # Close the .RS shift that belongs to this list level, if there is one.
    if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
        $self->output (".RE\n");
        pop @{ $$self{SHIFTS} };
    }
    $_ = $self->textmapfonts ($_);

    # The second of a run of consecutive items turns paragraph spacing off.
    $self->output (".PD 0\n") if ($$self{ITEMS} == 1);
    $self->output ($self->switchquotes ('.IP', $_, $$self{INDENT}));
    $self->outindex ($index ? ('Item', $index) : ());
    $$self{NEEDSPACE} = 0;
    $$self{ITEMS}++;
    $$self{SHIFTWAIT} = 0;
}
776
# Begin a block for a particular translator. The first word of the paragraph
# names the format: blocks for man or roff set VERBATIM, which triggers
# special handling in textblock(), and blocks for anything else set EXCLUDE.
# A paragraph with no format name is ignored.
sub cmd_begin {
    my $self = shift;
    my $paragraph = shift;
    my ($kind) = $paragraph =~ /^(\S+)/ or return;
    my $ours = ($kind eq 'man' || $kind eq 'roff');
    $self->{ $ours ? 'VERBATIM' : 'EXCLUDE' } = 1;
}
789
# End a block for a particular translator by clearing both block flags. All
# =begin/=end pairs are assumed to be properly closed, so the argument to
# =end is not checked.
sub cmd_end {
    my ($self) = @_;
    $self->{EXCLUDE} = 0;
    $self->{VERBATIM} = 0;
}
797
# One paragraph for a particular translator. It is ignored unless it's
# intended for man or roff, in which case the format name is stripped and
# the rest is output verbatim.
sub cmd_for {
    my $self = shift;
    my $paragraph = shift;
    return unless $paragraph =~ s/^(?:man|roff)\b[ \t]*\n?//;
    $self->output ($paragraph);
}
806
807
3c014959 808##############################################################################
9741dab0 809# Escaping and fontification
3c014959 810##############################################################################
9741dab0
GS
811
# At this point the text has embedded font codes of the form \f(<font>[SE],
# where <font> is one of B, I, or F, marking the start or end of a font.
# Turn those into the right font start or end codes. The old pod2man didn't
# get B<someI<thing> else> right; after I<> it switched back to normal text
# rather than bold. To get that right, keep a nesting count for each font
# and use the three counts together as a key into the table of font escapes.
#
# \fP changes to the previous font, but only one previous font is kept. We
# don't know what the outside level font is; normally it's R, but inside a
# heading it could be something else. So arrange things so that the outside
# font is always the "previous" font and end with \fP instead of \fR. Idea
# from Zack Weinberg.
sub mapfonts {
    my ($self, $text) = @_;

    my ($fixed, $bold, $italic) = (0, 0, 0);
    my %counter = (F => \$fixed, B => \$bold, I => \$italic);
    my $current = '\fR';
    $text =~ s{ \\f\((.)(.) }{
        my ($font, $action) = ($1, $2);

        # If we're not in the outside font, go back to it first.
        my $prefix = ($current eq '\fR') ? '' : '\fP';
        ${ $counter{$font} } += ($action eq 'S') ? 1 : -1;
        my $key = join ('', map { $_ ? 1 : 0 } $fixed, $bold, $italic);
        my $wanted = $self->{FONTS}{$key};

        # No font change means no output at all. Otherwise output the
        # return to the outside font, followed by the new font unless the
        # new font is the outside font itself.
        my $escape = '';
        if ($wanted ne $current) {
            $escape = ($wanted eq '\fR') ? $prefix : $prefix . $wanted;
            $current = $wanted;
        }
        $escape;
    }gxe;
    return $text;
}
848
# Simpler font mapping used for ordinary text blocks.  There is a bug in
# Solaris 2.6 nroff (not present in GNU groff) where the sequence
# \fB\fP\f(CW\fP leaves the font set to B rather than R, presumably because
# \f(CW doesn't actually do a font change.  To work around it, text blocks,
# where the default font is always R, are handled here: every \f(<font>[SE]
# marker is replaced directly by the FONTS entry for the resulting
# combination of fixed, bold, and italic, with no use of \fP.  The smart
# mapfonts is used only for headings.
sub textmapfonts {
    my ($self, $text) = @_;

    # Nesting depth of each font; a non-zero depth means the font is active.
    my %depth = (F => 0, B => 0, I => 0);

    $text =~ s{ \\f\((.)(.) }{
        $depth{$1} += ($2 eq 'S') ? 1 : -1;
        $$self{FONTS}{ join ('', map { $depth{$_} ? 1 : 0 } qw(F B I)) };
    }gxe;
    return $text;
}
866
867
3c014959
JH
868##############################################################################
869# *roff-specific parsing and magic
870##############################################################################
9741dab0
GS
871
# Wrapper used in place of calling parse_text directly.  Forwards all of its
# arguments to parse_text, prefixed by the option hash this module always
# wants: -expand_seq set to 'sequence' and -expand_ptree set to 'collapse'.
sub parse {
    my $self = shift;
    my %flags = (
        -expand_seq   => 'sequence',
        -expand_ptree => 'collapse',
    );
    return $self->parse_text (\%flags, @_);
}
5cdeb5a2 878
b84d8b9e
JH
# Flatten a parse tree into a single string.  Arguments are the parse tree,
# a flag saying whether to treat the text as literal (that is, not to run
# guesswork on it), and a flag saying whether to do some extra cleanup for
# *roff.  If the literal flag isn't true, guesswork() is applied to every
# plain scalar in the tree.  Otherwise, when collapse is being called for a
# C<> code, $cleanup should be true so that backslashes, hyphens, and
# double underscores are escaped.  Assumes that every child of the parse
# tree is either a plain scalar or a reference to an array of strings.
sub collapse {
    my ($self, $ptree, $literal, $cleanup) = @_;

    # The NAME section never gets normal guesswork, because NAME lines are
    # often extracted by utilities like catman that require plain text and
    # don't understand *roff markup.  Backslashes and hyphens still have to
    # be escaped for *roff (and catman expects \- instead of -).
    ($literal, $cleanup) = (1, 1) if $$self{IN_NAME};

    # Walk the children, collecting the text contributed by each one.
    my @pieces;
    for my $node ($ptree->children) {
        if (ref $node) {
            push (@pieces, join ('', @$node));
            next;
        }
        unless ($literal) {
            push (@pieces, $self->guesswork ($node));
            next;
        }
        if ($cleanup) {
            $node =~ s/\\/\\e/g;
            $node =~ s/-/\\-/g;
            $node =~ s/__/_\\|_/g;
        }
        push (@pieces, $node);
    }
    return join ('', @pieces);
}
916
# Takes a text block to perform guesswork on; this is guaranteed not to
# contain any formatting codes.  Returns the text block with remapping done.
# The transformations are order-dependent: each later step works on the
# *roff escapes produced by the earlier ones (for example, hyphens are
# already \- and em dashes are already \*(-- by the time small caps are
# considered).  The object itself is not consulted.
sub guesswork {
    my $self = shift;
    local $_ = shift;

    # rofficate backslashes.
    s/\\/\\e/g;

    # Ensure double underbars have a tiny space between them.
    s/__/_\\|_/g;

    # Leave hyphens only if they're part of regular words and there is only
    # one dash at a time.  Leave a dash after the first character as a regular
    # non-breaking dash, but don't let it mark the rest of the word invalid
    # for hyphenation.
    s/-/\\-/g;
    s{
        ( (?:\G|^|\s) [a-zA-Z] ) ( \\- )?
        ( (?: [a-zA-Z]+ \\-)+ )
        ( [a-zA-Z]+ ) (?=\s|\Z)
        \b
    } {
        my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
        $hyphen ||= '';
        $main =~ s/\\-/-/g;
        $prefix . $hyphen . $main . $suffix;
    }egx;

    # Translate -- into a real em dash if it's used like one.
    s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx;
    s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;

    # Make all caps a little smaller.  Be careful here, since we don't want to
    # make @ARGV into small caps, nor do we want to fix the MIME in
    # MIME-Version, since it looks weird with the full-height V.  The
    # lookahead has to recognize the em dash string \*(-- generated above, so
    # the backslash and the asterisk are each escaped; an unescaped * would
    # be a quantifier on the backslash and the alternative would never match
    # an em dash.
    s{
        ( ^ | [\s\(\"\'\`\[\{<>] )
        ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* )
        (?= [\s>\}\]\(\)\'\".?!,;] | \\\*\(-- | $ )
    } { $1 . '\s-1' . $2 . '\s0' }egx;

    # Italicize functions in the form func().
    s{
        ( \b | \\s-1 )
        (
            [A-Za-z_] ([:\w]|\\s-?[01])+ \(\)
        )
    } { $1 . '\f(IS' . $2 . '\f(IE' }egx;

    # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
    s{
        ( \b | \\s-1 )
        ( [A-Za-z_] (?:[.:\w]|\\-|\\s-?[01])+ )
        (
            \( \d [a-z]* \)
        )
    } { $1 . '\f(IS' . $2 . '\f(IE\|' . $3 }egx;

    # Convert simple Perl variable references to a fixed-width font.
    s{
        ( \s+ )
        ( [\$\@%] [\w:]+ )
        (?! \( )
    } { $1 . '\f(FS' . $2 . '\f(FE'}egx;

    # Fix up double quotes.
    s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;

    # Make C++ into \*(C+, which is a squinched version.
    s{ \b C\+\+ } {\\*\(C+}gx;

    # All done.
    $_;
}
992
3c014959
JH
# Formats C<> text, deciding whether to put \*(C` and \*(C' quotes around it
# or not.  The decision is a pile of messy heuristics meant to avoid
# overquoting, originally from Barrie Slaymaker, and largely duplicates
# similar code in Pod::Text.  Either way the text is wrapped in the internal
# fixed-width font markers \f(FS ... \f(FE.
sub quote_literal {
    my ($self, $text) = @_;

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Look for things that we don't want to quote.
    my $unquoted = $text =~ m{
        ^\s*
        (?:
            ( [\'\`\"] ) .* \1 # already quoted
          | \` .* \' # `quoted'
          | \$+ [\#^]? \S $index # special ($^Foo, $")
          | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func
          | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
          | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )? # a number
          | 0x [a-fA-F\d]+ # a hex constant
        )
        \s*\z
    }xo;

    # Anything recognized above gets just a font change and no quoting;
    # everything else is quoted as well.
    return '\f(FS' . $text . '\f(FE' if $unquoted;
    return '\f(FS\*(C`' . $text . "\\*(C'\\f(FE";
}
1024
9741dab0 1025
3c014959 1026##############################################################################
9741dab0 1027# Output formatting
3c014959 1028##############################################################################
9741dab0
GS
1029
# Emit vertical whitespace before a new paragraph.  If more than one item
# has been seen (ITEMS > 1), first emit .PD; ITEMS is then reset to zero in
# every case.  When NEEDSPACE is set, emit .Sp if we are currently indented
# (INDENT > 0) and .PP otherwise.
sub makespace {
    my ($self) = @_;
    if ($$self{ITEMS} > 1) {
        $self->output (".PD\n");
    }
    $$self{ITEMS} = 0;
    $$self{NEEDSPACE}
        and $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n");
}
1038
3c014959
JH
# Output any pending index entries, and optionally an index entry given as
# an argument (a section name and the entry text).  Multiple index entries
# in a single X<> may be separated by slashes; all pending entries go out
# together as one Xref line.  Special escapes are stripped from the entry
# passed as an argument.  The list of pending entries is emptied.
sub outindex {
    my ($self, $section, $index) = @_;
    my @xrefs = map { split (m%\s*/\s*%, $_) } @{ $$self{INDEX} };
    return unless ($section || @xrefs);
    $$self{INDEX} = [];

    # Collect [type, text] pairs for everything that has to be written.
    my @lines;
    push (@lines, [ 'Xref', join (' ', @xrefs) ]) if @xrefs;
    if ($section) {
        # Turn \- back into a plain hyphen and drop all other escapes.
        for ($index) {
            s/\\-/-/g;
            s/\\(?:s-?\d|.\(..|.)//g;
        }
        push (@lines, [ $section, $index ]);
    }

    # Double any embedded double quotes, since the entry itself is quoted.
    for my $pair (@lines) {
        my ($type, $entry) = @$pair;
        $entry =~ s/\"/\"\"/g;
        $self->output (qq(.IX $type "$entry"\n));
    }
}
1062
# Write a string to the output device, i.e. to whatever file handle the
# object's output_handle method returns.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print $handle $text;
}
1065
50a3fd2a
RA
# Given a command, a single argument that may or may not contain double
# quotes, and an optional extra argument to append after it, handle
# double-quote formatting and return the complete macro line(s).  If there
# are no double quotes (and no fixed-width fonts), just return the command
# followed by the argument in double quotes.  Otherwise, use an .ie/.el pair
# to test for nroff: for nroff output the command followed by the argument
# in double quotes with embedded double quotes doubled, and for other
# formatters remap paired double quotes to `` and ''.
sub switchquotes {
    my $self = shift;
    my $command = shift;
    local $_ = shift;
    my $extra = shift;

    # Turn the \*(L" and \*(R" quote strings back into plain double quotes.
    s/\\\*\([LR]\"/\"/g;

    # We also have to deal with \*C` and \*C', which are used to add the
    # quotes around C<> text, since they may expand to " and if they do this
    # confuses the .SH macros and the like no end.  Expand them ourselves.
    # Also separate troff from nroff if there are any fixed-width fonts in use
    # to work around problems with Solaris nroff.
    my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);

    # Alternation matching any of the four fixed-width font escapes.  Use
    # quotemeta so that every metacharacter in a font escape is neutralized,
    # not just backslashes and opening parentheses.
    my $fixedpat = join ('|', map { quotemeta ($_) }
                         @{ $$self{FONTS} }{'100', '101', '110', '111'});
    if (/\"/ || /$fixedpat/) {
        s/\"/\"\"/g;
        my $nroff = $_;
        my $troff = $_;
        $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
        if ($c_is_quote && /\\\*\(C[\'\`]/) {
            $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
            $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
            $troff =~ s/\\\*\(C[\'\`]//g;
        }
        $nroff = qq("$nroff") . ($extra ? " $extra" : '');
        $troff = qq("$troff") . ($extra ? " $extra" : '');

        # Work around the Solaris nroff bug where \f(CW\fP leaves the font set
        # to Roman rather than the actual previous font when used in headings.
        # troff output may still be broken, but at least we can fix nroff by
        # just switching the font changes to the non-fixed versions.  The
        # match between the start and end codes must be non-greedy so that
        # each fixed-width span is rewritten on its own; a greedy match would
        # pair the first start code with the last end code and leave the
        # codes in between untouched.
        $nroff =~ s/\Q$$self{FONTS}{100}\E(.*?)\\f[PR]/$1/g;
        $nroff =~ s/\Q$$self{FONTS}{101}\E(.*?)\\f([PR])/\\fI$1\\f$2/g;
        $nroff =~ s/\Q$$self{FONTS}{110}\E(.*?)\\f([PR])/\\fB$1\\f$2/g;
        $nroff =~ s/\Q$$self{FONTS}{111}\E(.*?)\\f([PR])/\\f\(BI$1\\f$2/g;

        # Now finally output the command.  Only bother with .ie if the nroff
        # and troff output isn't the same.
        if ($nroff ne $troff) {
            return ".ie n $command $nroff\n.el $command $troff\n";
        } else {
            return "$command $nroff\n";
        }
    } else {
        $_ = qq("$_") . ($extra ? " $extra" : '');
        return "$command $_\n";
    }
}
1123
9741dab0
GS
1124__END__
1125
3c014959 1126##############################################################################
9741dab0 1127# Documentation
3c014959 1128##############################################################################
9741dab0
GS
1129
1130=head1 NAME
1131
1132Pod::Man - Convert POD data to formatted *roff input
1133
1134=head1 SYNOPSIS
1135
1136 use Pod::Man;
1137 my $parser = Pod::Man->new (release => $VERSION, section => 8);
1138
1139 # Read POD from STDIN and write to STDOUT.
1140 $parser->parse_from_filehandle;
1141
1142 # Read POD from file.pod and write to file.1.
1143 $parser->parse_from_file ('file.pod', 'file.1');
1144
1145=head1 DESCRIPTION
1146
1147Pod::Man is a module to convert documentation in the POD format (the
1148preferred language for documenting Perl) into *roff input using the man
1149macro set. The resulting *roff code is suitable for display on a terminal
bf202ccd
JH
1150using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1151It is conventionally invoked using the driver script B<pod2man>, but it can
1152also be used directly.
9741dab0
GS
1153
1154As a derived class from Pod::Parser, Pod::Man supports the same methods and
1155interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
b4558dc4 1156new parser with C<< Pod::Man->new() >> and then calls either
9741dab0
GS
1157parse_from_filehandle() or parse_from_file().
1158
1159new() can take options, in the form of key/value pairs that control the
1160behavior of the parser. See below for details.
1161
1162If no options are given, Pod::Man uses the name of the input file with any
1163trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
1164section 1 unless the file ended in C<.pm> in which case it defaults to
1165section 3, to a centered title of "User Contributed Perl Documentation", to
1166a centered footer of the Perl version it is run with, and to a left-hand
1167footer of the modification date of its input (or the current date if given
1168STDIN for input).
1169
1170Pod::Man assumes that your *roff formatters have a fixed-width font named
1171CW. If yours is called something else (like CR), use the C<fixed> option to
1172specify it. This generally only matters for troff output for printing.
1173Similarly, you can set the fonts used for bold, italic, and bold italic
1174fixed-width output.
1175
1176Besides the obvious pod conversions, Pod::Man also takes care of formatting
bf202ccd 1177func(), func(3), and simple variable references like $foo or @bar so you
9741dab0
GS
1178don't have to use code escapes for them; complex expressions like
1179C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1180dashes that aren't used as hyphens into en dashes, makes long dashes--like
b4558dc4
JH
1181this--into proper em dashes, fixes "paired quotes," makes C++ look right,
1182puts a little space between double underbars, makes ALLCAPS a teeny bit
1183smaller in B<troff>, and escapes stuff that *roff treats as special so that
1184you don't have to.
9741dab0
GS
1185
1186The recognized options to new() are as follows. All options take a single
1187argument.
1188
1189=over 4
1190
1191=item center
1192
1193Sets the centered page header to use instead of "User Contributed Perl
1194Documentation".
1195
1196=item date
1197
1198Sets the left-hand footer. By default, the modification date of the input
1199file will be used, or the current date if stat() can't find that file (the
1200case if the input is from STDIN), and the date will be formatted as
1201YYYY-MM-DD.
1202
1203=item fixed
1204
1205The fixed-width font to use for verbatim text and code. Defaults to CW.
bf202ccd 1206Some systems may want CR instead. Only matters for B<troff> output.
9741dab0
GS
1207
1208=item fixedbold
1209
1210Bold version of the fixed-width font. Defaults to CB. Only matters for
bf202ccd 1211B<troff> output.
9741dab0
GS
1212
1213=item fixeditalic
1214
1215Italic version of the fixed-width font (actually, something of a misnomer,
1216since most fixed-width fonts only have an oblique version, not an italic
bf202ccd 1217version). Defaults to CI. Only matters for B<troff> output.
9741dab0
GS
1218
1219=item fixedbolditalic
1220
1221Bold italic (probably actually oblique) version of the fixed-width font.
1222Pod::Man doesn't assume you have this, and defaults to CB. Some systems
bf202ccd 1223(such as Solaris) have this font available as CX. Only matters for B<troff>
9741dab0
GS
1224output.
1225
bf202ccd
JH
1226=item name
1227
1228Set the name of the manual page. Without this option, the manual name is
1229set to the uppercased base name of the file being converted unless the
1230manual section is 3, in which case the path is parsed to see if it is a Perl
1231module path. If it is, a path like C<.../lib/Pod/Man.pm> is converted into
1232a name like C<Pod::Man>. This option, if given, overrides any automatic
1233determination of the name.
1234
ab1f1d91
JH
1235=item quotes
1236
1237Sets the quote marks used to surround CE<lt>> text. If the value is a
1238single character, it is used as both the left and right quote; if it is two
1239characters, the first character is used as the left quote and the second as
1240the right quote; and if it is four characters, the first two are used as
1241the left quote and the second two as the right quote.
1242
1243This may also be set to the special value C<none>, in which case no quote
1244marks are added around CE<lt>> text (but the font is still changed for troff
1245output).
1246
9741dab0
GS
1247=item release
1248
1249Set the centered footer. By default, this is the version of Perl you run
bf202ccd 1250Pod::Man under. Note that some system an macro sets assume that the
9741dab0
GS
1251centered footer will be a modification date and will prepend something like
1252"Last modified: "; if this is the case, you may want to set C<release> to
1253the last modified date and C<date> to the version number.
1254
1255=item section
1256
1257Set the section for the C<.TH> macro. The standard section numbering
1258convention is to use 1 for user commands, 2 for system calls, 3 for
1259functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1260miscellaneous information, and 8 for administrator commands. There is a lot
1261of variation here, however; some systems (like Solaris) use 4 for file
1262formats, 5 for miscellaneous information, and 7 for devices. Still others
1263use 1m instead of 8, or some mix of both. About the only section numbers
1264that are reliably consistent are 1, 2, and 3.
1265
1266By default, section 1 will be used unless the file ends in .pm in which case
1267section 3 will be selected.
1268
1269=back
1270
1271The standard Pod::Parser method parse_from_filehandle() takes up to two
1272arguments, the first being the file handle to read POD from and the second
1273being the file handle to write the formatted output to. The first defaults
1274to STDIN if not given, and the second defaults to STDOUT. The method
1275parse_from_file() is almost identical, except that its two arguments are the
1276input and output disk files instead. See L<Pod::Parser> for the specific
1277details.
1278
1279=head1 DIAGNOSTICS
1280
1281=over 4
1282
ab1f1d91 1283=item roff font should be 1 or 2 chars, not "%s"
9741dab0
GS
1284
1285(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1286wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1287longer than two characters, although some *roff extensions do (the canonical
bf202ccd 1288versions of B<nroff> and B<troff> don't either).
9741dab0
GS
1289
1290=item Invalid link %s
1291
b84d8b9e
JH
1292(W) The POD source contained a C<LE<lt>E<gt>> formatting code that
1293Pod::Man was unable to parse. You should never see this error message; it
1294probably indicates a bug in Pod::Man.
9741dab0 1295
ab1f1d91
JH
1296=item Invalid quote specification "%s"
1297
1298(F) The quote specification given (the quotes option to the constructor) was
1299invalid. A quote specification must be one, two, or four characters long.
1300
1301=item %s:%d: Unknown command paragraph "%s".
1302
1303(W) The POD source contained a non-standard command paragraph (something of
1304the form C<=command args>) that Pod::Man didn't know about. It was ignored.
1305
aa212ad6 1306=item %s:%d: Unknown escape EE<lt>%sE<gt>
9741dab0
GS
1307
1308(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1309know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1310
b84d8b9e 1311=item %s:%d: Unknown formatting code %s
9741dab0 1312
b84d8b9e 1313(W) The POD source contained a non-standard formatting code (something of
9741dab0
GS
1314the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1315
aa212ad6 1316=item %s:%d: Unmatched =back
9741dab0
GS
1317
1318(W) Pod::Man encountered a C<=back> command that didn't correspond to an
1319C<=over> command.
1320
1321=back
1322
1323=head1 BUGS
1324
b4558dc4
JH
1325Eight-bit input data isn't handled at all well at present. The correct
1326approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8
1327characters and then do a translation pass on the output according to the
1328user-specified output character set. Unfortunately, we can't send eight-bit
1329data directly to the output unless the user says this is okay, since some
1330vendor *roff implementations can't handle eight-bit data. If the *roff
1331implementation can, however, that's far superior to the current hacked
1332characters that only work under troff.
1333
1334There is currently no way to turn off the guesswork that tries to format
1335unmarked text appropriately, and sometimes it isn't wanted (particularly
1336when using POD to document something other than Perl).
9741dab0
GS
1337
1338The NAME section should be recognized specially and index entries emitted
1339for everything in that section. This would have to be deferred until the
1340next section, since extraneous things in NAME tend to confuse various man
1341page processors.
1342
9741dab0 1343Pod::Man doesn't handle font names longer than two characters. Neither do
bf202ccd 1344most B<troff> implementations, but GNU troff does as an extension. It would
9741dab0
GS
1345be nice to support as an option for those who want to use it.
1346
1347The preamble added to each output file is rather verbose, and most of it is
1348only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1349characters. It would ideally be nice if all of those definitions were only
1350output if needed, perhaps on the fly as the characters are used.
1351
9741dab0
GS
1352Pod::Man is excessively slow.
1353
b4558dc4
JH
1354=head1 CAVEATS
1355
1356The handling of hyphens and em dashes is somewhat fragile, and one may get
1357the wrong one under some circumstances. This should only matter for
1358B<troff> output.
1359
1360When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1361necessarily get it right.
1362
9741dab0
GS
1363=head1 SEE ALSO
1364
bf202ccd
JH
1365L<Pod::Parser>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
1366L<man(1)>, L<man(7)>
9741dab0
GS
1367
1368Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1369Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
bf202ccd
JH
1370the best documentation of standard B<nroff> and B<troff>. At the time of
1371this writing, it's available at
1372L<http://www.cs.bell-labs.com/cm/cs/cstr.html>.
9741dab0 1373
bf202ccd
JH
1374The man page documenting the man macro set may be L<man(5)> instead of
1375L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive
1376documentation on writing manual pages if you've not done it before and
1377aren't familiar with the conventions.
9741dab0
GS
1378
1379=head1 AUTHOR
1380
bf202ccd
JH
1381Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
1382B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>.
9741dab0 1383
3c014959
JH
1384=head1 COPYRIGHT AND LICENSE
1385
2da3dd12 1386Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
3c014959
JH
1387
1388This program is free software; you may redistribute it and/or modify it
1389under the same terms as Perl itself.
1390
9741dab0 1391=cut