lib/Pod/Man.pm

   1 # Pod::Man -- Convert POD data to formatted *roff input.
   2 # $Id: Man.pm,v 1.32 2002/01/02 09:02:24 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module translates POD documentation into *roff markup using the man
  10 # macro set, and is intended for converting POD documents written as Unix
  11 # manual pages to manual pages that can be read by the man(1) command.  It is
  12 # a replacement for the pod2man command distributed with versions of Perl
  13 # prior to 5.6.
  14 #
  15 # Perl core hackers, please note that this module is also separately
  16 # maintained outside of the Perl core as part of the podlators.  Please send
  17 # me any patches at the address above in addition to sending them to the
  18 # standard Perl mailing lists.
  19
  20 ##############################################################################
  21 # Modules and declarations
  22 ##############################################################################
  23
  24 package Pod::Man;
  25
  26 require 5.005;
  27
  28 use Carp qw(carp croak);
  29 use Pod::ParseLink qw(parselink);
  30 use Pod::Parser ();
  31
  32 use strict;
  33 use subs qw(makespace);
  34 use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
  35
  36 @ISA = qw(Pod::Parser);
  37
  38 # Don't use the CVS revision as the version, since this module is also in Perl
  39 # core and too many things could munge CVS magic revision strings.  This
  40 # number should ideally be the same as the CVS revision in podlators, however.
  41 $VERSION = 1.32;
  42
  43
  44 ##############################################################################
  45 # Preamble and *roff output tables
  46 ##############################################################################
  47
   48 # The following is the static preamble which starts all *roff output we
   49 # generate.  It's completely static except for the font to use as a
   50 # fixed-width font, which is designated by @CFONT@, and the left and right
   51 # quotes to use for C<> text, designated by @LQUOTE@ and @RQUOTE@.  $PREAMBLE
   52 # should therefore have all three placeholders substituted before output.
  53 $PREAMBLE = <<'----END OF PREAMBLE----';
  54 .de Sh \" Subsection heading
  55 .br
  56 .if t .Sp
  57 .ne 5
  58 .PP
  59 \fB\\$1\fR
  60 .PP
  61 ..
  62 .de Sp \" Vertical space (when we can't use .PP)
  63 .if t .sp .5v
  64 .if n .sp
  65 ..
  66 .de Vb \" Begin verbatim text
  67 .ft @CFONT@
  68 .nf
  69 .ne \\$1
  70 ..
  71 .de Ve \" End verbatim text
  72 .ft R
  73 .fi
  74 ..
  75 .\" Set up some character translations and predefined strings.  \*(-- will
  76 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  77 .\" double quote, and \*(R" will give a right double quote.  | will give a
  78 .\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
  79 .\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
  80 .\" expand to `' in nroff, nothing in troff, for use with C<>.
  81 .tr \(*W-|\(bv\*(Tr
  82 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  83 .ie n \{\
  84 .    ds -- \(*W-
  85 .    ds PI pi
  86 .    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
  87 .    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
  88 .    ds L" ""
  89 .    ds R" ""
  90 .    ds C` @LQUOTE@
  91 .    ds C' @RQUOTE@
  92 'br\}
  93 .el\{\
  94 .    ds -- \|\(em\|
  95 .    ds PI \(*p
  96 .    ds L" ``
  97 .    ds R" ''
  98 'br\}
  99 .\"
 100 .\" If the F register is turned on, we'll generate index entries on stderr for
 101 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
 102 .\" entries marked with X<> in POD.  Of course, you'll have to process the
 103 .\" output yourself in some meaningful fashion.
 104 .if \nF \{\
 105 .    de IX
 106 .    tm Index:\\$1\t\\n%\t"\\$2"
 107 ..
 108 .    nr % 0
 109 .    rr F
 110 .\}
 111 .\"
 112 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 113 .\" way too many mistakes in technical documents.
 114 .hy 0
 115 .if n .na
 116 .\"
 117 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
 118 .\" Fear.  Run.  Save yourself.  No user-serviceable parts.
 119 .    \" fudge factors for nroff and troff
 120 .if n \{\
 121 .    ds #H 0
 122 .    ds #V .8m
 123 .    ds #F .3m
 124 .    ds #[ \f1
 125 .    ds #] \fP
 126 .\}
 127 .if t \{\
 128 .    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
 129 .    ds #V .6m
 130 .    ds #F 0
 131 .    ds #[ \&
 132 .    ds #] \&
 133 .\}
 134 .    \" simple accents for nroff and troff
 135 .if n \{\
 136 .    ds ' \&
 137 .    ds ` \&
 138 .    ds ^ \&
 139 .    ds , \&
 140 .    ds ~ ~
 141 .    ds /
 142 .\}
 143 .if t \{\
 144 .    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
 145 .    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
 146 .    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
 147 .    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
 148 .    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 149 .    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 150 .\}
 151 .    \" troff and (daisy-wheel) nroff accents
 152 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 153 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 154 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 155 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 156 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 157 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 158 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 159 .ds ae a\h'-(\w'a'u*4/10)'e
 160 .ds Ae A\h'-(\w'A'u*4/10)'E
 161 .    \" corrections for vroff
 162 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 163 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 164 .    \" for low resolution devices (crt and lpr)
 165 .if \n(.H>23 .if \n(.V>19 \
 166 \{\
 167 .    ds : e
 168 .    ds 8 ss
 169 .    ds o a
 170 .    ds d- d\h'-1'\(ga
 171 .    ds D- D\h'-1'\(hy
 172 .    ds th \o'bp'
 173 .    ds Th \o'LP'
 174 .    ds ae ae
 175 .    ds Ae AE
 176 .\}
 177 .rm #[ #] #H #V #F C
 178 ----END OF PREAMBLE----
 179 #`# for cperl-mode
 180
 181 # This table is taken nearly verbatim from Tom Christiansen's pod2man.  It
 182 # assumes that the standard preamble has already been printed, since that's
 183 # what defines all of the accent marks.  Note that some of these are quoted
 184 # with double quotes since they contain embedded single quotes, so use \\
 185 # uniformly for backslash for readability.
 186 %ESCAPES = (
 187     'amp'       =>    '&',      # ampersand
 188     'apos'      =>    "'",      # apostrophe
 189     'lt'        =>    '<',      # left chevron, less-than
 190     'gt'        =>    '>',      # right chevron, greater-than
 191     'quot'      =>    '"',      # double quote
 192     'sol'       =>    '/',      # solidus (forward slash)
 193     'verbar'    =>    '|',      # vertical bar
 194
 195     'Aacute'    =>    "A\\*'",  # capital A, acute accent
 196     'aacute'    =>    "a\\*'",  # small a, acute accent
 197     'Acirc'     =>    'A\\*^',  # capital A, circumflex accent
 198     'acirc'     =>    'a\\*^',  # small a, circumflex accent
 199     'AElig'     =>    '\*(AE',  # capital AE diphthong (ligature)
 200     'aelig'     =>    '\*(ae',  # small ae diphthong (ligature)
 201     'Agrave'    =>    "A\\*`",  # capital A, grave accent
 202     'agrave'    =>    "A\\*`",  # small a, grave accent
 203     'Aring'     =>    'A\\*o',  # capital A, ring
 204     'aring'     =>    'a\\*o',  # small a, ring
 205     'Atilde'    =>    'A\\*~',  # capital A, tilde
 206     'atilde'    =>    'a\\*~',  # small a, tilde
 207     'Auml'      =>    'A\\*:',  # capital A, dieresis or umlaut mark
 208     'auml'      =>    'a\\*:',  # small a, dieresis or umlaut mark
 209     'Ccedil'    =>    'C\\*,',  # capital C, cedilla
 210     'ccedil'    =>    'c\\*,',  # small c, cedilla
 211     'Eacute'    =>    "E\\*'",  # capital E, acute accent
 212     'eacute'    =>    "e\\*'",  # small e, acute accent
 213     'Ecirc'     =>    'E\\*^',  # capital E, circumflex accent
 214     'ecirc'     =>    'e\\*^',  # small e, circumflex accent
 215     'Egrave'    =>    'E\\*`',  # capital E, grave accent
 216     'egrave'    =>    'e\\*`',  # small e, grave accent
 217     'ETH'       =>    '\\*(D-', # capital Eth, Icelandic
 218     'eth'       =>    '\\*(d-', # small eth, Icelandic
 219     'Euml'      =>    'E\\*:',  # capital E, dieresis or umlaut mark
 220     'euml'      =>    'e\\*:',  # small e, dieresis or umlaut mark
 221     'Iacute'    =>    "I\\*'",  # capital I, acute accent
 222     'iacute'    =>    "i\\*'",  # small i, acute accent
 223     'Icirc'     =>    'I\\*^',  # capital I, circumflex accent
 224     'icirc'     =>    'i\\*^',  # small i, circumflex accent
 225     'Igrave'    =>    'I\\*`',  # capital I, grave accent
 226     'igrave'    =>    'i\\*`',  # small i, grave accent
 227     'Iuml'      =>    'I\\*:',  # capital I, dieresis or umlaut mark
 228     'iuml'      =>    'i\\*:',  # small i, dieresis or umlaut mark
 229     'Ntilde'    =>    'N\*~',   # capital N, tilde
 230     'ntilde'    =>    'n\*~',   # small n, tilde
 231     'Oacute'    =>    "O\\*'",  # capital O, acute accent
 232     'oacute'    =>    "o\\*'",  # small o, acute accent
 233     'Ocirc'     =>    'O\\*^',  # capital O, circumflex accent
 234     'ocirc'     =>    'o\\*^',  # small o, circumflex accent
 235     'Ograve'    =>    'O\\*`',  # capital O, grave accent
 236     'ograve'    =>    'o\\*`',  # small o, grave accent
 237     'Oslash'    =>    'O\\*/',  # capital O, slash
 238     'oslash'    =>    'o\\*/',  # small o, slash
 239     'Otilde'    =>    'O\\*~',  # capital O, tilde
 240     'otilde'    =>    'o\\*~',  # small o, tilde
 241     'Ouml'      =>    'O\\*:',  # capital O, dieresis or umlaut mark
 242     'ouml'      =>    'o\\*:',  # small o, dieresis or umlaut mark
 243     'szlig'     =>    '\*8',    # small sharp s, German (sz ligature)
 244     'THORN'     =>    '\\*(Th', # capital THORN, Icelandic
 245     'thorn'     =>    '\\*(th', # small thorn, Icelandic
 246     'Uacute'    =>    "U\\*'",  # capital U, acute accent
 247     'uacute'    =>    "u\\*'",  # small u, acute accent
 248     'Ucirc'     =>    'U\\*^',  # capital U, circumflex accent
 249     'ucirc'     =>    'u\\*^',  # small u, circumflex accent
 250     'Ugrave'    =>    'U\\*`',  # capital U, grave accent
 251     'ugrave'    =>    'u\\*`',  # small u, grave accent
 252     'Uuml'      =>    'U\\*:',  # capital U, dieresis or umlaut mark
 253     'uuml'      =>    'u\\*:',  # small u, dieresis or umlaut mark
 254     'Yacute'    =>    "Y\\*'",  # capital Y, acute accent
 255     'yacute'    =>    "y\\*'",  # small y, acute accent
 256     'yuml'      =>    'y\\*:',  # small y, dieresis or umlaut mark
 257
 258     'nbsp'      =>    '\\ ',    # non-breaking space
 259     'shy'       =>    '',       # soft (discretionary) hyphen
 260 );
 261
 262
 263 ##############################################################################
 264 # Static helper functions
 265 ##############################################################################
 266
 267 # Protect leading quotes and periods against interpretation as commands.  Also
 268 # protect anything starting with a backslash, since it could expand or hide
 269 # something that *roff would interpret as a command.  This is overkill, but
 270 # it's much simpler than trying to parse *roff here.
 271 sub protect {
 272     local $_ = shift;
 273     s/^([.\'\\])/\\&$1/mg;
 274     $_;
 275 }
 276
 277 # Translate a font string into an escape.
 278 sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] }
 279
 280
 281 ##############################################################################
 282 # Initialization
 283 ##############################################################################
 284
 285 # Initialize the object.  Here, we also process any additional options passed
 286 # to the constructor or set up defaults if none were given.  center is the
 287 # centered title, release is the version number, and date is the date for the
 288 # documentation.  Note that we can't know what file name we're processing due
 289 # to the architecture of Pod::Parser, so that *has* to either be passed to the
 290 # constructor or set separately with Pod::Man::name().
 291 sub initialize {
 292     my $self = shift;
 293
 294     # Figure out the fixed-width font.  If user-supplied, make sure that they
 295     # are the right length.
 296     for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
 297         if (defined $$self{$_}) {
 298             if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
 299                 croak qq(roff font should be 1 or 2 chars,)
 300                     . qq( not "$$self{$_}");
 301             }
 302         } else {
 303             $$self{$_} = '';
 304         }
 305     }
 306
 307     # Set the default fonts.  We can't be sure what fixed bold-italic is going
 308     # to be called, so default to just bold.
 309     $$self{fixed}           ||= 'CW';
 310     $$self{fixedbold}       ||= 'CB';
 311     $$self{fixeditalic}     ||= 'CI';
 312     $$self{fixedbolditalic} ||= 'CB';
 313
 314     # Set up a table of font escapes.  First number is fixed-width, second is
 315     # bold, third is italic.
 316     $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
 317                       '010' => '\fB', '011' => '\f(BI',
 318                       '100' => toescape ($$self{fixed}),
 319                       '101' => toescape ($$self{fixeditalic}),
 320                       '110' => toescape ($$self{fixedbold}),
 321                       '111' => toescape ($$self{fixedbolditalic})};
 322
 323     # Extra stuff for page titles.
 324     $$self{center} = 'User Contributed Perl Documentation'
 325         unless defined $$self{center};
 326     $$self{indent} = 4 unless defined $$self{indent};
 327
 328     # We used to try first to get the version number from a local binary, but
 329     # we shouldn't need that any more.  Get the version from the running Perl.
 330     # Work a little magic to handle subversions correctly under both the
 331     # pre-5.6 and the post-5.6 version numbering schemes.
 332     if (!defined $$self{release}) {
 333         my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
 334         $version[2] ||= 0;
 335         $version[2] *= 10 ** (3 - length $version[2]);
 336         for (@version) { $_ += 0 }
 337         $$self{release} = 'perl v' . join ('.', @version);
 338     }
 339
 340     # Double quotes in things that will be quoted.
 341     for (qw/center date release/) {
 342         $$self{$_} =~ s/\"/\"\"/g if $$self{$_};
 343     }
 344
 345     # Figure out what quotes we'll be using for C<> text.
 346     $$self{quotes} ||= '"';
 347     if ($$self{quotes} eq 'none') {
 348         $$self{LQUOTE} = $$self{RQUOTE} = '';
 349     } elsif (length ($$self{quotes}) == 1) {
 350         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 351     } elsif ($$self{quotes} =~ /^(.)(.)$/
 352              || $$self{quotes} =~ /^(..)(..)$/) {
 353         $$self{LQUOTE} = $1;
 354         $$self{RQUOTE} = $2;
 355     } else {
 356         croak qq(Invalid quote specification "$$self{quotes}");
 357     }
 358
 359     # Double the first quote; note that this should not be s///g as two double
 360     # quotes is represented in *roff as three double quotes, not four.  Weird,
 361     # I know.
 362     $$self{LQUOTE} =~ s/\"/\"\"/;
 363     $$self{RQUOTE} =~ s/\"/\"\"/;
 364
 365     $self->SUPER::initialize;
 366 }
 367
 368 # For each document we process, output the preamble first.
 369 sub begin_pod {
 370     my $self = shift;
 371
 372     # Try to figure out the name and section from the file name.
 373     my $section = $$self{section} || 1;
 374     my $name = $$self{name};
 375     if (!defined $name) {
 376         $name = $self->input_file;
 377         $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
 378         $name =~ s/\.p(od|[lm])\z//i;
 379         if ($section !~ /^3/) {
 380             require File::Basename;
 381             $name = uc File::Basename::basename ($name);
 382         } else {
 383             # Assume that we're dealing with a module.  We want to figure out
 384             # the full module name from the path to the file, but we don't
 385             # want to include too much of the path into the module name.  Lose
 386             # everything up to the first of:
 387             #
 388             #     */lib/*perl*/         standard or site_perl module
 389             #     */*perl*/lib/         from -Dprefix=/opt/perl
 390             #     */*perl*/             random module hierarchy
 391             #
 392             # which works.  Also strip off a leading site or site_perl
 393             # component, any OS-specific component, and any version number
 394             # component, and strip off an initial component of "lib" or
 395             # "blib/lib" since that's what ExtUtils::MakeMaker creates.
 396             # splitdir requires at least File::Spec 0.8.
 397             require File::Spec;
 398             my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
 399             my @dirs = File::Spec->splitdir ($dirs);
 400             my $cut = 0;
 401             my $i;
 402             for ($i = 0; $i < scalar @dirs; $i++) {
 403                 if ($dirs[$i] eq 'lib' && $dirs[$i + 1] =~ /perl/) {
 404                     $cut = $i + 2;
 405                     last;
 406                 } elsif ($dirs[$i] =~ /perl/) {
 407                     $cut = $i + 1;
 408                     $cut++ if $dirs[$i + 1] eq 'lib';
 409                     last;
 410                 }
 411             }
 412             if ($cut > 0) {
 413                 splice (@dirs, 0, $cut);
 414                 shift @dirs if ($dirs[0] =~ /^site(_perl)?$/);
 415                 shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
 416                 shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
 417             }
 418             shift @dirs if $dirs[0] eq 'lib';
 419             splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib');
 420
 421             # Remove empty directories when building the module name; they
 422             # occur too easily on Unix by doubling slashes.
 423             $name = join ('::', (grep { $_ ? $_ : () } @dirs), $file);
 424         }
 425     }
 426
 427     # If $name contains spaces, quote it; this mostly comes up in the case of
 428     # input from stdin.
 429     $name = '"' . $name . '"' if ($name =~ /\s/);
 430
 431     # Modification date header.  Try to use the modification time of our
 432     # input.
 433     if (!defined $$self{date}) {
 434         my $time = (stat $self->input_file)[9] || time;
 435         my ($day, $month, $year) = (localtime $time)[3,4,5];
 436         $month++;
 437         $year += 1900;
 438         $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day);
 439     }
 440
 441     # Now, print out the preamble and the title.  The meaning of the arguments
 442     # to .TH unfortunately vary by system; some systems consider the fourth
 443     # argument to be a "source" and others use it as a version number.
 444     # Generally it's just presented as the left-side footer, though, so it
 445     # doesn't matter too much if a particular system gives it another
 446     # interpretation.
 447     #
 448     # The order of date and release used to be reversed in older versions of
 449     # this module, but this order is correct for both Solaris and Linux.
 450     local $_ = $PREAMBLE;
 451     s/\@CFONT\@/$$self{fixed}/;
 452     s/\@LQUOTE\@/$$self{LQUOTE}/;
 453     s/\@RQUOTE\@/$$self{RQUOTE}/;
 454     chomp $_;
 455     my $pversion = $Pod::Parser::VERSION;
 456     print { $self->output_handle } <<"----END OF HEADER----";
 457 .\\" Automatically generated by Pod::Man v$VERSION, Pod::Parser v$pversion
 458 .\\"
 459 .\\" Standard preamble:
 460 .\\" ========================================================================
 461 $_
 462 .\\" ========================================================================
 463 .\\"
 464 .IX Title "$name $section"
 465 .TH $name $section "$$self{date}" "$$self{release}" "$$self{center}"
 466 ----END OF HEADER----
 467
 468     # Initialize a few per-file variables.
 469     $$self{INDENT}    = 0;      # Current indentation level.
 470     $$self{INDENTS}   = [];     # Stack of indentations.
 471     $$self{INDEX}     = [];     # Index keys waiting to be printed.
 472     $$self{IN_NAME}   = 0;      # Whether processing the NAME section.
 473     $$self{ITEMS}     = 0;      # The number of consecutive =items.
 474     $$self{SHIFTWAIT} = 0;      # Whether there is a shift waiting.
 475     $$self{SHIFTS}    = [];     # Stack of .RS shifts.
 476 }
 477
 478
 479 ##############################################################################
 480 # Core overrides
 481 ##############################################################################
 482
 483 # Called for each command paragraph.  Gets the command, the associated
 484 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 485 # the command to a method named the same as the command.  =cut is handled
 486 # internally by Pod::Parser.
 487 sub command {
 488     my $self = shift;
 489     my $command = shift;
 490     return if $command eq 'pod';
 491     return if ($$self{EXCLUDE} && $command ne 'end');
 492     if ($self->can ('cmd_' . $command)) {
 493         $command = 'cmd_' . $command;
 494         $self->$command (@_);
 495     } else {
 496         my ($text, $line, $paragraph) = @_;
 497         my $file;
 498         ($file, $line) = $paragraph->file_line;
 499         $text =~ s/\n+\z//;
 500         $text = " $text" if ($text =~ /^\S/);
 501         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 502         return;
 503     }
 504 }
 505
 506 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 507 # Pod::Paragraph object.  Rofficate backslashes, untabify, put a zero-width
 508 # character at the beginning of each line to protect against commands, and
 509 # wrap in .Vb/.Ve.
 510 sub verbatim {
 511     my $self = shift;
 512     return if $$self{EXCLUDE};
 513     local $_ = shift;
 514     return if /^\s+$/;
 515     s/\s+$/\n/;
 516     my $lines = tr/\n/\n/;
 517     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 518     s/\\/\\e/g;
 519     s/^(\s*\S)/'\&' . $1/gme;
 520     $self->makespace;
 521     $self->output (".Vb $lines\n$_.Ve\n");
 522     $$self{NEEDSPACE} = 1;
 523 }
 524
 525 # Called for a regular text block.  Gets the paragraph, the line number, and a
 526 # Pod::Paragraph object.  Perform interpolation and output the results.
 527 sub textblock {
 528     my $self = shift;
 529     return if $$self{EXCLUDE};
 530     $self->output ($_[0]), return if $$self{VERBATIM};
 531
 532     # Parse the tree.  collapse knows about references to scalars as well as
 533     # scalars and does the right thing with them.  Tidy up any trailing
 534     # whitespace.
 535     my $text = shift;
 536     $text = $self->parse ($text, @_);
 537     $text =~ s/\n\s*$/\n/;
 538
 539     # Output the paragraph.  We also have to handle =over without =item.  If
 540     # there's an =over without =item, NEWINDENT will be set, and we need to
 541     # handle creation of the indent here.  Set WEIRDINDENT so that it will be
 542     # cleaned up on =back.
 543     $self->makespace;
 544     if ($$self{SHIFTWAIT}) {
 545         $self->output (".RS $$self{INDENT}\n");
 546         push (@{ $$self{SHIFTS} }, $$self{INDENT});
 547         $$self{SHIFTWAIT} = 0;
 548     }
 549     $self->output (protect $self->textmapfonts ($text));
 550     $self->outindex;
 551     $$self{NEEDSPACE} = 1;
 552 }
 553
 554 # Called for a formatting code.  Takes a Pod::InteriorSequence object and
 555 # returns a reference to a scalar.  This scalar is the final formatted text.
 556 # It's returned as a reference to an array so that other formatting codes
 557 # above us know that the text has already been processed.
 558 sub sequence {
 559     my ($self, $seq) = @_;
 560     my $command = $seq->cmd_name;
 561
 562     # We have to defer processing of the inside of an L<> formatting code.  If
 563     # this code is nested inside an L<> code, return the literal raw text of
 564     # it.
 565     my $parent = $seq->nested;
 566     while (defined $parent) {
 567         return $seq->raw_text if ($parent->cmd_name eq 'L');
 568         $parent = $parent->nested;
 569     }
 570
 571     # Zero-width characters.
 572     return [ '\&' ] if ($command eq 'Z');
 573
 574     # C<>, L<>, X<>, and E<> don't apply guesswork to their contents.  C<>
 575     # needs some additional special handling.
 576     my $literal = ($command =~ /^[CELX]$/);
 577     local $_ = $self->collapse ($seq->parse_tree, $literal, $command eq 'C');
 578
 579     # Handle E<> escapes.  Numeric escapes that match one of the supported ISO
 580     # 8859-1 characters don't work at present.
 581     if ($command eq 'E') {
 582         if (/^\d+$/) {
 583             return [ chr ($_) ];
 584         } elsif (exists $ESCAPES{$_}) {
 585             return [ $ESCAPES{$_} ];
 586         } else {
 587             my ($file, $line) = $seq->file_line;
 588             warn "$file:$line: Unknown escape E<$_>\n";
 589             return [ "E<$_>" ];
 590         }
 591     }
 592
 593     # For all the other codes, empty content produces no output.
 594     return '' if $_ eq '';
 595
 596     # Handle simple formatting codes.
 597     if ($command eq 'B') {
 598         return [ '\f(BS' . $_ . '\f(BE' ];
 599     } elsif ($command eq 'F' || $command eq 'I') {
 600         return [ '\f(IS' . $_ . '\f(IE' ];
 601     } elsif ($command eq 'C') {
 602         return [ $self->quote_literal ($_) ];
 603     }
 604
 605     # Handle links.
 606     if ($command eq 'L') {
 607         my ($text, $type) = (parselink ($_))[1,4];
 608         return '' unless $text;
 609         my ($file, $line) = $seq->file_line;
 610         $text = $self->parse ($text, $line);
 611         $text = '<' . $text . '>' if $type eq 'url';
 612         return [ $text ];
 613     }
 614
 615     # Whitespace protection replaces whitespace with "\ ".
 616     if ($command eq 'S') {
 617         s/\s+/\\ /g;
 618         return [ $_ ];
 619     }
 620
 621     # Add an index entry to the list of ones waiting to be output.
 622     if ($command eq 'X') {
 623         push (@{ $$self{INDEX} }, $_);
 624         return '';
 625     }
 626
 627     # Anything else is unknown.
 628     my ($file, $line) = $seq->file_line;
 629     warn "$file:$line: Unknown formatting code $command<$_>\n";
 630 }
 631
 632
 633 ##############################################################################
 634 # Command paragraphs
 635 ##############################################################################
 636
 637 # All command paragraphs take the paragraph and the line number.
 638
 639 # First level heading.  We can't output .IX in the NAME section due to a bug
 640 # in some versions of catman, so don't output a .IX for that section.  .SH
 641 # already uses small caps, so remove \s1 and \s-1.  Maintain IN_NAME as
 642 # appropriate, but don't leave it set while calling parse() so as to not
 643 # override guesswork on section headings after NAME.
 644 sub cmd_head1 {
 645     my $self = shift;
 646     $$self{IN_NAME} = 0;
 647     local $_ = $self->parse (@_);
 648     s/\s+$//;
 649     s/\\s-?\d//g;
 650     s/\s*\n\s*/ /g;
 651     if ($$self{ITEMS} > 1) {
 652         $$self{ITEMS} = 0;
 653         $self->output (".PD\n");
 654     }
 655     $self->output ($self->switchquotes ('.SH', $self->mapfonts ($_)));
 656     $self->outindex (($_ eq 'NAME') ? () : ('Header', $_));
 657     $$self{NEEDSPACE} = 0;
 658     $$self{IN_NAME} = ($_ eq 'NAME');
 659 }
 660
 661 # Second level heading.
 662 sub cmd_head2 {
 663     my $self = shift;
 664     local $_ = $self->parse (@_);
 665     s/\s+$//;
 666     s/\s*\n\s*/ /g;
 667     if ($$self{ITEMS} > 1) {
 668         $$self{ITEMS} = 0;
 669         $self->output (".PD\n");
 670     }
 671     $self->output ($self->switchquotes ('.Sh', $self->mapfonts ($_)));
 672     $self->outindex ('Subsection', $_);
 673     $$self{NEEDSPACE} = 0;
 674 }
 675
 676 # Third level heading.
 677 sub cmd_head3 {
 678     my $self = shift;
 679     local $_ = $self->parse (@_);
 680     s/\s+$//;
 681     s/\s*\n\s*/ /g;
 682     if ($$self{ITEMS} > 1) {
 683         $$self{ITEMS} = 0;
 684         $self->output (".PD\n");
 685     }
 686     $self->makespace;
 687     $self->output ($self->textmapfonts ('\f(IS' . $_ . '\f(IE') . "\n");
 688     $self->outindex ('Subsection', $_);
 689     $$self{NEEDSPACE} = 1;
 690 }
 691
 692 # Fourth level heading.
 693 sub cmd_head4 {
 694     my $self = shift;
 695     local $_ = $self->parse (@_);
 696     s/\s+$//;
 697     s/\s*\n\s*/ /g;
 698     if ($$self{ITEMS} > 1) {
 699         $$self{ITEMS} = 0;
 700         $self->output (".PD\n");
 701     }
 702     $self->makespace;
 703     $self->output ($self->textmapfonts ($_) . "\n");
 704     $self->outindex ('Subsection', $_);
 705     $$self{NEEDSPACE} = 1;
 706 }
 707
 708 # Start a list.  For indents after the first, wrap the outside indent in .RS
 709 # so that hanging paragraph tags will be correct.
 710 sub cmd_over {
 711     my $self = shift;
 712     local $_ = shift;
 713     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 714     if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) {
 715         $self->output (".RS $$self{INDENT}\n");
 716         push (@{ $$self{SHIFTS} }, $$self{INDENT});
 717     }
 718     push (@{ $$self{INDENTS} }, $$self{INDENT});
 719     $$self{INDENT} = ($_ + 0);
 720     $$self{SHIFTWAIT} = 1;
 721 }
 722
 723 # End a list.  If we've closed an embedded indent, we've mangled the hanging
 724 # paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT.
 725 # We'll close that .RS at the next =back or =item.
 726 sub cmd_back {
 727     my $self = shift;
 728     $$self{INDENT} = pop @{ $$self{INDENTS} };
 729     unless (defined $$self{INDENT}) {
 730         my ($file, $line, $paragraph) = @_;
 731         ($file, $line) = $paragraph->file_line;
 732         warn "$file:$line: Unmatched =back\n";
 733         $$self{INDENT} = 0;
 734     }
 735     if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) {
 736         $self->output (".RE\n");
 737         pop @{ $$self{SHIFTS} };
 738     }
 739     if (@{ $$self{INDENTS} } > 0) {
 740         $self->output (".RE\n");
 741         $self->output (".RS $$self{INDENT}\n");
 742     }
 743     $$self{NEEDSPACE} = 1;
 744     $$self{SHIFTWAIT} = 0;
 745 }
 746
 747 # An individual list item.  Emit an index entry for anything that's
 748 # interesting, but don't emit index entries for things like bullets and
 749 # numbers.  rofficate bullets too while we're at it (so for nice output, use *
 750 # for your lists rather than o or . or - or some other thing).  Newlines in an
 751 # item title are turned into spaces since *roff can't handle them embedded.
 752 sub cmd_item {
 753     my $self = shift;
 754     local $_ = $self->parse (@_);
 755     s/\s+$//;
 756     s/\s*\n\s*/ /g;
 757     my $index;
 758     if (/\w/ && !/^\w[.\)]\s*$/) {
 759         $index = $_;
 760         $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
 761     }
 762     $_ = '*' unless $_;
 763     s/^\*(\s|\Z)/\\\(bu$1/;
 764     if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
 765         $self->output (".RE\n");
 766         pop @{ $$self{SHIFTS} };
 767     }
 768     $_ = $self->textmapfonts ($_);
 769     $self->output (".PD 0\n") if ($$self{ITEMS} == 1);
 770     $self->output ($self->switchquotes ('.IP', $_, $$self{INDENT}));
 771     $self->outindex ($index ? ('Item', $index) : ());
 772     $$self{NEEDSPACE} = 0;
 773     $$self{ITEMS}++;
 774     $$self{SHIFTWAIT} = 0;
 775 }
 776
 777 # Begin a block for a particular translator.  Setting VERBATIM triggers
 778 # special handling in textblock().
 779 sub cmd_begin {
 780     my $self = shift;
 781     local $_ = shift;
 782     my ($kind) = /^(\S+)/ or return;
 783     if ($kind eq 'man' || $kind eq 'roff') {
 784         $$self{VERBATIM} = 1;
 785     } else {
 786         $$self{EXCLUDE} = 1;
 787     }
 788 }
 789
 790 # End a block for a particular translator.  We assume that all =begin/=end
 791 # pairs are properly closed.
 792 sub cmd_end {
 793     my $self = shift;
 794     $$self{EXCLUDE} = 0;
 795     $$self{VERBATIM} = 0;
 796 }
 797
 798 # One paragraph for a particular translator.  Ignore it unless it's intended
 799 # for man or roff, in which case we output it verbatim.
 800 sub cmd_for {
 801     my $self = shift;
 802     local $_ = shift;
 803     return unless s/^(?:man|roff)\b[ \t]*\n?//;
 804     $self->output ($_);
 805 }
 806
 807
 808 ##############################################################################
 809 # Escaping and fontification
 810 ##############################################################################
 811
 812 # At this point, we'll have embedded font codes of the form \f(<font>[SE]
 813 # where <font> is one of B, I, or F.  Turn those into the right font start or
 814 # end codes.  The old pod2man didn't get B<someI<thing> else> right; after I<>
 815 # it switched back to normal text rather than bold.  We take care of this by
 816 # using variables as a combined pointer to our current font sequence, and set
 817 # each to the number of current nestings of start tags for that font.  Use
 818 # them as a vector to look up what font sequence to use.
 819 #
 820 # \fP changes to the previous font, but only one previous font is kept.  We
 821 # don't know what the outside level font is; normally it's R, but if we're
 822 # inside a heading it could be something else.  So arrange things so that the
 823 # outside font is always the "previous" font and end with \fP instead of \fR.
 824 # Idea from Zack Weinberg.
 825 sub mapfonts {
 826     my $self = shift;
 827     local $_ = shift;
 828
 829     my ($fixed, $bold, $italic) = (0, 0, 0);
 830     my %magic = (F => \$fixed, B => \$bold, I => \$italic);
 831     my $last = '\fR';
 832     s { \\f\((.)(.) } {
 833         my $sequence = '';
 834         my $f;
 835         if ($last ne '\fR') { $sequence = '\fP' }
 836         ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
 837         $f = $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
 838         if ($f eq $last) {
 839             '';
 840         } else {
 841             if ($f ne '\fR') { $sequence .= $f }
 842             $last = $f;
 843             $sequence;
 844         }
 845     }gxe;
 846     $_;
 847 }
 848
 849 # Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU
 850 # groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather
 851 # than R, presumably because \f(CW doesn't actually do a font change.  To work
 852 # around this, use a separate textmapfonts for text blocks where the default
 853 # font is always R and only use the smart mapfonts for headings.
 854 sub textmapfonts {
 855     my $self = shift;
 856     local $_ = shift;
 857
 858     my ($fixed, $bold, $italic) = (0, 0, 0);
 859     my %magic = (F => \$fixed, B => \$bold, I => \$italic);
 860     s { \\f\((.)(.) } {
 861         ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
 862         $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
 863     }gxe;
 864     $_;
 865 }
 866
 867
 868 ##############################################################################
 869 # *roff-specific parsing and magic
 870 ##############################################################################
 871
 872 # Called instead of parse_text, calls parse_text with the right flags.
 873 sub parse {
 874     my $self = shift;
 875     $self->parse_text ({ -expand_seq   => 'sequence',
 876                          -expand_ptree => 'collapse' }, @_);
 877 }
 878
 879 # Takes a parse tree, a flag saying whether or not to treat it as literal text
 880 # (not call guesswork on it), and a flag saying whether or not to clean some
 881 # things up for *roff, and returns the concatenation of all of the text
 882 # strings in that parse tree.  If the literal flag isn't true, guesswork()
 883 # will be called on all plain scalars in the parse tree.  Otherwise, if
 884 # collapse is being called on a C<> code, $cleanup should be set to true and
 885 # some additional cleanup will be done.  Assumes that everything in the parse
 886 # tree is either a scalar or a reference to a scalar.
 887 sub collapse {
 888     my ($self, $ptree, $literal, $cleanup) = @_;
 889
 890     # If we're processing the NAME section, don't do normal guesswork.  This
 891     # is because NAME lines are often extracted by utilities like catman that
 892     # require plain text and don't understand *roff markup.  We still need to
 893     # escape backslashes and hyphens for *roff (and catman expects \- instead
 894     # of -).
 895     if ($$self{IN_NAME}) {
 896         $literal = 1;
 897         $cleanup = 1;
 898     }
 899
 900     # Do the collapse of the parse tree as described above.
 901     return join ('', map {
 902         if (ref $_) {
 903             join ('', @$_);
 904         } elsif ($literal) {
 905             if ($cleanup) {
 906                 s/\\/\\e/g;
 907                 s/-/\\-/g;
 908                 s/__/_\\|_/g;
 909             }
 910             $_;
 911         } else {
 912             $self->guesswork ($_);
 913         }
 914     } $ptree->children);
 915 }
 916
 917 # Takes a text block to perform guesswork on; this is guaranteed not to
 918 # contain any formatting codes.  Returns the text block with remapping done.
 919 sub guesswork {
 920     my $self = shift;
 921     local $_ = shift;
 922
 923     # rofficate backslashes.
 924     s/\\/\\e/g;
 925
 926     # Ensure double underbars have a tiny space between them.
 927     s/__/_\\|_/g;
 928
 929     # Leave hyphens only if they're part of regular words and there is only
 930     # one dash at a time.  Leave a dash after the first character as a regular
 931     # non-breaking dash, but don't let it mark the rest of the word invalid
 932     # for hyphenation.
 933     s/-/\\-/g;
 934     s{
 935       ( (?:\G|^|\s) [a-zA-Z] ) ( \\- )?
 936       ( (?: [a-zA-Z]+ \\-)+ )
 937       ( [a-zA-Z]+ ) (?=\s|\Z)
 938       \b
 939      } {
 940          my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
 941          $hyphen ||= '';
 942          $main =~ s/\\-/-/g;
 943          $prefix . $hyphen . $main . $suffix;
 944     }egx;
 945
 946     # Translate -- into a real em dash if it's used like one.
 947     s{ (\s) \\-\\- (\s) }                         { $1 . '\*(--' . $2 }egx;
 948     s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;
 949
 950     # Make all caps a little smaller.  Be careful here, since we don't want to
 951     # make @ARGV into small caps, nor do we want to fix the MIME in
 952     # MIME-Version, since it looks weird with the full-height V.
 953     s{
 954         ( ^ | [\s\(\"\'\`\[\{<>] )
 955         ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* )
 956         (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | $ )
 957     } { $1 . '\s-1' . $2 . '\s0' }egx;
 958
 959     # Italize functions in the form func().
 960     s{
 961         ( \b | \\s-1 )
 962         (
 963             [A-Za-z_] ([:\w]|\\s-?[01])+ \(\)
 964         )
 965     } { $1 . '\f(IS' . $2 . '\f(IE' }egx;
 966
 967     # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
 968     s{
 969         ( \b | \\s-1 )
 970         ( [A-Za-z_] (?:[.:\w]|\\-|\\s-?[01])+ )
 971         (
 972             \( \d [a-z]* \)
 973         )
 974     } { $1 . '\f(IS' . $2 . '\f(IE\|' . $3 }egx;
 975
 976     # Convert simple Perl variable references to a fixed-width font.
 977     s{
 978         ( \s+ )
 979         ( [\$\@%] [\w:]+ )
 980         (?! \( )
 981     } { $1 . '\f(FS' . $2 . '\f(FE'}egx;
 982
 983     # Fix up double quotes.
 984     s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
 985
 986     # Make C++ into \*(C+, which is a squinched version.
 987     s{ \b C\+\+ } {\\*\(C+}gx;
 988
 989     # All done.
 990     $_;
 991 }
 992
 993 # Handles C<> text, deciding whether to put \*C` around it or not.  This is a
 994 # whole bunch of messy heuristics to try to avoid overquoting, originally from
 995 # Barrie Slaymaker.  This largely duplicates similar code in Pod::Text.
 996 sub quote_literal {
 997     my $self = shift;
 998     local $_ = shift;
 999
1000     # A regex that matches the portion of a variable reference that's the
1001     # array or hash index, separated out just because we want to use it in
1002     # several places in the following regex.
1003     my $index = '(?: \[.*\] | \{.*\} )?';
1004
1005     # Check for things that we don't want to quote, and if we find any of
1006     # them, return the string with just a font change and no quoting.
1007     m{
1008       ^\s*
1009       (?:
1010          ( [\'\`\"] ) .* \1                             # already quoted
1011        | \` .* \'                                       # `quoted'
1012        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
1013        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
1014        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
1015        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
1016        | 0x [a-fA-F\d]+                                 # a hex constant
1017       )
1018       \s*\z
1019      }xo && return '\f(FS' . $_ . '\f(FE';
1020
1021     # If we didn't return, go ahead and quote the text.
1022     return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE";
1023 }
1024
1025
1026 ##############################################################################
1027 # Output formatting
1028 ##############################################################################
1029
# Make vertical whitespace.  If more than one list item has been output since
# the count was last reset, emit a bare .PD first to undo the .PD 0 that
# cmd_item emits; the item count is reset either way.  Then, if NEEDSPACE is
# set, emit .Sp when the current indentation is greater than zero and .PP
# otherwise.
sub makespace {
    my ($self) = @_;
    if ($self->{ITEMS} > 1) {
        $self->output (".PD\n");
    }
    $self->{ITEMS} = 0;
    my $macro = ($self->{INDENT} > 0) ? ".Sp\n" : ".PP\n";
    $self->output ($macro) if $self->{NEEDSPACE};
}
1038
# Output any pending index entries, and optionally an index entry given as an
# argument (the kind of entry followed by its text).  Support multiple index
# entries in X<> separated by slashes, and strip special escapes from index
# entries.  The pending entries in $$self{INDEX} are emitted together as a
# single Xref entry and the pending list is then emptied.  Does nothing if
# there is neither an argument nor a pending entry.
sub outindex {
    my ($self, $section, $index) = @_;
    my @xrefs = map { split m%\s*/\s*%, $_ } @{ $self->{INDEX} };
    return unless $section || @xrefs;
    $self->{INDEX} = [];

    my @pending;
    push @pending, [ 'Xref', join (' ', @xrefs) ] if @xrefs;
    if ($section) {
        # Turn escaped hyphens back into plain ones and drop every other
        # *roff escape, including our internal font codes.
        $index =~ s/\\-/-/g;
        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
        push @pending, [ $section, $index ];
    }

    # Double quotes have to be doubled inside a quoted macro argument.
    foreach my $pair (@pending) {
        my ($type, $entry) = @$pair;
        $entry =~ s/\"/\"\"/g;
        $self->output (qq(.IX $type "$entry"\n));
    }
}
1062
# Output text to the output device.  Prints its one text argument to the file
# handle returned by the output_handle method.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print $handle $text;
}
1065
# Given a command, a single argument that may or may not contain double
# quotes, and an optional extra argument to append after it, handle
# double-quote formatting for the command.  If there are no double quotes and
# no fixed-width fonts in the argument, just return the command followed by
# the argument in double quotes.  Otherwise, build separate nroff and troff
# forms: for nroff, the argument in double quotes with embedded double quotes
# doubled, and for troff, paired double quotes remapped to `` and ''.  If the
# two forms differ, return both inside an .ie n/.el conditional.  The returned
# string always ends in a newline.
sub switchquotes {
    my ($self, $command, $text, $extra) = @_;
    my $suffix = $extra ? " $extra" : '';
    $text =~ s/\\\*\([LR]\"/\"/g;

    # We also have to deal with \*C` and \*C', which are used to add the
    # quotes around C<> text, since they may expand to " and if they do this
    # confuses the .SH macros and the like no end.  Expand them ourselves.
    # Also separate troff from nroff if there are any fixed-width fonts in use
    # to work around problems with Solaris nroff.  The font sequences are
    # matched literally, so quote all of their metacharacters.
    my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);
    my $fixedpat = join ('|', map { quotemeta }
                         @{ $$self{FONTS} }{'100', '101', '110', '111'});

    # The simple case: nothing needs different treatment in nroff and troff.
    unless ($text =~ /\"/ || $text =~ /$fixedpat/) {
        return qq($command "$text"$suffix\n);
    }

    $text =~ s/\"/\"\"/g;
    my $nroff = $text;
    my $troff = $text;
    $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
    if ($c_is_quote && $text =~ /\\\*\(C[\'\`]/) {
        $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
        $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
        $troff =~ s/\\\*\(C[\'\`]//g;
    }
    $nroff = qq("$nroff") . $suffix;
    $troff = qq("$troff") . $suffix;

    # Work around the Solaris nroff bug where \f(CW\fP leaves the font set
    # to Roman rather than the actual previous font when used in headings.
    # troff output may still be broken, but at least we can fix nroff by
    # just switching the font changes to the non-fixed versions.  The
    # matches must be non-greedy so that each fixed-width start code is
    # paired with the nearest following \fP or \fR; a greedy match would
    # pair the first start code with the last end code on the line and leave
    # everything in between unconverted.
    $nroff =~ s/\Q$$self{FONTS}{100}\E(.*?)\\f[PR]/$1/g;
    $nroff =~ s/\Q$$self{FONTS}{101}\E(.*?)\\f([PR])/\\fI$1\\f$2/g;
    $nroff =~ s/\Q$$self{FONTS}{110}\E(.*?)\\f([PR])/\\fB$1\\f$2/g;
    $nroff =~ s/\Q$$self{FONTS}{111}\E(.*?)\\f([PR])/\\f\(BI$1\\f$2/g;

    # Now finally output the command.  Only bother with .ie if the nroff
    # and troff output isn't the same.
    if ($nroff ne $troff) {
        return ".ie n $command $nroff\n.el $command $troff\n";
    } else {
        return "$command $nroff\n";
    }
}
1123
1124 __END__
1125
1126 ##############################################################################
1127 # Documentation
1128 ##############################################################################
1129
1130 =head1 NAME
1131
1132 Pod::Man - Convert POD data to formatted *roff input
1133
1134 =head1 SYNOPSIS
1135
1136     use Pod::Man;
1137     my $parser = Pod::Man->new (release => $VERSION, section => 8);
1138
1139     # Read POD from STDIN and write to STDOUT.
1140     $parser->parse_from_filehandle;
1141
1142     # Read POD from file.pod and write to file.1.
1143     $parser->parse_from_file ('file.pod', 'file.1');
1144
1145 =head1 DESCRIPTION
1146
1147 Pod::Man is a module to convert documentation in the POD format (the
1148 preferred language for documenting Perl) into *roff input using the man
1149 macro set.  The resulting *roff code is suitable for display on a terminal
1150 using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1151 It is conventionally invoked using the driver script B<pod2man>, but it can
1152 also be used directly.
1153
1154 As a derived class from Pod::Parser, Pod::Man supports the same methods and
1155 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
1156 new parser with C<< Pod::Man->new() >> and then calls either
1157 parse_from_filehandle() or parse_from_file().
1158
1159 new() can take options, in the form of key/value pairs that control the
1160 behavior of the parser.  See below for details.
1161
1162 If no options are given, Pod::Man uses the name of the input file with any
1163 trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title.  It
1164 defaults to section 1 (or section 3 if the file ended in C<.pm>), to a
1165 centered title of "User Contributed Perl Documentation", to a centered
1166 footer of the Perl version it is run with, and to a left-hand footer of the
1167 modification date of its input (or the current date if given STDIN for
1168 input).
1169
1170 Pod::Man assumes that your *roff formatters have a fixed-width font named
1171 CW.  If yours is called something else (like CR), use the C<fixed> option to
1172 specify it.  This generally only matters for troff output for printing.
1173 Similarly, you can set the fonts used for bold, italic, and bold italic
1174 fixed-width output.
1175
1176 Besides the obvious pod conversions, Pod::Man also takes care of formatting
1177 func(), func(3), and simple variable references like $foo or @bar so you
1178 don't have to use code escapes for them; complex expressions like
1179 C<$fred{'stuff'}> will still need to be escaped, though.  It also translates
1180 dashes that aren't used as hyphens into en dashes, makes long dashes--like
1181 this--into proper em dashes, fixes "paired quotes," makes C++ look right,
1182 puts a little space between double underbars, makes ALLCAPS a teeny bit
1183 smaller in B<troff>, and escapes stuff that *roff treats as special so that
1184 you don't have to.
1185
1186 The recognized options to new() are as follows.  All options take a single
1187 argument.
1188
1189 =over 4
1190
1191 =item center
1192
1193 Sets the centered page header to use instead of "User Contributed Perl
1194 Documentation".
1195
1196 =item date
1197
1198 Sets the left-hand footer.  By default, the modification date of the input
1199 file will be used, or the current date if stat() can't find that file (the
1200 case if the input is from STDIN), and the date will be formatted as
1201 YYYY-MM-DD.
1202
1203 =item fixed
1204
1205 The fixed-width font to use for verbatim text and code.  Defaults to CW.
1206 Some systems may want CR instead.  Only matters for B<troff> output.
1207
1208 =item fixedbold
1209
1210 Bold version of the fixed-width font.  Defaults to CB.  Only matters for
1211 B<troff> output.
1212
1213 =item fixeditalic
1214
1215 Italic version of the fixed-width font (actually, something of a misnomer,
1216 since most fixed-width fonts only have an oblique version, not an italic
1217 version).  Defaults to CI.  Only matters for B<troff> output.
1218
1219 =item fixedbolditalic
1220
1221 Bold italic (probably actually oblique) version of the fixed-width font.
1222 Pod::Man doesn't assume you have this, and defaults to CB.  Some systems
1223 (such as Solaris) have this font available as CX.  Only matters for B<troff>
1224 output.
1225
1226 =item name
1227
1228 Set the name of the manual page.  Without this option, the manual name is
1229 set to the uppercased base name of the file being converted unless the
1230 manual section is 3, in which case the path is parsed to see if it is a Perl
1231 module path.  If it is, a path like C<.../lib/Pod/Man.pm> is converted into
1232 a name like C<Pod::Man>.  This option, if given, overrides any automatic
1233 determination of the name.
1234
1235 =item quotes
1236
1237 Sets the quote marks used to surround CE<lt>> text.  If the value is a
1238 single character, it is used as both the left and right quote; if it is two
1239 characters, the first character is used as the left quote and the second as
1240 the right quote; and if it is four characters, the first two are used as
1241 the left quote and the second two as the right quote.
1242
1243 This may also be set to the special value C<none>, in which case no quote
1244 marks are added around CE<lt>> text (but the font is still changed for troff
1245 output).
1246
1247 =item release
1248
1249 Set the centered footer.  By default, this is the version of Perl you run
1250 Pod::Man under.  Note that some system an macro sets assume that the
1251 centered footer will be a modification date and will prepend something like
1252 "Last modified: "; if this is the case, you may want to set C<release> to
1253 the last modified date and C<date> to the version number.
1254
1255 =item section
1256
1257 Set the section for the C<.TH> macro.  The standard section numbering
1258 convention is to use 1 for user commands, 2 for system calls, 3 for
1259 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1260 miscellaneous information, and 8 for administrator commands.  There is a lot
1261 of variation here, however; some systems (like Solaris) use 4 for file
1262 formats, 5 for miscellaneous information, and 7 for devices.  Still others
1263 use 1m instead of 8, or some mix of both.  About the only section numbers
1264 that are reliably consistent are 1, 2, and 3.
1265
1266 By default, section 1 will be used unless the file ends in .pm in which case
1267 section 3 will be selected.
1268
1269 =back
1270
1271 The standard Pod::Parser method parse_from_filehandle() takes up to two
1272 arguments, the first being the file handle to read POD from and the second
1273 being the file handle to write the formatted output to.  The first defaults
1274 to STDIN if not given, and the second defaults to STDOUT.  The method
1275 parse_from_file() is almost identical, except that its two arguments are the
1276 input and output disk files instead.  See L<Pod::Parser> for the specific
1277 details.
1278
1279 =head1 DIAGNOSTICS
1280
1281 =over 4
1282
1283 =item roff font should be 1 or 2 chars, not "%s"
1284
1285 (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1286 wasn't either one or two characters.  Pod::Man doesn't support *roff fonts
1287 longer than two characters, although some *roff extensions do (the canonical
1288 versions of B<nroff> and B<troff> don't either).
1289
1290 =item Invalid link %s
1291
1292 (W) The POD source contained a C<LE<lt>E<gt>> formatting code that
1293 Pod::Man was unable to parse.  You should never see this error message; it
1294 probably indicates a bug in Pod::Man.
1295
1296 =item Invalid quote specification "%s"
1297
1298 (F) The quote specification given (the quotes option to the constructor) was
1299 invalid.  A quote specification must be one, two, or four characters long.
1300
1301 =item %s:%d: Unknown command paragraph "%s".
1302
1303 (W) The POD source contained a non-standard command paragraph (something of
1304 the form C<=command args>) that Pod::Man didn't know about.  It was ignored.
1305
1306 =item %s:%d: Unknown escape EE<lt>%sE<gt>
1307
1308 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1309 know about.  C<EE<lt>%sE<gt>> was printed verbatim in the output.
1310
1311 =item %s:%d: Unknown formatting code %s
1312
1313 (W) The POD source contained a non-standard formatting code (something of
1314 the form C<XE<lt>E<gt>>) that Pod::Man didn't know about.  It was ignored.
1315
1316 =item %s:%d: Unmatched =back
1317
1318 (W) Pod::Man encountered a C<=back> command that didn't correspond to an
1319 C<=over> command.
1320
1321 =back
1322
1323 =head1 BUGS
1324
1325 Eight-bit input data isn't handled at all well at present.  The correct
1326 approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8
1327 characters and then do a translation pass on the output according to the
1328 user-specified output character set.  Unfortunately, we can't send eight-bit
1329 data directly to the output unless the user says this is okay, since some
1330 vendor *roff implementations can't handle eight-bit data.  If the *roff
1331 implementation can, however, that's far superior to the current hacked
1332 characters that only work under troff.
1333
1334 There is currently no way to turn off the guesswork that tries to format
1335 unmarked text appropriately, and sometimes it isn't wanted (particularly
1336 when using POD to document something other than Perl).
1337
1338 The NAME section should be recognized specially and index entries emitted
1339 for everything in that section.  This would have to be deferred until the
1340 next section, since extraneous things in NAME tend to confuse various man
1341 page processors.
1342
1343 Pod::Man doesn't handle font names longer than two characters.  Neither do
1344 most B<troff> implementations, but GNU troff does as an extension.  It would
1345 be nice to support as an option for those who want to use it.
1346
1347 The preamble added to each output file is rather verbose, and most of it is
1348 only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1349 characters.  It would ideally be nice if all of those definitions were only
1350 output if needed, perhaps on the fly as the characters are used.
1351
1352 Pod::Man is excessively slow.
1353
1354 =head1 CAVEATS
1355
1356 The handling of hyphens and em dashes is somewhat fragile, and one may get
1357 the wrong one under some circumstances.  This should only matter for
1358 B<troff> output.
1359
1360 When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1361 necessarily get it right.
1362
1363 =head1 SEE ALSO
1364
1365 L<Pod::Parser>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
1366 L<man(1)>, L<man(7)>
1367
1368 Ossanna, Joseph F., and Brian W. Kernighan.  "Troff User's Manual,"
1369 Computing Science Technical Report No. 54, AT&T Bell Laboratories.  This is
1370 the best documentation of standard B<nroff> and B<troff>.  At the time of
1371 this writing, it's available at
1372 L<http://www.cs.bell-labs.com/cm/cs/cstr.html>.
1373
1374 The man page documenting the man macro set may be L<man(5)> instead of
1375 L<man(7)> on your system.  Also, please see L<pod2man(1)> for extensive
1376 documentation on writing manual pages if you've not done it before and
1377 aren't familiar with the conventions.
1378
1379 =head1 AUTHOR
1380
1381 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
1382 B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>.
1383
1384 =head1 COPYRIGHT AND LICENSE
1385
1386 Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
1387
1388 This program is free software; you may redistribute it and/or modify it
1389 under the same terms as Perl itself.
1390
1391 =cut