1 # Pod::Man -- Convert POD data to formatted *roff input.
2 # $Id: Man.pm,v 1.4 2000/04/26 04:03:41 eagle Exp $
4 # Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu>
6 # This program is free software; you can redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # This module is intended to be a replacement for the pod2man script
10 # distributed with versions of Perl prior to 5.6, and attempts to match its
11 # output except for some specific circumstances where other decisions seemed
12 # to produce better output. It uses Pod::Parser and is designed to be easy
15 # Perl core hackers, please note that this module is also separately
16 # maintained outside of the Perl core as part of the podlators. Please send
17 # me any patches at the address above in addition to sending them to the
18 # standard Perl mailing lists.
20 ############################################################################
21 # Modules and declarations
22 ############################################################################
28 use Carp qw(carp croak);
32 use subs qw(makespace);
33 use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
35 @ISA = qw(Pod::Parser);
37 # Don't use the CVS revision as the version, since this module is also in
38 # Perl core and too many things could munge CVS magic revision strings.
39 # This number should ideally be the same as the CVS revision in podlators,
44 ############################################################################
45 # Preamble and *roff output tables
46 ############################################################################
48 # The following is the static preamble which starts all *roff output we
49 # generate. It's completely static except for the font to use as a
50 # fixed-width font, which is designated by @CFONT@. $PREAMBLE should
51 # therefore be run through s/\@CFONT\@/<font>/g before output.
52 $PREAMBLE = <<'----END OF PREAMBLE----';
53 .de Sh \" Subsection heading
61 .de Sp \" Vertical space (when we can't use .PP)
67 .ie \\n(.$>=3 .ne \\$3
71 .de Vb \" Begin verbatim text
76 .de Ve \" End verbatim text
81 .\" Set up some character translations and predefined strings. \*(-- will
82 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
83 .\" double quote, and \*(R" will give a right double quote. | will give a
84 .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used
85 .\" to do unbreakable dashes and therefore won't be available. \*(C` and
86 .\" \*(C' expand to `' in nroff, nothing in troff, for use with C<>
88 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
92 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
93 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
106 .\" If the F register is turned on, we'll generate index entries on stderr
107 .\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and
108 .\" index entries marked with X<> in POD. Of course, you'll have to process
109 .\" the output yourself in some meaningful fashion.
112 . tm Index:\\$1\t\\n%\t"\\$2"
118 .\" For nroff, turn off justification. Always turn off hyphenation; it
119 .\" makes way too many mistakes in technical documents.
123 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
124 .\" Fear. Run. Save yourself. No user-serviceable parts.
126 . \" fudge factors for nroff and troff
135 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
141 . \" simple accents for nroff and troff
151 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
152 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
153 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
154 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
155 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
156 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
158 . \" troff and (daisy-wheel) nroff accents
159 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
160 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
161 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
162 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
163 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
164 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
165 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
166 .ds ae a\h'-(\w'a'u*4/10)'e
167 .ds Ae A\h'-(\w'A'u*4/10)'E
168 . \" corrections for vroff
169 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
170 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
171 . \" for low resolution devices (crt and lpr)
172 .if \n(.H>23 .if \n(.V>19 \
185 ----END OF PREAMBLE----
187 # This table is taken nearly verbatim from Tom Christiansen's pod2man. It
188 # assumes that the standard preamble has already been printed, since that's
189 # what defines all of the accent marks. Note that some of these are quoted
190 # with double quotes since they contain embedded single quotes, so use \\
191 # uniformly for backslash for readability.
193 'amp' => '&', # ampersand
194 'lt' => '<', # left chevron, less-than
195 'gt' => '>', # right chevron, greater-than
196 'quot' => '"', # double quote
197 'sol' => '/', # solidus
198 'verbar' => '|', # vertical bar
200 'Aacute' => "A\\*'", # capital A, acute accent
201 'aacute' => "a\\*'", # small a, acute accent
202 'Acirc' => 'A\\*^', # capital A, circumflex accent
203 'acirc' => 'a\\*^', # small a, circumflex accent
# The preamble defines this string as "Ae" (.ds Ae); *roff string names are
# case-sensitive, so the reference has to use the same capitalization.
'AElig' => '\*(Ae', # capital AE diphthong (ligature)
205 'aelig' => '\*(ae', # small ae diphthong (ligature)
206 'Agrave' => "A\\*`", # capital A, grave accent
'agrave' => "a\\*`", # small a, grave accent
208 'Aring' => 'A\\*o', # capital A, ring
209 'aring' => 'a\\*o', # small a, ring
210 'Atilde' => 'A\\*~', # capital A, tilde
211 'atilde' => 'a\\*~', # small a, tilde
212 'Auml' => 'A\\*:', # capital A, dieresis or umlaut mark
213 'auml' => 'a\\*:', # small a, dieresis or umlaut mark
214 'Ccedil' => 'C\\*,', # capital C, cedilla
215 'ccedil' => 'c\\*,', # small c, cedilla
216 'Eacute' => "E\\*'", # capital E, acute accent
217 'eacute' => "e\\*'", # small e, acute accent
218 'Ecirc' => 'E\\*^', # capital E, circumflex accent
219 'ecirc' => 'e\\*^', # small e, circumflex accent
220 'Egrave' => 'E\\*`', # capital E, grave accent
221 'egrave' => 'e\\*`', # small e, grave accent
222 'ETH' => '\\*(D-', # capital Eth, Icelandic
223 'eth' => '\\*(d-', # small eth, Icelandic
224 'Euml' => 'E\\*:', # capital E, dieresis or umlaut mark
225 'euml' => 'e\\*:', # small e, dieresis or umlaut mark
226 'Iacute' => "I\\*'", # capital I, acute accent
227 'iacute' => "i\\*'", # small i, acute accent
228 'Icirc' => 'I\\*^', # capital I, circumflex accent
229 'icirc' => 'i\\*^', # small i, circumflex accent
230 'Igrave' => 'I\\*`', # capital I, grave accent
231 'igrave' => 'i\\*`', # small i, grave accent
232 'Iuml' => 'I\\*:', # capital I, dieresis or umlaut mark
233 'iuml' => 'i\\*:', # small i, dieresis or umlaut mark
234 'Ntilde' => 'N\*~', # capital N, tilde
235 'ntilde' => 'n\*~', # small n, tilde
236 'Oacute' => "O\\*'", # capital O, acute accent
237 'oacute' => "o\\*'", # small o, acute accent
238 'Ocirc' => 'O\\*^', # capital O, circumflex accent
239 'ocirc' => 'o\\*^', # small o, circumflex accent
240 'Ograve' => 'O\\*`', # capital O, grave accent
241 'ograve' => 'o\\*`', # small o, grave accent
242 'Oslash' => 'O\\*/', # capital O, slash
243 'oslash' => 'o\\*/', # small o, slash
244 'Otilde' => 'O\\*~', # capital O, tilde
245 'otilde' => 'o\\*~', # small o, tilde
246 'Ouml' => 'O\\*:', # capital O, dieresis or umlaut mark
247 'ouml' => 'o\\*:', # small o, dieresis or umlaut mark
248 'szlig' => '\*8', # small sharp s, German (sz ligature)
249 'THORN' => '\\*(Th', # capital THORN, Icelandic
250 'thorn' => '\\*(th', # small thorn, Icelandic
251 'Uacute' => "U\\*'", # capital U, acute accent
252 'uacute' => "u\\*'", # small u, acute accent
253 'Ucirc' => 'U\\*^', # capital U, circumflex accent
254 'ucirc' => 'u\\*^', # small u, circumflex accent
255 'Ugrave' => 'U\\*`', # capital U, grave accent
256 'ugrave' => 'u\\*`', # small u, grave accent
257 'Uuml' => 'U\\*:', # capital U, dieresis or umlaut mark
258 'uuml' => 'u\\*:', # small u, dieresis or umlaut mark
259 'Yacute' => "Y\\*'", # capital Y, acute accent
260 'yacute' => "y\\*'", # small y, acute accent
261 'yuml' => 'y\\*:', # small y, dieresis or umlaut mark
265 ############################################################################
266 # Static helper functions
267 ############################################################################
269 # Protect leading quotes and periods against interpretation as commands.
270 # Also protect anything starting with a backslash, since it could expand
271 # or hide something that *roff would interpret as a command. This is
272 # overkill, but it's much simpler than trying to parse *roff here.
275 s/^([.\'\\])/\\&$1/mg;
279 # Given a command and a single argument that may or may not contain double
280 # quotes, handle double-quote formatting for it. If there are no double
281 # quotes, just return the command followed by the argument in double quotes.
282 # If there are double quotes, use an if statement to test for nroff, and for
283 # nroff output the command followed by the argument in double quotes with
284 # embedded double quotes doubled. For other formatters, remap paired double
285 # quotes to `` and ''.
294 $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
295 s/\"/\"\"/g if $extra;
296 $troff =~ s/\"/\"\"/g if $extra;
297 $_ = qq("$_") . ($extra ? " $extra" : '');
298 $troff = qq("$troff") . ($extra ? " $extra" : '');
299 return ".if n $command $_\n.el $command $troff\n";
301 $_ = qq("$_") . ($extra ? " $extra" : '');
302 return "$command $_\n";
# Build the *roff escape that switches to the font named by the argument.
# A one-character name (or an empty one) uses the short \fX form; anything
# longer uses the \f(XX form.
sub toescape {
    my $font = $_[0];
    return '\f(' . $font if length ($font) > 1;
    return '\f' . $font;
}
310 ############################################################################
312 ############################################################################
314 # Initialize the object. Here, we also process any additional options
315 # passed to the constructor or set up defaults if none were given. center
316 # is the centered title, release is the version number, and date is the date
317 # for the documentation. Note that we can't know what file name we're
318 # processing due to the architecture of Pod::Parser, so that *has* to either
319 # be passed to the constructor or set separately with Pod::Man::name().
323 # Figure out the fixed-width fonts. If user-supplied, make sure that
324 # they are the right length.
325 for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
326 if (defined $$self{$_}) {
327 if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
328 croak "roff font should be 1 or 2 chars, not `$$self{$_}'";
335 # Set the default fonts. We can't be sure what fixed bold-italic is
336 # going to be called, so default to just bold.
337 $$self{fixed} ||= 'CW';
338 $$self{fixedbold} ||= 'CB';
339 $$self{fixeditalic} ||= 'CI';
340 $$self{fixedbolditalic} ||= 'CB';
342 # Set up a table of font escapes. First number is fixed-width, second
343 # is bold, third is italic.
344 $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
345 '010' => '\fB', '011' => '\f(BI',
346 '100' => toescape ($$self{fixed}),
347 '101' => toescape ($$self{fixeditalic}),
348 '110' => toescape ($$self{fixedbold}),
349 '111' => toescape ($$self{fixedbolditalic})};
351 # Extra stuff for page titles.
352 $$self{center} = 'User Contributed Perl Documentation'
353 unless defined $$self{center};
354 $$self{indent} = 4 unless defined $$self{indent};
356 # We used to try first to get the version number from a local binary,
357 # but we shouldn't need that any more. Get the version from the running
358 # Perl. Work a little magic to handle subversions correctly under both
359 # the pre-5.6 and the post-5.6 version numbering schemes.
360 if (!defined $$self{release}) {
361 my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
363 $version[2] *= 10 ** (3 - length $version[2]);
364 for (@version) { $_ += 0 }
365 $$self{release} = 'perl v' . join ('.', @version);
368 # Double quotes in things that will be quoted.
369 for (qw/center date release/) {
370 $$self{$_} =~ s/\"/\"\"/g if $$self{$_};
373 $$self{INDENT} = 0; # Current indentation level.
374 $$self{INDENTS} = []; # Stack of indentations.
375 $$self{INDEX} = []; # Index keys waiting to be printed.
377 $self->SUPER::initialize;
380 # For each document we process, output the preamble first. Note that the
381 # fixed width font is a global default; once we interpolate it into the
382 # PREAMBLE, it ain't ever changing. Maybe fix this later.
386 # Try to figure out the name and section from the file name.
387 my $section = $$self{section} || 1;
388 my $name = $$self{name};
389 if (!defined $name) {
390 $name = $self->input_file;
391 $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
392 $name =~ s/\.p(od|[lm])\z//i;
393 if ($section =~ /^1/) {
394 require File::Basename;
395 $name = uc File::Basename::basename ($name);
397 # Lose everything up to the first of
398 # */lib/*perl* standard or site_perl module
399 # */*perl*/lib from -D prefix=/opt/perl
400 # */*perl*/ random module hierarchy
401 # which works. Should be fixed to use File::Spec. Also handle
402 # a leading lib/ since that's what ExtUtils::MakeMaker creates.
405 if ( s%^.*?/lib/[^/]*perl[^/]*/%%si
406 or s%^.*?/[^/]*perl[^/]*/(?:lib/)?%%si) {
407 s%^site(_perl)?/%%s; # site and site_perl
408 s%^(.*-$^O|$^O-.*)/%%so; # arch
409 s%^\d+\.\d+%%s; # version
417 # Modification date header. Try to use the modification time of our
419 if (!defined $$self{date}) {
420 my $time = (stat $self->input_file)[9] || time;
421 my ($day, $month, $year) = (localtime $time)[3,4,5];
424 $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day);
427 # Now, print out the preamble and the title.
428 $PREAMBLE =~ s/\@CFONT\@/$$self{fixed}/;
430 print { $self->output_handle } <<"----END OF HEADER----";
431 .\\" Automatically generated by Pod::Man version $VERSION
432 .\\" @{[ scalar localtime ]}
434 .\\" Standard preamble:
435 .\\" ======================================================================
437 .\\" ======================================================================
439 .IX Title "$name $section"
440 .TH $name $section "$$self{release}" "$$self{date}" "$$self{center}"
442 ----END OF HEADER----
445 # Initialize a few per-file variables.
447 $$self{NEEDSPACE} = 0;
451 ############################################################################
453 ############################################################################
455 # Called for each command paragraph. Gets the command, the associated
456 # paragraph, the line number, and a Pod::Paragraph object. Just dispatches
457 # the command to a method named the same as the command. =cut is handled
458 # internally by Pod::Parser.
462 return if $command eq 'pod';
463 return if ($$self{EXCLUDE} && $command ne 'end');
464 $command = 'cmd_' . $command;
465 unless ($self -> can ($command)) {
466 my $com = substr $command => 4;
467 my ($file, $line) = $_ [2] -> file_line;
468 (my $text = $_ [0]) =~ s/\n+\z//g;
469 $text = " $text" if $text =~ /^\S/;
470 warn qq {$file: Unknown command paragraph "=$com${text}"},
471 qq { on line $line.\n};
475 $self->$command (@_);
479 # Called for a verbatim paragraph. Gets the paragraph, the line number, and
480 # a Pod::Paragraph object. Rofficate backslashes, untabify, put a
481 # zero-width character at the beginning of each line to protect against
482 # commands, and wrap in .Vb/.Ve.
485 return if $$self{EXCLUDE};
489 my $lines = tr/\n/\n/;
490 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
492 s/^(\s*\S)/'\&' . $1/gme;
493 $self->makespace if $$self{NEEDSPACE};
494 $self->output (".Vb $lines\n$_.Ve\n");
495 $$self{NEEDSPACE} = 0;
498 # Called for a regular text block. Gets the paragraph, the line number, and
499 # a Pod::Paragraph object. Perform interpolation and output the results.
502 return if $$self{EXCLUDE};
503 $self->output ($_[0]), return if $$self{VERBATIM};
505 # Perform a little magic to collapse multiple L<> references. We'll
506 # just rewrite the whole thing into actual text at this part, bypassing
507 # the whole internal sequence parsing thing.
510 (L< # A link of the form L</something>.
513 [:\w]+ # The item has to be a simple word...
514 (\(\))? # ...or simple function.
518 ,?\s+(and\s+)? # Allow lots of them, conjuncted.
527 s{ L< / ( [^>]+ ) > } {$1}xg;
528 my @items = split /(?:,?\s+(?:and\s+)?)/;
531 for ($i = 0; $i < @items; $i++) {
532 $string .= $items[$i];
533 $string .= ', ' if @items > 2 && $i != $#items;
# With exactly two items no comma is appended, so add the separating space
# after the first item ($i == 0); otherwise the item runs straight into the
# 'and ' appended below.
$string .= ' ' if @items == 2 && $i == 0;
535 $string .= 'and ' if ($i == $#items - 1);
537 $string .= ' entries elsewhere in this document';
541 # Parse the tree and output it. collapse knows about references to
542 # scalars as well as scalars and does the right thing with them.
543 $text = $self->parse ($text, @_);
544 $text =~ s/\n\s*$/\n/;
545 $self->makespace if $$self{NEEDSPACE};
546 $self->output (protect $self->mapfonts ($text));
548 $$self{NEEDSPACE} = 1;
551 # Called for an interior sequence. Takes a Pod::InteriorSequence object and
552 # returns a reference to a scalar. This scalar is the final formatted text.
553 # It's returned as a reference so that other interior sequences above us
554 # know that the text has already been processed.
556 my ($self, $seq) = @_;
557 my $command = $seq->cmd_name;
559 # Zero-width characters.
560 if ($command eq 'Z') {
561 # Workaround to generate a blessable reference, needed by 5.005.
563 return bless \ "$tmp", 'Pod::Man::String';
566 # C<>, L<>, X<>, and E<> don't apply guesswork to their contents. C<>
567 # needs some additional special handling.
568 my $literal = ($command =~ /^[CELX]$/);
569 $literal++ if $command eq 'C';
570 local $_ = $self->collapse ($seq->parse_tree, $literal);
572 # Handle E<> escapes.
573 if ($command eq 'E') {
575 return bless \ chr ($_), 'Pod::Man::String';
576 } elsif (exists $ESCAPES{$_}) {
577 return bless \ "$ESCAPES{$_}", 'Pod::Man::String';
# The escape name lives in $_ (it is the key looked up in %ESCAPES and the
# text echoed back by the fallback return); $1 is not set by anything visible
# in this branch, so report $_ instead.
carp "Unknown escape E<$_>";
580 return bless \ "E<$_>", 'Pod::Man::String';
584 # For all the other sequences, empty content produces no output.
585 return '' if $_ eq '';
587 # Handle formatting sequences.
588 if ($command eq 'B') {
589 return bless \ ('\f(BS' . $_ . '\f(BE'), 'Pod::Man::String';
590 } elsif ($command eq 'F') {
591 return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
592 } elsif ($command eq 'I') {
593 return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
594 } elsif ($command eq 'C') {
595 return bless \ ('\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"),
600 if ($command eq 'L') {
601 # A bug in lvalue subs in 5.6 requires the temporary variable.
602 my $tmp = $self->buildlink ($_);
603 return bless \ "$tmp", 'Pod::Man::String';
606 # Whitespace protection replaces whitespace with "\ ".
607 if ($command eq 'S') {
609 return bless \ "$_", 'Pod::Man::String';
612 # Add an index entry to the list of ones waiting to be output.
613 if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' }
615 # Anything else is unknown.
616 carp "Unknown sequence $command<$_>";
620 ############################################################################
622 ############################################################################
624 # All command paragraphs take the paragraph and the line number.
626 # First level heading. We can't output .IX in the NAME section due to a bug
627 # in some versions of catman, so don't output a .IX for that section. .SH
628 # already uses small caps, so remove any E<> sequences that would cause
632 local $_ = $self->parse (@_);
635 $self->output (switchquotes ('.SH', $self->mapfonts ($_)));
636 $self->outindex (($_ eq 'NAME') ? () : ('Header', $_));
637 $$self{NEEDSPACE} = 0;
640 # Second level heading.
643 local $_ = $self->parse (@_);
645 $self->output (switchquotes ('.Sh', $self->mapfonts ($_)));
646 $self->outindex ('Subsection', $_);
647 $$self{NEEDSPACE} = 0;
650 # Start a list. For indents after the first, wrap the outside indent in .RS
651 # so that hanging paragraph tags will be correct.
655 unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
656 if (@{ $$self{INDENTS} } > 0) {
657 $self->output (".RS $$self{INDENT}\n");
659 push (@{ $$self{INDENTS} }, $$self{INDENT});
660 $$self{INDENT} = ($_ + 0);
663 # End a list. If we've closed an embedded indent, we've mangled the hanging
664 # paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT.
665 # We'll close that .RS at the next =back or =item.
668 $$self{INDENT} = pop @{ $$self{INDENTS} };
669 unless (defined $$self{INDENT}) {
670 carp "Unmatched =back";
673 if ($$self{WEIRDINDENT}) {
674 $self->output (".RE\n");
675 $$self{WEIRDINDENT} = 0;
677 if (@{ $$self{INDENTS} } > 0) {
678 $self->output (".RE\n");
679 $self->output (".RS $$self{INDENT}\n");
680 $$self{WEIRDINDENT} = 1;
682 $$self{NEEDSPACE} = 1;
685 # An individual list item. Emit an index entry for anything that's
686 # interesting, but don't emit index entries for things like bullets and
687 # numbers. rofficate bullets too while we're at it (so for nice output, use
688 # * for your lists rather than o or . or - or some other thing). Newlines
689 # in an item title are turned into spaces since *roff can't handle them
693 local $_ = $self->parse (@_);
697 if (/\w/ && !/^\w[.\)]\s*$/) {
699 $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
701 s/^\*(\s|\Z)/\\\(bu$1/;
702 if ($$self{WEIRDINDENT}) {
703 $self->output (".RE\n");
704 $$self{WEIRDINDENT} = 0;
706 $_ = $self->mapfonts ($_);
707 $self->output (switchquotes ('.Ip', $_, $$self{INDENT}));
708 $self->outindex ($index ? ('Item', $index) : ());
709 $$self{NEEDSPACE} = 0;
712 # Begin a block for a particular translator. Setting VERBATIM triggers
713 # special handling in textblock().
717 my ($kind) = /^(\S+)/ or return;
718 if ($kind eq 'man' || $kind eq 'roff') {
719 $$self{VERBATIM} = 1;
725 # End a block for a particular translator. We assume that all =begin/=end
726 # pairs are properly closed.
730 $$self{VERBATIM} = 0;
733 # One paragraph for a particular translator. Ignore it unless it's intended
734 # for man or roff, in which case we output it verbatim.
738 return unless s/^(?:man|roff)\b[ \t]*\n?//;
743 ############################################################################
745 ############################################################################
747 # Handle links. We can't actually make real hyperlinks, so this is all to
748 # figure out what text and formatting we print out.
753 # Smash whitespace in case we were split across multiple lines.
756 # If we were given any explicit text, just output it.
757 if (m{ ^ ([^|]+) \| }x) { return $1 }
759 # Okay, leading and trailing whitespace isn't important.
763 # Default to using the whole content of the link entry as a section
764 # name. Note that L<manpage/> forces a manpage interpretation, as does
765 # something looking like L<manpage(section)>. Do the same thing to
766 # L<manpage(section)> as we would to manpage(section) without the L<>;
767 # see guesswork(). If we've added italics, don't add the "manpage"
768 # text; markup is sufficient.
769 my ($manpage, $section) = ('', $_);
770 if (/^"\s*(.*?)\s*"$/) {
771 $section = '"' . $1 . '"';
772 } elsif (m{ ^ [-:.\w]+ (?: \( \S+ \) )? $ }x) {
773 ($manpage, $section) = ($_, '');
774 $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
776 ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
777 if ($manpage =~ /^[-:.\w]+(?:\(\S+\))?$/) {
# The pattern consumes the opening parenthesis of the section, so put it
# back after the italic end marker, exactly as the plain
# L<manpage(section)> branch does.
$manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
780 $section =~ s/^\"\s*//;
781 $section =~ s/\s*\"$//;
783 if ($manpage && $manpage !~ /\\f\(IS/) {
784 $manpage = "the $manpage manpage";
787 # Now build the actual output text.
789 if (!length ($section) && !length ($manpage)) {
790 carp "Invalid link $_";
791 } elsif (!length ($section)) {
793 } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
794 $text .= 'the ' . $section . ' entry';
795 $text .= (length $manpage) ? " in $manpage"
796 : " elsewhere in this document";
798 if ($section !~ /^".*"$/) { $section = '"' . $section . '"' }
799 $text .= 'the section on ' . $section;
800 $text .= " in $manpage" if length $manpage;
806 ############################################################################
807 # Escaping and fontification
808 ############################################################################
810 # At this point, we'll have embedded font codes of the form \f(<font>[SE]
811 # where <font> is one of B, I, or F. Turn those into the right font start
812 # or end codes. B<someI<thing> else> should map to \fBsome\f(BIthing\fB
813 # else\fR. The old pod2man didn't get this right; the second \fB was \fR,
814 # so nested sequences didn't work right. We take care of this by using
815 # variables as a combined pointer to our current font sequence, and set each
816 # to the number of current nestings of start tags for that font. Use them
817 # as a vector to look up what font sequence to use.
822 my ($fixed, $bold, $italic) = (0, 0, 0);
823 my %magic = (F => \$fixed, B => \$bold, I => \$italic);
825 ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
826 $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
832 ############################################################################
833 # *roff-specific parsing
834 ############################################################################
836 # Called instead of parse_text, calls parse_text with the right flags.
839 $self->parse_text ({ -expand_seq => 'sequence',
840 -expand_ptree => 'collapse' }, @_);
843 # Takes a parse tree and a flag saying whether or not to treat it as literal
844 # text (not call guesswork on it), and returns the concatenation of all of
845 # the text strings in that parse tree. If the literal flag isn't true,
846 # guesswork() will be called on all plain scalars in the parse tree.
847 # Otherwise, just escape backslashes in the normal case. If collapse is
848 # being called on a C<> sequence, literal is set to 2, and we do some
849 # additional cleanup. Assumes that everything in the parse tree is either a
850 # scalar or a reference to a scalar.
852 my ($self, $ptree, $literal) = @_;
854 return join ('', map {
859 s/-/\\-/g if $literal > 1;
860 s/__/_\\|_/g if $literal > 1;
865 return join ('', map {
866 ref ($_) ? $$_ : $self->guesswork ($_)
871 # Takes a text block to perform guesswork on; this is guaranteed not to
872 # contain any interior sequences. Returns the text block with remapping
878 # rofficate backslashes.
881 # Ensure double underbars have a tiny space between them.
884 # Make all caps a little smaller. Be careful here, since we don't want
885 # to make @ARGV into small caps, nor do we want to fix the MIME in
886 # MIME-Version, since it looks weird with the full-height V.
888 ( ^ | [\s\(\"\'\`\[\{<>] )
889 ( [A-Z] [A-Z] [/A-Z+:\d_\$&-]* )
890 (?: (?= [\s>\}\]\)\'\".?!,;:] | -- ) | $ )
891 } { $1 . '\s-1' . $2 . '\s0' }egx;
893 # Turn PI into a pretty pi.
894 s{ (?: \\s-1 | \b ) PI (?: \\s0 | \b ) } {\\*\(PI}gx;
896 # Italicize functions in the form func().
900 [:\w]+ (?:\\s-1)? \(\)
902 } { '\f(IS' . $1 . '\f(IE' }egx;
904 # func(n) is a reference to a manual page. Make it \fIfunc\fR\|(n).
907 (\w[-:.\w]+ (?:\\s-1)?)
911 } { '\f(IS' . $1 . '\f(IE\|' . $2 }egx;
913 # Convert simple Perl variable references to a fixed-width font.
918 } { $1 . '\f(FS' . $2 . '\f(FE'}egx;
920 # Translate -- into a real em dash if it's used like one and fix up
921 # dashes, but keep hyphens hyphens.
922 s{ (\G|^|.) (-+) (\b|.) } {
923 my ($pre, $dash, $post) = ($1, $2, $3);
924 if (length ($dash) == 1) {
925 ($pre =~ /[a-zA-Z]/) ? "$pre-$post" : "$pre\\-$post";
926 } elsif (length ($dash) == 2
927 && ((!$pre && !$post)
928 || ($pre =~ /\w/ && !$post)
929 || ($pre eq ' ' && $post eq ' ')
930 || ($pre eq '=' && $post ne '=')
931 || ($pre ne '=' && $post eq '='))) {
934 $pre . ('\-' x length $dash) . $post;
938 # Fix up double quotes.
939 s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
941 # Make C++ into \*(C+, which is a squinched version.
942 s{ \b C\+\+ } {\\*\(C+}gx;
949 ############################################################################
951 ############################################################################
953 # Make vertical whitespace.
956 $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n");
959 # Output any pending index entries, and optionally an index entry given as
960 # an argument. Support multiple index entries in X<> separated by slashes,
961 # and strip special escapes from index entries.
963 my ($self, $section, $index) = @_;
964 my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
965 return unless ($section || @entries);
969 my $output = '.IX Xref "'
# Double every embedded double quote in each entry, not just the first,
# since the joined entries are emitted inside a double-quoted .IX argument.
. join (' ', map { s/\"/\"\"/g; $_ } @entries)
# Double every embedded double quote, not just the first, so the
# double-quoted .IX argument built from $index stays intact.
$index =~ s/\"/\"\"/g;
976 $index =~ s/\\(?:s-?\d|.\(..|.)//g;
977 $output .= ".IX $section " . '"' . $index . '"' . "\n";
979 $self->output ($output);
# Write one chunk of text to the filehandle returned by the object's
# output_handle() method. Returns whatever print returns.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print $handle $text;
}
987 .\" These are some extra bits of roff that I don't want to lose track of
988 .\" but that have been removed from the preamble to make it a bit shorter
989 .\" since they're not currently being used. They're accents and special
990 .\" characters we don't currently have escapes for.
997 . ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
998 . ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
999 . ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
1001 .ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
1002 .ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
1003 .ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
1004 .ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
1005 .ds oe o\h'-(\w'o'u*4/10)'e
1006 .ds Oe O\h'-(\w'O'u*4/10)'E
1007 .if \n(.H>23 .if \n(.V>19 \
1009 . ds v \h'-1'\o'\(aa\(ga'
1017 ############################################################################
1019 ############################################################################
1023 Pod::Man - Convert POD data to formatted *roff input
1028 my $parser = Pod::Man->new (release => $VERSION, section => 8);
1030 # Read POD from STDIN and write to STDOUT.
1031 $parser->parse_from_filehandle;
1033 # Read POD from file.pod and write to file.1.
1034 $parser->parse_from_file ('file.pod', 'file.1');
1038 Pod::Man is a module to convert documentation in the POD format (the
1039 preferred language for documenting Perl) into *roff input using the man
1040 macro set. The resulting *roff code is suitable for display on a terminal
1041 using nroff(1), normally via man(1), or printing using troff(1). It is
1042 conventionally invoked using the driver script B<pod2man>, but it can also
1045 As a derived class from Pod::Parser, Pod::Man supports the same methods and
1046 interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
1047 new parser with C<Pod::Man-E<gt>new()> and then calls either
1048 parse_from_filehandle() or parse_from_file().
1050 new() can take options, in the form of key/value pairs that control the
1051 behavior of the parser. See below for details.
1053 If no options are given, Pod::Man uses the name of the input file with any
1054 trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
1055 section 1 unless the file ended in C<.pm> in which case it defaults to
1056 section 3, to a centered title of "User Contributed Perl Documentation", to
1057 a centered footer of the Perl version it is run with, and to a left-hand
1058 footer of the modification date of its input (or the current date if given
1061 Pod::Man assumes that your *roff formatters have a fixed-width font named
1062 CW. If yours is called something else (like CR), use the C<fixed> option to
1063 specify it. This generally only matters for troff output for printing.
1064 Similarly, you can set the fonts used for bold, italic, and bold italic
1067 Besides the obvious pod conversions, Pod::Man also takes care of formatting
1068 func(), func(n), and simple variable references like $foo or @bar so you
1069 don't have to use code escapes for them; complex expressions like
1070 C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1071 dashes that aren't used as hyphens into en dashes, makes long dashes--like
1072 this--into proper em dashes, fixes "paired quotes," makes C++ and PI look
1073 right, puts a little space between double underbars, makes ALLCAPS a teeny
1074 bit smaller in troff(1), and escapes stuff that *roff treats as special so
1075 that you don't have to.
1077 The recognized options to new() are as follows. All options take a single
1084 Sets the centered page header to use instead of "User Contributed Perl
1089 Sets the left-hand footer. By default, the modification date of the input
1090 file will be used, or the current date if stat() can't find that file (the
1091 case if the input is from STDIN), and the date will be formatted as
1096 The fixed-width font to use for verbatim text and code. Defaults to CW.
1097 Some systems may want CR instead. Only matters for troff(1) output.
1101 Bold version of the fixed-width font. Defaults to CB. Only matters for
1106 Italic version of the fixed-width font (actually, something of a misnomer,
1107 since most fixed-width fonts only have an oblique version, not an italic
1108 version). Defaults to CI. Only matters for troff(1) output.
1110 =item fixedbolditalic
1112 Bold italic (probably actually oblique) version of the fixed-width font.
1113 Pod::Man doesn't assume you have this, and defaults to CB. Some systems
1114 (such as Solaris) have this font available as CX. Only matters for troff(1)
1119 Set the centered footer. By default, this is the version of Perl you run
1120 Pod::Man under. Note that some system man macro sets assume that the
1121 centered footer will be a modification date and will prepend something like
1122 "Last modified: "; if this is the case, you may want to set C<release> to
1123 the last modified date and C<date> to the version number.
1127 Set the section for the C<.TH> macro. The standard section numbering
1128 convention is to use 1 for user commands, 2 for system calls, 3 for
1129 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1130 miscellaneous information, and 8 for administrator commands. There is a lot
1131 of variation here, however; some systems (like Solaris) use 4 for file
1132 formats, 5 for miscellaneous information, and 7 for devices. Still others
1133 use 1m instead of 8, or some mix of both. About the only section numbers
1134 that are reliably consistent are 1, 2, and 3.
1136 By default, section 1 will be used unless the file ends in .pm in which case
1137 section 3 will be selected.
1141 The standard Pod::Parser method parse_from_filehandle() takes up to two
1142 arguments, the first being the file handle to read POD from and the second
1143 being the file handle to write the formatted output to. The first defaults
1144 to STDIN if not given, and the second defaults to STDOUT. The method
1145 parse_from_file() is almost identical, except that its two arguments are the
1146 input and output disk files instead. See L<Pod::Parser> for the specific
1153 =item roff font should be 1 or 2 chars, not `%s'
1155 (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1156 wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1157 longer than two characters, although some *roff extensions do (the canonical
1158 versions of nroff(1) and troff(1) don't either).
1160 =item Invalid link %s
1162 (W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was
1163 unable to parse. You should never see this error message; it probably
1164 indicates a bug in Pod::Man.
1166 =item Unknown escape EE<lt>%sE<gt>
1168 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1169 know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1171 =item Unknown sequence %s
1173 (W) The POD source contained a non-standard interior sequence (something of
1174 the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1176 =item %s: Unknown command paragraph "%s" on line %d.
1178 (W) The POD source contained a non-standard command paragraph (something of
1179 the form C<=command args>) that Pod::Man didn't know about. It was ignored.
1181 =item Unmatched =back
1183 (W) Pod::Man encountered a C<=back> command that didn't correspond to an
1190 The lint-like features and strict POD format checking done by B<pod2man> are
1191 not yet implemented and should be, along with the corresponding C<lax>
1194 The NAME section should be recognized specially and index entries emitted
1195 for everything in that section. This would have to be deferred until the
1196 next section, since extraneous things in NAME tend to confuse various man
1199 The handling of hyphens, en dashes, and em dashes is somewhat fragile, and
1200 one may get the wrong one under some circumstances. This should only matter
1201 for troff(1) output.
1203 When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1204 necessarily get it right.
1206 Pod::Man doesn't handle font names longer than two characters. Neither do
1207 most troff(1) implementations, but GNU troff does as an extension. It would
1208 be nice to support as an option for those who want to use it.
1210 The preamble added to each output file is rather verbose, and most of it is
1211 only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1212 characters. It would ideally be nice if all of those definitions were only
1213 output if needed, perhaps on the fly as the characters are used.
1215 Some of the automagic applied to file names assumes Unix directory
1218 Pod::Man is excessively slow.
1222 L<Pod::Parser|Pod::Parser>, perlpod(1), pod2man(1), nroff(1), troff(1),
1225 Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1226 Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
1227 the best documentation of standard nroff(1) and troff(1). At the time of
1228 this writing, it's available at http://www.cs.bell-labs.com/cm/cs/cstr.html.
1230 The man page documenting the man macro set may be man(5) instead of man(7)
1231 on your system. Also, please see pod2man(1) for extensive documentation on
1232 writing manual pages if you've not done it before and aren't familiar with
1237 Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
1238 original B<pod2man> by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>.