| 1 | # Pod::Man -- Convert POD data to formatted *roff input. |
| 2 | # $Id: Man.pm,v 2.16 2007-11-29 01:35:53 eagle Exp $ |
| 3 | # |
| 4 | # Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 |
| 5 | # Russ Allbery <rra@stanford.edu> |
| 6 | # Substantial contributions by Sean Burke <sburke@cpan.org> |
| 7 | # |
| 8 | # This program is free software; you may redistribute it and/or modify it |
| 9 | # under the same terms as Perl itself. |
| 10 | # |
| 11 | # This module translates POD documentation into *roff markup using the man |
| 12 | # macro set, and is intended for converting POD documents written as Unix |
| 13 | # manual pages to manual pages that can be read by the man(1) command. It is |
| 14 | # a replacement for the pod2man command distributed with versions of Perl |
| 15 | # prior to 5.6. |
| 16 | # |
| 17 | # Perl core hackers, please note that this module is also separately |
| 18 | # maintained outside of the Perl core as part of the podlators. Please send |
| 19 | # me any patches at the address above in addition to sending them to the |
| 20 | # standard Perl mailing lists. |
| 21 | |
| 22 | ############################################################################## |
| 23 | # Modules and declarations |
| 24 | ############################################################################## |
| 25 | |
| 26 | package Pod::Man; |
| 27 | |
| 28 | require 5.005; |
| 29 | |
| 30 | use strict; |
| 31 | use subs qw(makespace); |
| 32 | use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION); |
| 33 | |
| 34 | use Carp qw(croak); |
| 35 | use Pod::Simple (); |
| 36 | use POSIX qw(strftime); |
| 37 | |
| 38 | @ISA = qw(Pod::Simple); |
| 39 | |
| 40 | # Don't use the CVS revision as the version, since this module is also in Perl |
| 41 | # core and too many things could munge CVS magic revision strings. This |
| 42 | # number should ideally be the same as the CVS revision in podlators, however. |
| 43 | $VERSION = '2.16'; |
| 44 | |
| 45 | # Set the debugging level. If someone has inserted a debug function into this |
| 46 | # class already, use that. Otherwise, use any Pod::Simple debug function |
| 47 | # that's defined, and failing that, define a debug level of 10. |
| 48 | BEGIN { |
| 49 | my $parent = defined (&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : undef; |
| 50 | unless (defined &DEBUG) { |
| 51 | *DEBUG = $parent || sub () { 10 }; |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | # Import the ASCII constant from Pod::Simple. This is true iff we're in an |
| 56 | # ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is |
| 57 | # generally only false for EBCDIC. |
| 58 | BEGIN { *ASCII = \&Pod::Simple::ASCII } |
| 59 | |
| 60 | # Pretty-print a data structure. Only used for debugging. |
| 61 | BEGIN { *pretty = \&Pod::Simple::pretty } |
| 62 | |
| 63 | ############################################################################## |
| 64 | # Object initialization |
| 65 | ############################################################################## |
| 66 | |
| 67 | # Initialize the object and set various Pod::Simple options that we need. |
| 68 | # Here, we also process any additional options passed to the constructor or |
| 69 | # set up defaults if none were given. Note that all internal object keys are |
| 70 | # in all-caps, reserving all lower-case object keys for Pod::Simple and user |
| 71 | # arguments. |
| 72 | sub new { |
| 73 | my $class = shift; |
| 74 | my $self = $class->SUPER::new; |
| 75 | |
| 76 | # Tell Pod::Simple to handle S<> by automatically inserting . |
| 77 | $self->nbsp_for_S (1); |
| 78 | |
| 79 | # Tell Pod::Simple to keep whitespace whenever possible. |
| 80 | if ($self->can ('preserve_whitespace')) { |
| 81 | $self->preserve_whitespace (1); |
| 82 | } else { |
| 83 | $self->fullstop_space_harden (1); |
| 84 | } |
| 85 | |
| 86 | # The =for and =begin targets that we accept. |
| 87 | $self->accept_targets (qw/man MAN roff ROFF/); |
| 88 | |
| 89 | # Ensure that contiguous blocks of code are merged together. Otherwise, |
| 90 | # some of the guesswork heuristics don't work right. |
| 91 | $self->merge_text (1); |
| 92 | |
| 93 | # Pod::Simple doesn't do anything useful with our arguments, but we want |
| 94 | # to put them in our object as hash keys and values. This could cause |
| 95 | # problems if we ever clash with Pod::Simple's own internal class |
| 96 | # variables. |
| 97 | %$self = (%$self, @_); |
| 98 | |
| 99 | # Initialize various other internal constants based on our arguments. |
| 100 | $self->init_fonts; |
| 101 | $self->init_quotes; |
| 102 | $self->init_page; |
| 103 | |
| 104 | # For right now, default to turning on all of the magic. |
| 105 | $$self{MAGIC_CPP} = 1; |
| 106 | $$self{MAGIC_EMDASH} = 1; |
| 107 | $$self{MAGIC_FUNC} = 1; |
| 108 | $$self{MAGIC_MANREF} = 1; |
| 109 | $$self{MAGIC_SMALLCAPS} = 1; |
| 110 | $$self{MAGIC_VARS} = 1; |
| 111 | |
| 112 | return $self; |
| 113 | } |
| 114 | |
| 115 | # Translate a font string into an escape. |
| 116 | sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] } |
| 117 | |
| 118 | # Determine which fonts the user wishes to use and store them in the object. |
| 119 | # Regular, italic, bold, and bold-italic are constants, but the fixed width |
| 120 | # fonts may be set by the user. Sets the internal hash key FONTS which is |
| 121 | # used to map our internal font escapes to actual *roff sequences later. |
| 122 | sub init_fonts { |
| 123 | my ($self) = @_; |
| 124 | |
| 125 | # Figure out the fixed-width font. If user-supplied, make sure that they |
| 126 | # are the right length. |
| 127 | for (qw/fixed fixedbold fixeditalic fixedbolditalic/) { |
| 128 | my $font = $$self{$_}; |
| 129 | if (defined ($font) && (length ($font) < 1 || length ($font) > 2)) { |
| 130 | croak qq(roff font should be 1 or 2 chars, not "$font"); |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | # Set the default fonts. We can't be sure portably across different |
| 135 | # implementations what fixed bold-italic may be called (if it's even |
| 136 | # available), so default to just bold. |
| 137 | $$self{fixed} ||= 'CW'; |
| 138 | $$self{fixedbold} ||= 'CB'; |
| 139 | $$self{fixeditalic} ||= 'CI'; |
| 140 | $$self{fixedbolditalic} ||= 'CB'; |
| 141 | |
| 142 | # Set up a table of font escapes. First number is fixed-width, second is |
| 143 | # bold, third is italic. |
| 144 | $$self{FONTS} = { '000' => '\fR', '001' => '\fI', |
| 145 | '010' => '\fB', '011' => '\f(BI', |
| 146 | '100' => toescape ($$self{fixed}), |
| 147 | '101' => toescape ($$self{fixeditalic}), |
| 148 | '110' => toescape ($$self{fixedbold}), |
| 149 | '111' => toescape ($$self{fixedbolditalic}) }; |
| 150 | } |
| 151 | |
| 152 | # Initialize the quotes that we'll be using for C<> text. This requires some |
| 153 | # special handling, both to parse the user parameter if given and to make sure |
| 154 | # that the quotes will be safe against *roff. Sets the internal hash keys |
| 155 | # LQUOTE and RQUOTE. |
| 156 | sub init_quotes { |
| 157 | my ($self) = (@_); |
| 158 | |
| 159 | $$self{quotes} ||= '"'; |
| 160 | if ($$self{quotes} eq 'none') { |
| 161 | $$self{LQUOTE} = $$self{RQUOTE} = ''; |
| 162 | } elsif (length ($$self{quotes}) == 1) { |
| 163 | $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes}; |
| 164 | } elsif ($$self{quotes} =~ /^(.)(.)$/ |
| 165 | || $$self{quotes} =~ /^(..)(..)$/) { |
| 166 | $$self{LQUOTE} = $1; |
| 167 | $$self{RQUOTE} = $2; |
| 168 | } else { |
| 169 | croak(qq(Invalid quote specification "$$self{quotes}")) |
| 170 | } |
| 171 | |
| 172 | # Double the first quote; note that this should not be s///g as two double |
| 173 | # quotes is represented in *roff as three double quotes, not four. Weird, |
| 174 | # I know. |
| 175 | $$self{LQUOTE} =~ s/\"/\"\"/; |
| 176 | $$self{RQUOTE} =~ s/\"/\"\"/; |
| 177 | } |
| 178 | |
| 179 | # Initialize the page title information and indentation from our arguments. |
| 180 | sub init_page { |
| 181 | my ($self) = @_; |
| 182 | |
| 183 | # We used to try first to get the version number from a local binary, but |
| 184 | # we shouldn't need that any more. Get the version from the running Perl. |
| 185 | # Work a little magic to handle subversions correctly under both the |
| 186 | # pre-5.6 and the post-5.6 version numbering schemes. |
| 187 | my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/); |
| 188 | $version[2] ||= 0; |
| 189 | $version[2] *= 10 ** (3 - length $version[2]); |
| 190 | for (@version) { $_ += 0 } |
| 191 | my $version = join ('.', @version); |
| 192 | |
| 193 | # Set the defaults for page titles and indentation if the user didn't |
| 194 | # override anything. |
| 195 | $$self{center} = 'User Contributed Perl Documentation' |
| 196 | unless defined $$self{center}; |
| 197 | $$self{release} = 'perl v' . $version |
| 198 | unless defined $$self{release}; |
| 199 | $$self{indent} = 4 |
| 200 | unless defined $$self{indent}; |
| 201 | |
| 202 | # Double quotes in things that will be quoted. |
| 203 | for (qw/center release/) { |
| 204 | $$self{$_} =~ s/\"/\"\"/g if $$self{$_}; |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | ############################################################################## |
| 209 | # Core parsing |
| 210 | ############################################################################## |
| 211 | |
| 212 | # This is the glue that connects the code below with Pod::Simple itself. The |
| 213 | # goal is to convert the event stream coming from the POD parser into method |
| 214 | # calls to handlers once the complete content of a tag has been seen. Each |
| 215 | # paragraph or POD command will have textual content associated with it, and |
| 216 | # as soon as all of a paragraph or POD command has been seen, that content |
| 217 | # will be passed in to the corresponding method for handling that type of |
| 218 | # object. The exceptions are handlers for lists, which have opening tag |
| 219 | # handlers and closing tag handlers that will be called right away. |
| 220 | # |
| 221 | # The internal hash key PENDING is used to store the contents of a tag until |
| 222 | # all of it has been seen. It holds a stack of open tags, each one |
| 223 | # represented by a tuple of the attributes hash for the tag, formatting |
| 224 | # options for the tag (which are inherited), and the contents of the tag. |
| 225 | |
| 226 | # Add a block of text to the contents of the current node, formatting it |
| 227 | # according to the current formatting instructions as we do. |
| 228 | sub _handle_text { |
| 229 | my ($self, $text) = @_; |
| 230 | DEBUG > 3 and print "== $text\n"; |
| 231 | my $tag = $$self{PENDING}[-1]; |
| 232 | $$tag[2] .= $self->format_text ($$tag[1], $text); |
| 233 | } |
| 234 | |
| 235 | # Given an element name, get the corresponding method name. |
| 236 | sub method_for_element { |
| 237 | my ($self, $element) = @_; |
| 238 | $element =~ tr/-/_/; |
| 239 | $element =~ tr/A-Z/a-z/; |
| 240 | $element =~ tr/_a-z0-9//cd; |
| 241 | return $element; |
| 242 | } |
| 243 | |
| 244 | # Handle the start of a new element. If cmd_element is defined, assume that |
| 245 | # we need to collect the entire tree for this element before passing it to the |
| 246 | # element method, and create a new tree into which we'll collect blocks of |
| 247 | # text and nested elements. Otherwise, if start_element is defined, call it. |
| 248 | sub _handle_element_start { |
| 249 | my ($self, $element, $attrs) = @_; |
| 250 | DEBUG > 3 and print "++ $element (<", join ('> <', %$attrs), ">)\n"; |
| 251 | my $method = $self->method_for_element ($element); |
| 252 | |
| 253 | # If we have a command handler, we need to accumulate the contents of the |
| 254 | # tag before calling it. Turn off IN_NAME for any command other than |
| 255 | # <Para> so that IN_NAME isn't still set for the first heading after the |
| 256 | # NAME heading. |
| 257 | if ($self->can ("cmd_$method")) { |
| 258 | DEBUG > 2 and print "<$element> starts saving a tag\n"; |
| 259 | $$self{IN_NAME} = 0 if ($element ne 'Para'); |
| 260 | |
| 261 | # How we're going to format embedded text blocks depends on the tag |
| 262 | # and also depends on our parent tags. Thankfully, inside tags that |
| 263 | # turn off guesswork and reformatting, nothing else can turn it back |
| 264 | # on, so this can be strictly inherited. |
| 265 | my $formatting = $$self{PENDING}[-1][1]; |
| 266 | $formatting = $self->formatting ($formatting, $element); |
| 267 | push (@{ $$self{PENDING} }, [ $attrs, $formatting, '' ]); |
| 268 | DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; |
| 269 | } elsif ($self->can ("start_$method")) { |
| 270 | my $method = 'start_' . $method; |
| 271 | $self->$method ($attrs, ''); |
| 272 | } else { |
| 273 | DEBUG > 2 and print "No $method start method, skipping\n"; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | # Handle the end of an element. If we had a cmd_ method for this element, |
| 278 | # this is where we pass along the tree that we built. Otherwise, if we have |
| 279 | # an end_ method for the element, call that. |
| 280 | sub _handle_element_end { |
| 281 | my ($self, $element) = @_; |
| 282 | DEBUG > 3 and print "-- $element\n"; |
| 283 | my $method = $self->method_for_element ($element); |
| 284 | |
| 285 | # If we have a command handler, pull off the pending text and pass it to |
| 286 | # the handler along with the saved attribute hash. |
| 287 | if ($self->can ("cmd_$method")) { |
| 288 | DEBUG > 2 and print "</$element> stops saving a tag\n"; |
| 289 | my $tag = pop @{ $$self{PENDING} }; |
| 290 | DEBUG > 4 and print "Popped: [", pretty ($tag), "]\n"; |
| 291 | DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; |
| 292 | my $method = 'cmd_' . $method; |
| 293 | my $text = $self->$method ($$tag[0], $$tag[2]); |
| 294 | if (defined $text) { |
| 295 | if (@{ $$self{PENDING} } > 1) { |
| 296 | $$self{PENDING}[-1][2] .= $text; |
| 297 | } else { |
| 298 | $self->output ($text); |
| 299 | } |
| 300 | } |
| 301 | } elsif ($self->can ("end_$method")) { |
| 302 | my $method = 'end_' . $method; |
| 303 | $self->$method (); |
| 304 | } else { |
| 305 | DEBUG > 2 and print "No $method end method, skipping\n"; |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | ############################################################################## |
| 310 | # General formatting |
| 311 | ############################################################################## |
| 312 | |
| 313 | # Return formatting instructions for a new block. Takes the current |
| 314 | # formatting and the new element. Formatting inherits negatively, in the |
| 315 | # sense that if the parent has turned off guesswork, all child elements should |
| 316 | # leave it off. We therefore return a copy of the same formatting |
| 317 | # instructions but possibly with more things turned off depending on the |
| 318 | # element. |
| 319 | sub formatting { |
| 320 | my ($self, $current, $element) = @_; |
| 321 | my %options; |
| 322 | if ($current) { |
| 323 | %options = %$current; |
| 324 | } else { |
| 325 | %options = (guesswork => 1, cleanup => 1, convert => 1); |
| 326 | } |
| 327 | if ($element eq 'Data') { |
| 328 | $options{guesswork} = 0; |
| 329 | $options{cleanup} = 0; |
| 330 | $options{convert} = 0; |
| 331 | } elsif ($element eq 'X') { |
| 332 | $options{guesswork} = 0; |
| 333 | $options{cleanup} = 0; |
| 334 | } elsif ($element eq 'Verbatim' || $element eq 'C') { |
| 335 | $options{guesswork} = 0; |
| 336 | $options{literal} = 1; |
| 337 | } |
| 338 | return \%options; |
| 339 | } |
| 340 | |
| 341 | # Format a text block. Takes a hash of formatting options and the text to |
| 342 | # format. Currently, the only formatting options are guesswork, cleanup, and |
| 343 | # convert, all of which are boolean. |
| 344 | sub format_text { |
| 345 | my ($self, $options, $text) = @_; |
| 346 | my $guesswork = $$options{guesswork} && !$$self{IN_NAME}; |
| 347 | my $cleanup = $$options{cleanup}; |
| 348 | my $convert = $$options{convert}; |
| 349 | my $literal = $$options{literal}; |
| 350 | |
| 351 | # Normally we do character translation, but we won't even do that in |
| 352 | # <Data> blocks. |
| 353 | if ($convert) { |
| 354 | if (ASCII) { |
| 355 | $text =~ s/(\\|[^\x00-\x7F])/$ESCAPES{ord ($1)} || "X"/eg; |
| 356 | } else { |
| 357 | $text =~ s/(\\)/$ESCAPES{ord ($1)} || "X"/eg; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | # Cleanup just tidies up a few things, telling *roff that the hyphens are |
| 362 | # hard and putting a bit of space between consecutive underscores. |
| 363 | if ($cleanup) { |
| 364 | $text =~ s/-/\\-/g; |
| 365 | $text =~ s/_(?=_)/_\\|/g; |
| 366 | } |
| 367 | |
| 368 | # Ensure that *roff doesn't convert literal quotes to UTF-8 single quotes, |
| 369 | # but don't mess up our accept escapes. |
| 370 | if ($literal) { |
| 371 | $text =~ s/(?<!\\\*)\'/\\*\(Aq/g; |
| 372 | $text =~ s/(?<!\\\*)\`/\\\`/g; |
| 373 | } |
| 374 | |
| 375 | # If guesswork is asked for, do that. This involves more substantial |
| 376 | # formatting based on various heuristics that may only be appropriate for |
| 377 | # particular documents. |
| 378 | if ($guesswork) { |
| 379 | $text = $self->guesswork ($text); |
| 380 | } |
| 381 | |
| 382 | return $text; |
| 383 | } |
| 384 | |
| 385 | # Handles C<> text, deciding whether to put \*C` around it or not. This is a |
| 386 | # whole bunch of messy heuristics to try to avoid overquoting, originally from |
| 387 | # Barrie Slaymaker. This largely duplicates similar code in Pod::Text. |
| 388 | sub quote_literal { |
| 389 | my $self = shift; |
| 390 | local $_ = shift; |
| 391 | |
| 392 | # A regex that matches the portion of a variable reference that's the |
| 393 | # array or hash index, separated out just because we want to use it in |
| 394 | # several places in the following regex. |
| 395 | my $index = '(?: \[.*\] | \{.*\} )?'; |
| 396 | |
| 397 | # Check for things that we don't want to quote, and if we find any of |
| 398 | # them, return the string with just a font change and no quoting. |
| 399 | m{ |
| 400 | ^\s* |
| 401 | (?: |
| 402 | ( [\'\`\"] ) .* \1 # already quoted |
| 403 | | \\\*\(Aq .* \\\*\(Aq # quoted and escaped |
| 404 | | \\?\` .* ( \' | \\\*\(Aq ) # `quoted' |
| 405 | | \$+ [\#^]? \S $index # special ($^Foo, $") |
| 406 | | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func |
| 407 | | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call |
| 408 | | [-+]? ( \d[\d.]* | \.\d+ ) (?: [eE][-+]?\d+ )? # a number |
| 409 | | 0x [a-fA-F\d]+ # a hex constant |
| 410 | ) |
| 411 | \s*\z |
| 412 | }xso and return '\f(FS' . $_ . '\f(FE'; |
| 413 | |
| 414 | # If we didn't return, go ahead and quote the text. |
| 415 | return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"; |
| 416 | } |
| 417 | |
| 418 | # Takes a text block to perform guesswork on. Returns the text block with |
| 419 | # formatting codes added. This is the code that marks up various Perl |
| 420 | # constructs and things commonly used in man pages without requiring the user |
| 421 | # to add any explicit markup, and is applied to all non-literal text. We're |
| 422 | # guaranteed that the text we're applying guesswork to does not contain any |
| 423 | # *roff formatting codes. Note that the inserted font sequences must be |
| 424 | # treated later with mapfonts or textmapfonts. |
| 425 | # |
| 426 | # This method is very fragile, both in the regular expressions it uses and in |
| 427 | # the ordering of those modifications. Care and testing is required when |
| 428 | # modifying it. |
| 429 | sub guesswork { |
| 430 | my $self = shift; |
| 431 | local $_ = shift; |
| 432 | DEBUG > 5 and print " Guesswork called on [$_]\n"; |
| 433 | |
| 434 | # By the time we reach this point, all hypens will be escaped by adding a |
| 435 | # backslash. We want to undo that escaping if they're part of regular |
| 436 | # words and there's only a single dash, since that's a real hyphen that |
| 437 | # *roff gets to consider a possible break point. Make sure that a dash |
| 438 | # after the first character of a word stays non-breaking, however. |
| 439 | # |
| 440 | # Note that this is not user-controllable; we pretty much have to do this |
| 441 | # transformation or *roff will mangle the output in unacceptable ways. |
| 442 | s{ |
| 443 | ( (?:\G|^|\s) [\(\"]* [a-zA-Z] ) ( \\- )? |
| 444 | ( (?: [a-zA-Z\']+ \\-)+ ) |
| 445 | ( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|\Z|\\\ ) ) |
| 446 | \b |
| 447 | } { |
| 448 | my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4); |
| 449 | $hyphen ||= ''; |
| 450 | $main =~ s/\\-/-/g; |
| 451 | $prefix . $hyphen . $main . $suffix; |
| 452 | }egx; |
| 453 | |
| 454 | # Translate "--" into a real em-dash if it's used like one. This means |
| 455 | # that it's either surrounded by whitespace, it follows a regular word, or |
| 456 | # it occurs between two regular words. |
| 457 | if ($$self{MAGIC_EMDASH}) { |
| 458 | s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx; |
| 459 | s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx; |
| 460 | } |
| 461 | |
| 462 | # Make words in all-caps a little bit smaller; they look better that way. |
| 463 | # However, we don't want to change Perl code (like @ARGV), nor do we want |
| 464 | # to fix the MIME in MIME-Version since it looks weird with the |
| 465 | # full-height V. |
| 466 | # |
| 467 | # We change only a string of all caps (2) either at the beginning of the |
| 468 | # line or following regular punctuation (like quotes) or whitespace (1), |
| 469 | # and followed by either similar punctuation, an em-dash, or the end of |
| 470 | # the line (3). |
| 471 | if ($$self{MAGIC_SMALLCAPS}) { |
| 472 | s{ |
| 473 | ( ^ | [\s\(\"\'\`\[\{<>] | \\\ ) # (1) |
| 474 | ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* ) # (2) |
| 475 | (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | \\\ | $ ) # (3) |
| 476 | } { |
| 477 | $1 . '\s-1' . $2 . '\s0' |
| 478 | }egx; |
| 479 | } |
| 480 | |
| 481 | # Note that from this point forward, we have to adjust for \s-1 and \s-0 |
| 482 | # strings inserted around things that we've made small-caps if later |
| 483 | # transforms should work on those strings. |
| 484 | |
| 485 | # Italize functions in the form func(), including functions that are in |
| 486 | # all capitals, but don't italize if there's anything between the parens. |
| 487 | # The function must start with an alphabetic character or underscore and |
| 488 | # then consist of word characters or colons. |
| 489 | if ($$self{MAGIC_FUNC}) { |
| 490 | s{ |
| 491 | ( \b | \\s-1 ) |
| 492 | ( [A-Za-z_] ([:\w] | \\s-?[01])+ \(\) ) |
| 493 | } { |
| 494 | $1 . '\f(IS' . $2 . '\f(IE' |
| 495 | }egx; |
| 496 | } |
| 497 | |
| 498 | # Change references to manual pages to put the page name in italics but |
| 499 | # the number in the regular font, with a thin space between the name and |
| 500 | # the number. Only recognize func(n) where func starts with an alphabetic |
| 501 | # character or underscore and contains only word characters, periods (for |
| 502 | # configuration file man pages), or colons, and n is a single digit, |
| 503 | # optionally followed by some number of lowercase letters. Note that this |
| 504 | # does not recognize man page references like perl(l) or socket(3SOCKET). |
| 505 | if ($$self{MAGIC_MANREF}) { |
| 506 | s{ |
| 507 | ( \b | \\s-1 ) |
| 508 | ( [A-Za-z_] (?:[.:\w] | \\- | \\s-?[01])+ ) |
| 509 | ( \( \d [a-z]* \) ) |
| 510 | } { |
| 511 | $1 . '\f(IS' . $2 . '\f(IE\|' . $3 |
| 512 | }egx; |
| 513 | } |
| 514 | |
| 515 | # Convert simple Perl variable references to a fixed-width font. Be |
| 516 | # careful not to convert functions, though; there are too many subtleties |
| 517 | # with them to want to perform this transformation. |
| 518 | if ($$self{MAGIC_VARS}) { |
| 519 | s{ |
| 520 | ( ^ | \s+ ) |
| 521 | ( [\$\@%] [\w:]+ ) |
| 522 | (?! \( ) |
| 523 | } { |
| 524 | $1 . '\f(FS' . $2 . '\f(FE' |
| 525 | }egx; |
| 526 | } |
| 527 | |
| 528 | # Fix up double quotes. Unfortunately, we miss this transformation if the |
| 529 | # quoted text contains any code with formatting codes and there's not much |
| 530 | # we can effectively do about that, which makes it somewhat unclear if |
| 531 | # this is really a good idea. |
| 532 | s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx; |
| 533 | |
| 534 | # Make C++ into \*(C+, which is a squinched version. |
| 535 | if ($$self{MAGIC_CPP}) { |
| 536 | s{ \b C\+\+ } {\\*\(C+}gx; |
| 537 | } |
| 538 | |
| 539 | # Done. |
| 540 | DEBUG > 5 and print " Guesswork returning [$_]\n"; |
| 541 | return $_; |
| 542 | } |
| 543 | |
| 544 | ############################################################################## |
| 545 | # Output |
| 546 | ############################################################################## |
| 547 | |
| 548 | # When building up the *roff code, we don't use real *roff fonts. Instead, we |
| 549 | # embed font codes of the form \f(<font>[SE] where <font> is one of B, I, or |
| 550 | # F, S stands for start, and E stands for end. This method turns these into |
| 551 | # the right start and end codes. |
| 552 | # |
| 553 | # We add this level of complexity because the old pod2man didn't get code like |
| 554 | # B<someI<thing> else> right; after I<> it switched back to normal text rather |
| 555 | # than bold. We take care of this by using variables that state whether bold, |
| 556 | # italic, or fixed are turned on as a combined pointer to our current font |
| 557 | # sequence, and set each to the number of current nestings of start tags for |
| 558 | # that font. |
| 559 | # |
| 560 | # \fP changes to the previous font, but only one previous font is kept. We |
| 561 | # don't know what the outside level font is; normally it's R, but if we're |
| 562 | # inside a heading it could be something else. So arrange things so that the |
| 563 | # outside font is always the "previous" font and end with \fP instead of \fR. |
| 564 | # Idea from Zack Weinberg. |
| 565 | sub mapfonts { |
| 566 | my ($self, $text) = @_; |
| 567 | my ($fixed, $bold, $italic) = (0, 0, 0); |
| 568 | my %magic = (F => \$fixed, B => \$bold, I => \$italic); |
| 569 | my $last = '\fR'; |
| 570 | $text =~ s< |
| 571 | \\f\((.)(.) |
| 572 | > < |
| 573 | my $sequence = ''; |
| 574 | my $f; |
| 575 | if ($last ne '\fR') { $sequence = '\fP' } |
| 576 | ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; |
| 577 | $f = $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; |
| 578 | if ($f eq $last) { |
| 579 | ''; |
| 580 | } else { |
| 581 | if ($f ne '\fR') { $sequence .= $f } |
| 582 | $last = $f; |
| 583 | $sequence; |
| 584 | } |
| 585 | >gxe; |
| 586 | return $text; |
| 587 | } |
| 588 | |
| 589 | # Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU |
| 590 | # groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather |
| 591 | # than R, presumably because \f(CW doesn't actually do a font change. To work |
| 592 | # around this, use a separate textmapfonts for text blocks where the default |
| 593 | # font is always R and only use the smart mapfonts for headings. |
| 594 | sub textmapfonts { |
| 595 | my ($self, $text) = @_; |
| 596 | my ($fixed, $bold, $italic) = (0, 0, 0); |
| 597 | my %magic = (F => \$fixed, B => \$bold, I => \$italic); |
| 598 | $text =~ s< |
| 599 | \\f\((.)(.) |
| 600 | > < |
| 601 | ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; |
| 602 | $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; |
| 603 | >gxe; |
| 604 | return $text; |
| 605 | } |
| 606 | |
| 607 | # Given a command and a single argument that may or may not contain double |
| 608 | # quotes, handle double-quote formatting for it. If there are no double |
| 609 | # quotes, just return the command followed by the argument in double quotes. |
| 610 | # If there are double quotes, use an if statement to test for nroff, and for |
| 611 | # nroff output the command followed by the argument in double quotes with |
| 612 | # embedded double quotes doubled. For other formatters, remap paired double |
| 613 | # quotes to LQUOTE and RQUOTE. |
| 614 | sub switchquotes { |
| 615 | my ($self, $command, $text, $extra) = @_; |
| 616 | $text =~ s/\\\*\([LR]\"/\"/g; |
| 617 | |
| 618 | # We also have to deal with \*C` and \*C', which are used to add the |
| 619 | # quotes around C<> text, since they may expand to " and if they do this |
| 620 | # confuses the .SH macros and the like no end. Expand them ourselves. |
| 621 | # Also separate troff from nroff if there are any fixed-width fonts in use |
| 622 | # to work around problems with Solaris nroff. |
| 623 | my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/); |
| 624 | my $fixedpat = join '|', @{ $$self{FONTS} }{'100', '101', '110', '111'}; |
| 625 | $fixedpat =~ s/\\/\\\\/g; |
| 626 | $fixedpat =~ s/\(/\\\(/g; |
| 627 | if ($text =~ m/\"/ || $text =~ m/$fixedpat/) { |
| 628 | $text =~ s/\"/\"\"/g; |
| 629 | my $nroff = $text; |
| 630 | my $troff = $text; |
| 631 | $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g; |
| 632 | if ($c_is_quote and $text =~ m/\\\*\(C[\'\`]/) { |
| 633 | $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g; |
| 634 | $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g; |
| 635 | $troff =~ s/\\\*\(C[\'\`]//g; |
| 636 | } |
| 637 | $nroff = qq("$nroff") . ($extra ? " $extra" : ''); |
| 638 | $troff = qq("$troff") . ($extra ? " $extra" : ''); |
| 639 | |
| 640 | # Work around the Solaris nroff bug where \f(CW\fP leaves the font set |
| 641 | # to Roman rather than the actual previous font when used in headings. |
| 642 | # troff output may still be broken, but at least we can fix nroff by |
| 643 | # just switching the font changes to the non-fixed versions. |
| 644 | $nroff =~ s/\Q$$self{FONTS}{100}\E(.*)\\f[PR]/$1/g; |
| 645 | $nroff =~ s/\Q$$self{FONTS}{101}\E(.*)\\f([PR])/\\fI$1\\f$2/g; |
| 646 | $nroff =~ s/\Q$$self{FONTS}{110}\E(.*)\\f([PR])/\\fB$1\\f$2/g; |
| 647 | $nroff =~ s/\Q$$self{FONTS}{111}\E(.*)\\f([PR])/\\f\(BI$1\\f$2/g; |
| 648 | |
| 649 | # Now finally output the command. Bother with .ie only if the nroff |
| 650 | # and troff output aren't the same. |
| 651 | if ($nroff ne $troff) { |
| 652 | return ".ie n $command $nroff\n.el $command $troff\n"; |
| 653 | } else { |
| 654 | return "$command $nroff\n"; |
| 655 | } |
| 656 | } else { |
| 657 | $text = qq("$text") . ($extra ? " $extra" : ''); |
| 658 | return "$command $text\n"; |
| 659 | } |
| 660 | } |
| 661 | |
| 662 | # Protect leading quotes and periods against interpretation as commands. Also |
| 663 | # protect anything starting with a backslash, since it could expand or hide |
| 664 | # something that *roff would interpret as a command. This is overkill, but |
| 665 | # it's much simpler than trying to parse *roff here. |
| 666 | sub protect { |
| 667 | my ($self, $text) = @_; |
| 668 | $text =~ s/^([.\'\\])/\\&$1/mg; |
| 669 | return $text; |
| 670 | } |
| 671 | |
| 672 | # Make vertical whitespace if NEEDSPACE is set, appropriate to the indentation |
| 673 | # level the situation. This function is needed since in *roff one has to |
| 674 | # create vertical whitespace after paragraphs and between some things, but |
| 675 | # other macros create their own whitespace. Also close out a sequence of |
| 676 | # repeated =items, since calling makespace means we're about to begin the item |
| 677 | # body. |
| 678 | sub makespace { |
| 679 | my ($self) = @_; |
| 680 | $self->output (".PD\n") if $$self{ITEMS} > 1; |
| 681 | $$self{ITEMS} = 0; |
| 682 | $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n") |
| 683 | if $$self{NEEDSPACE}; |
| 684 | } |
| 685 | |
| 686 | # Output any pending index entries, and optionally an index entry given as an |
| 687 | # argument. Support multiple index entries in X<> separated by slashes, and |
| 688 | # strip special escapes from index entries. |
| 689 | sub outindex { |
| 690 | my ($self, $section, $index) = @_; |
| 691 | my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} }; |
| 692 | return unless ($section || @entries); |
| 693 | |
| 694 | # We're about to output all pending entries, so clear our pending queue. |
| 695 | $$self{INDEX} = []; |
| 696 | |
| 697 | # Build the output. Regular index entries are marked Xref, and headings |
| 698 | # pass in their own section. Undo some *roff formatting on headings. |
| 699 | my @output; |
| 700 | if (@entries) { |
| 701 | push @output, [ 'Xref', join (' ', @entries) ]; |
| 702 | } |
| 703 | if ($section) { |
| 704 | $index =~ s/\\-/-/g; |
| 705 | $index =~ s/\\(?:s-?\d|.\(..|.)//g; |
| 706 | push @output, [ $section, $index ]; |
| 707 | } |
| 708 | |
| 709 | # Print out the .IX commands. |
| 710 | for (@output) { |
| 711 | my ($type, $entry) = @$_; |
| 712 | $entry =~ s/\"/\"\"/g; |
| 713 | $self->output (".IX $type " . '"' . $entry . '"' . "\n"); |
| 714 | } |
| 715 | } |
| 716 | |
| 717 | # Output some text, without any additional changes. |
| 718 | sub output { |
| 719 | my ($self, @text) = @_; |
| 720 | print { $$self{output_fh} } @text; |
| 721 | } |
| 722 | |
| 723 | ############################################################################## |
| 724 | # Document initialization |
| 725 | ############################################################################## |
| 726 | |
| 727 | # Handle the start of the document. Here we handle empty documents, as well |
| 728 | # as setting up our basic macros in a preamble and building the page title. |
| 729 | sub start_document { |
| 730 | my ($self, $attrs) = @_; |
| 731 | if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { |
| 732 | DEBUG and print "Document is contentless\n"; |
| 733 | $$self{CONTENTLESS} = 1; |
| 734 | return; |
| 735 | } |
| 736 | |
| 737 | # Determine information for the preamble and then output it. |
| 738 | my ($name, $section); |
| 739 | if (defined $$self{name}) { |
| 740 | $name = $$self{name}; |
| 741 | $section = $$self{section} || 1; |
| 742 | } else { |
| 743 | ($name, $section) = $self->devise_title; |
| 744 | } |
| 745 | my $date = $$self{date} || $self->devise_date; |
| 746 | $self->preamble ($name, $section, $date) |
| 747 | unless $self->bare_output or DEBUG > 9; |
| 748 | |
| 749 | # Initialize a few per-document variables. |
| 750 | $$self{INDENT} = 0; # Current indentation level. |
| 751 | $$self{INDENTS} = []; # Stack of indentations. |
| 752 | $$self{INDEX} = []; # Index keys waiting to be printed. |
| 753 | $$self{IN_NAME} = 0; # Whether processing the NAME section. |
| 754 | $$self{ITEMS} = 0; # The number of consecutive =items. |
| 755 | $$self{ITEMTYPES} = []; # Stack of =item types, one per list. |
| 756 | $$self{SHIFTWAIT} = 0; # Whether there is a shift waiting. |
| 757 | $$self{SHIFTS} = []; # Stack of .RS shifts. |
| 758 | $$self{PENDING} = [[]]; # Pending output. |
| 759 | } |
| 760 | |
| 761 | # Handle the end of the document. This does nothing but print out a final |
| 762 | # comment at the end of the document under debugging. |
| 763 | sub end_document { |
| 764 | my ($self) = @_; |
| 765 | return if $self->bare_output; |
| 766 | return if ($$self{CONTENTLESS} && !$$self{ALWAYS_EMIT_SOMETHING}); |
| 767 | $self->output (q(.\" [End document]) . "\n") if DEBUG; |
| 768 | } |
| 769 | |
| 770 | # Try to figure out the name and section from the file name and return them as |
| 771 | # a list, returning an empty name and section 1 if we can't find any better |
| 772 | # information. Uses File::Basename and File::Spec as necessary. |
| 773 | sub devise_title { |
| 774 | my ($self) = @_; |
| 775 | my $name = $self->source_filename || ''; |
| 776 | my $section = $$self{section} || 1; |
| 777 | $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i); |
| 778 | $name =~ s/\.p(od|[lm])\z//i; |
| 779 | |
| 780 | # If the section isn't 3, then the name defaults to just the basename of |
| 781 | # the file. Otherwise, assume we're dealing with a module. We want to |
| 782 | # figure out the full module name from the path to the file, but we don't |
| 783 | # want to include too much of the path into the module name. Lose |
| 784 | # anything up to the first off: |
| 785 | # |
| 786 | # */lib/*perl*/ standard or site_perl module |
| 787 | # */*perl*/lib/ from -Dprefix=/opt/perl |
| 788 | # */*perl*/ random module hierarchy |
| 789 | # |
| 790 | # which works. Also strip off a leading site, site_perl, or vendor_perl |
| 791 | # component, any OS-specific component, and any version number component, |
| 792 | # and strip off an initial component of "lib" or "blib/lib" since that's |
| 793 | # what ExtUtils::MakeMaker creates. splitdir requires at least File::Spec |
| 794 | # 0.8. |
| 795 | if ($section !~ /^3/) { |
| 796 | require File::Basename; |
| 797 | $name = uc File::Basename::basename ($name); |
| 798 | } else { |
| 799 | require File::Spec; |
| 800 | my ($volume, $dirs, $file) = File::Spec->splitpath ($name); |
| 801 | my @dirs = File::Spec->splitdir ($dirs); |
| 802 | my $cut = 0; |
| 803 | my $i; |
| 804 | for ($i = 0; $i < @dirs; $i++) { |
| 805 | if ($dirs[$i] =~ /perl/) { |
| 806 | $cut = $i + 1; |
| 807 | $cut++ if ($dirs[$i + 1] && $dirs[$i + 1] eq 'lib'); |
| 808 | last; |
| 809 | } |
| 810 | } |
| 811 | if ($cut > 0) { |
| 812 | splice (@dirs, 0, $cut); |
| 813 | shift @dirs if ($dirs[0] =~ /^(site|vendor)(_perl)?$/); |
| 814 | shift @dirs if ($dirs[0] =~ /^[\d.]+$/); |
| 815 | shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/); |
| 816 | } |
| 817 | shift @dirs if $dirs[0] eq 'lib'; |
| 818 | splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib'); |
| 819 | |
| 820 | # Remove empty directories when building the module name; they |
| 821 | # occur too easily on Unix by doubling slashes. |
| 822 | $name = join ('::', (grep { $_ ? $_ : () } @dirs), $file); |
| 823 | } |
| 824 | return ($name, $section); |
| 825 | } |
| 826 | |
| 827 | # Determine the modification date and return that, properly formatted in ISO |
| 828 | # format. If we can't get the modification date of the input, instead use the |
| 829 | # current time. Pod::Simple returns a completely unuseful stringified file |
| 830 | # handle as the source_filename for input from a file handle, so we have to |
| 831 | # deal with that as well. |
| 832 | sub devise_date { |
| 833 | my ($self) = @_; |
| 834 | my $input = $self->source_filename; |
| 835 | my $time; |
| 836 | if ($input) { |
| 837 | $time = (stat $input)[9] || time; |
| 838 | } else { |
| 839 | $time = time; |
| 840 | } |
| 841 | return strftime ('%Y-%m-%d', localtime $time); |
| 842 | } |
| 843 | |
| 844 | # Print out the preamble and the title. The meaning of the arguments to .TH |
| 845 | # unfortunately vary by system; some systems consider the fourth argument to |
| 846 | # be a "source" and others use it as a version number. Generally it's just |
| 847 | # presented as the left-side footer, though, so it doesn't matter too much if |
| 848 | # a particular system gives it another interpretation. |
| 849 | # |
| 850 | # The order of date and release used to be reversed in older versions of this |
| 851 | # module, but this order is correct for both Solaris and Linux. |
| 852 | sub preamble { |
| 853 | my ($self, $name, $section, $date) = @_; |
| 854 | my $preamble = $self->preamble_template; |
| 855 | |
| 856 | # Build the index line and make sure that it will be syntactically valid. |
| 857 | my $index = "$name $section"; |
| 858 | $index =~ s/\"/\"\"/g; |
| 859 | |
| 860 | # If name or section contain spaces, quote them (section really never |
| 861 | # should, but we may as well be cautious). |
| 862 | for ($name, $section) { |
| 863 | if (/\s/) { |
| 864 | s/\"/\"\"/g; |
| 865 | $_ = '"' . $_ . '"'; |
| 866 | } |
| 867 | } |
| 868 | |
| 869 | # Double quotes in date, since it will be quoted. |
| 870 | $date =~ s/\"/\"\"/g; |
| 871 | |
| 872 | # Substitute into the preamble the configuration options. |
| 873 | $preamble =~ s/\@CFONT\@/$$self{fixed}/; |
| 874 | $preamble =~ s/\@LQUOTE\@/$$self{LQUOTE}/; |
| 875 | $preamble =~ s/\@RQUOTE\@/$$self{RQUOTE}/; |
| 876 | chomp $preamble; |
| 877 | |
| 878 | # Get the version information. |
| 879 | my $version = $self->version_report; |
| 880 | |
| 881 | # Finally output everything. |
| 882 | $self->output (<<"----END OF HEADER----"); |
| 883 | .\\" Automatically generated by $version |
| 884 | .\\" |
| 885 | .\\" Standard preamble: |
| 886 | .\\" ======================================================================== |
| 887 | $preamble |
| 888 | .\\" ======================================================================== |
| 889 | .\\" |
| 890 | .IX Title "$index" |
| 891 | .TH $name $section "$date" "$$self{release}" "$$self{center}" |
| 892 | .\\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| 893 | .\\" way too many mistakes in technical documents. |
| 894 | .if n .ad l |
| 895 | .nh |
| 896 | ----END OF HEADER---- |
| 897 | $self->output (".\\\" [End of preamble]\n") if DEBUG; |
| 898 | } |
| 899 | |
| 900 | ############################################################################## |
| 901 | # Text blocks |
| 902 | ############################################################################## |
| 903 | |
| 904 | # Handle a basic block of text. The only tricky part of this is if this is |
| 905 | # the first paragraph of text after an =over, in which case we have to change |
| 906 | # indentations for *roff. |
| 907 | sub cmd_para { |
| 908 | my ($self, $attrs, $text) = @_; |
| 909 | my $line = $$attrs{start_line}; |
| 910 | |
| 911 | # Output the paragraph. We also have to handle =over without =item. If |
| 912 | # there's an =over without =item, SHIFTWAIT will be set, and we need to |
| 913 | # handle creation of the indent here. Add the shift to SHIFTS so that it |
| 914 | # will be cleaned up on =back. |
| 915 | $self->makespace; |
| 916 | if ($$self{SHIFTWAIT}) { |
| 917 | $self->output (".RS $$self{INDENT}\n"); |
| 918 | push (@{ $$self{SHIFTS} }, $$self{INDENT}); |
| 919 | $$self{SHIFTWAIT} = 0; |
| 920 | } |
| 921 | |
| 922 | # Add the line number for debugging, but not in the NAME section just in |
| 923 | # case the comment would confuse apropos. |
| 924 | $self->output (".\\\" [At source line $line]\n") |
| 925 | if defined ($line) && DEBUG && !$$self{IN_NAME}; |
| 926 | |
| 927 | # Force exactly one newline at the end and strip unwanted trailing |
| 928 | # whitespace at the end. |
| 929 | $text =~ s/\s*$/\n/; |
| 930 | |
| 931 | # Output the paragraph. |
| 932 | $self->output ($self->protect ($self->textmapfonts ($text))); |
| 933 | $self->outindex; |
| 934 | $$self{NEEDSPACE} = 1; |
| 935 | return ''; |
| 936 | } |
| 937 | |
| 938 | # Handle a verbatim paragraph. Put a null token at the beginning of each line |
| 939 | # to protect against commands and wrap in .Vb/.Ve (which we define in our |
| 940 | # prelude). |
| 941 | sub cmd_verbatim { |
| 942 | my ($self, $attrs, $text) = @_; |
| 943 | |
| 944 | # Ignore an empty verbatim paragraph. |
| 945 | return unless $text =~ /\S/; |
| 946 | |
| 947 | # Force exactly one newline at the end and strip unwanted trailing |
| 948 | # whitespace at the end. |
| 949 | $text =~ s/\s*$/\n/; |
| 950 | |
| 951 | # Get a count of the number of lines before the first blank line, which |
| 952 | # we'll pass to .Vb as its parameter. This tells *roff to keep that many |
| 953 | # lines together. We don't want to tell *roff to keep huge blocks |
| 954 | # together. |
| 955 | my @lines = split (/\n/, $text); |
| 956 | my $unbroken = 0; |
| 957 | for (@lines) { |
| 958 | last if /^\s*$/; |
| 959 | $unbroken++; |
| 960 | } |
| 961 | $unbroken = 10 if ($unbroken > 12 && !$$self{MAGIC_VNOPAGEBREAK_LIMIT}); |
| 962 | |
| 963 | # Prepend a null token to each line. |
| 964 | $text =~ s/^/\\&/gm; |
| 965 | |
| 966 | # Output the results. |
| 967 | $self->makespace; |
| 968 | $self->output (".Vb $unbroken\n$text.Ve\n"); |
| 969 | $$self{NEEDSPACE} = 1; |
| 970 | return ''; |
| 971 | } |
| 972 | |
| 973 | # Handle literal text (produced by =for and similar constructs). Just output |
| 974 | # it with the minimum of changes. |
| 975 | sub cmd_data { |
| 976 | my ($self, $attrs, $text) = @_; |
| 977 | $text =~ s/^\n+//; |
| 978 | $text =~ s/\n{0,2}$/\n/; |
| 979 | $self->output ($text); |
| 980 | return ''; |
| 981 | } |
| 982 | |
| 983 | ############################################################################## |
| 984 | # Headings |
| 985 | ############################################################################## |
| 986 | |
| 987 | # Common code for all headings. This is called before the actual heading is |
| 988 | # output. It returns the cleaned up heading text (putting the heading all on |
| 989 | # one line) and may do other things, like closing bad =item blocks. |
| 990 | sub heading_common { |
| 991 | my ($self, $text, $line) = @_; |
| 992 | $text =~ s/\s+$//; |
| 993 | $text =~ s/\s*\n\s*/ /g; |
| 994 | |
| 995 | # This should never happen; it means that we have a heading after =item |
| 996 | # without an intervening =back. But just in case, handle it anyway. |
| 997 | if ($$self{ITEMS} > 1) { |
| 998 | $$self{ITEMS} = 0; |
| 999 | $self->output (".PD\n"); |
| 1000 | } |
| 1001 | |
| 1002 | # Output the current source line. |
| 1003 | $self->output ( ".\\\" [At source line $line]\n" ) |
| 1004 | if defined ($line) && DEBUG; |
| 1005 | return $text; |
| 1006 | } |
| 1007 | |
| 1008 | # First level heading. We can't output .IX in the NAME section due to a bug |
| 1009 | # in some versions of catman, so don't output a .IX for that section. .SH |
| 1010 | # already uses small caps, so remove \s0 and \s-1. Maintain IN_NAME as |
| 1011 | # appropriate. |
| 1012 | sub cmd_head1 { |
| 1013 | my ($self, $attrs, $text) = @_; |
| 1014 | $text =~ s/\\s-?\d//g; |
| 1015 | $text = $self->heading_common ($text, $$attrs{start_line}); |
| 1016 | my $isname = ($text eq 'NAME' || $text =~ /\(NAME\)/); |
| 1017 | $self->output ($self->switchquotes ('.SH', $self->mapfonts ($text))); |
| 1018 | $self->outindex ('Header', $text) unless $isname; |
| 1019 | $$self{NEEDSPACE} = 0; |
| 1020 | $$self{IN_NAME} = $isname; |
| 1021 | return ''; |
| 1022 | } |
| 1023 | |
| 1024 | # Second level heading. |
| 1025 | sub cmd_head2 { |
| 1026 | my ($self, $attrs, $text) = @_; |
| 1027 | $text = $self->heading_common ($text, $$attrs{start_line}); |
| 1028 | $self->output ($self->switchquotes ('.Sh', $self->mapfonts ($text))); |
| 1029 | $self->outindex ('Subsection', $text); |
| 1030 | $$self{NEEDSPACE} = 0; |
| 1031 | return ''; |
| 1032 | } |
| 1033 | |
| 1034 | # Third level heading. *roff doesn't have this concept, so just put the |
| 1035 | # heading in italics as a normal paragraph. |
| 1036 | sub cmd_head3 { |
| 1037 | my ($self, $attrs, $text) = @_; |
| 1038 | $text = $self->heading_common ($text, $$attrs{start_line}); |
| 1039 | $self->makespace; |
| 1040 | $self->output ($self->textmapfonts ('\f(IS' . $text . '\f(IE') . "\n"); |
| 1041 | $self->outindex ('Subsection', $text); |
| 1042 | $$self{NEEDSPACE} = 1; |
| 1043 | return ''; |
| 1044 | } |
| 1045 | |
| 1046 | # Fourth level heading. *roff doesn't have this concept, so just put the |
| 1047 | # heading as a normal paragraph. |
| 1048 | sub cmd_head4 { |
| 1049 | my ($self, $attrs, $text) = @_; |
| 1050 | $text = $self->heading_common ($text, $$attrs{start_line}); |
| 1051 | $self->makespace; |
| 1052 | $self->output ($self->textmapfonts ($text) . "\n"); |
| 1053 | $self->outindex ('Subsection', $text); |
| 1054 | $$self{NEEDSPACE} = 1; |
| 1055 | return ''; |
| 1056 | } |
| 1057 | |
| 1058 | ############################################################################## |
| 1059 | # Formatting codes |
| 1060 | ############################################################################## |
| 1061 | |
| 1062 | # All of the formatting codes that aren't handled internally by the parser, |
| 1063 | # other than L<> and X<>. |
| 1064 | sub cmd_b { return '\f(BS' . $_[2] . '\f(BE' } |
| 1065 | sub cmd_i { return '\f(IS' . $_[2] . '\f(IE' } |
| 1066 | sub cmd_f { return '\f(IS' . $_[2] . '\f(IE' } |
| 1067 | sub cmd_c { return $_[0]->quote_literal ($_[2]) } |
| 1068 | |
| 1069 | # Index entries are just added to the pending entries. |
| 1070 | sub cmd_x { |
| 1071 | my ($self, $attrs, $text) = @_; |
| 1072 | push (@{ $$self{INDEX} }, $text); |
| 1073 | return ''; |
| 1074 | } |
| 1075 | |
| 1076 | # Links reduce to the text that we're given, wrapped in angle brackets if it's |
| 1077 | # a URL. |
| 1078 | sub cmd_l { |
| 1079 | my ($self, $attrs, $text) = @_; |
| 1080 | return $$attrs{type} eq 'url' ? "<$text>" : $text; |
| 1081 | } |
| 1082 | |
| 1083 | ############################################################################## |
| 1084 | # List handling |
| 1085 | ############################################################################## |
| 1086 | |
| 1087 | # Handle the beginning of an =over block. Takes the type of the block as the |
| 1088 | # first argument, and then the attr hash. This is called by the handlers for |
| 1089 | # the four different types of lists (bullet, number, text, and block). |
| 1090 | sub over_common_start { |
| 1091 | my ($self, $type, $attrs) = @_; |
| 1092 | my $line = $$attrs{start_line}; |
| 1093 | my $indent = $$attrs{indent}; |
| 1094 | DEBUG > 3 and print " Starting =over $type (line $line, indent ", |
| 1095 | ($indent || '?'), "\n"; |
| 1096 | |
| 1097 | # Find the indentation level. |
| 1098 | unless (defined ($indent) && $indent =~ /^[-+]?\d{1,4}\s*$/) { |
| 1099 | $indent = $$self{indent}; |
| 1100 | } |
| 1101 | |
| 1102 | # If we've gotten multiple indentations in a row, we need to emit the |
| 1103 | # pending indentation for the last level that we saw and haven't acted on |
| 1104 | # yet. SHIFTS is the stack of indentations that we've actually emitted |
| 1105 | # code for. |
| 1106 | if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) { |
| 1107 | $self->output (".RS $$self{INDENT}\n"); |
| 1108 | push (@{ $$self{SHIFTS} }, $$self{INDENT}); |
| 1109 | } |
| 1110 | |
| 1111 | # Now, do record-keeping. INDENTS is a stack of indentations that we've |
| 1112 | # seen so far, and INDENT is the current level of indentation. ITEMTYPES |
| 1113 | # is a stack of list types that we've seen. |
| 1114 | push (@{ $$self{INDENTS} }, $$self{INDENT}); |
| 1115 | push (@{ $$self{ITEMTYPES} }, $type); |
| 1116 | $$self{INDENT} = $indent + 0; |
| 1117 | $$self{SHIFTWAIT} = 1; |
| 1118 | } |
| 1119 | |
| 1120 | # End an =over block. Takes no options other than the class pointer. |
| 1121 | # Normally, once we close a block and therefore remove something from INDENTS, |
| 1122 | # INDENTS will now be longer than SHIFTS, indicating that we also need to emit |
| 1123 | # *roff code to close the indent. This isn't *always* true, depending on the |
| 1124 | # circumstance. If we're still inside an indentation, we need to emit another |
| 1125 | # .RE and then a new .RS to unconfuse *roff. |
| 1126 | sub over_common_end { |
| 1127 | my ($self) = @_; |
| 1128 | DEBUG > 3 and print " Ending =over\n"; |
| 1129 | $$self{INDENT} = pop @{ $$self{INDENTS} }; |
| 1130 | pop @{ $$self{ITEMTYPES} }; |
| 1131 | |
| 1132 | # If we emitted code for that indentation, end it. |
| 1133 | if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) { |
| 1134 | $self->output (".RE\n"); |
| 1135 | pop @{ $$self{SHIFTS} }; |
| 1136 | } |
| 1137 | |
| 1138 | # If we're still in an indentation, *roff will have now lost track of the |
| 1139 | # right depth of that indentation, so fix that. |
| 1140 | if (@{ $$self{INDENTS} } > 0) { |
| 1141 | $self->output (".RE\n"); |
| 1142 | $self->output (".RS $$self{INDENT}\n"); |
| 1143 | } |
| 1144 | $$self{NEEDSPACE} = 1; |
| 1145 | $$self{SHIFTWAIT} = 0; |
| 1146 | } |
| 1147 | |
| 1148 | # Dispatch the start and end calls as appropriate. |
| 1149 | sub start_over_bullet { my $s = shift; $s->over_common_start ('bullet', @_) } |
| 1150 | sub start_over_number { my $s = shift; $s->over_common_start ('number', @_) } |
| 1151 | sub start_over_text { my $s = shift; $s->over_common_start ('text', @_) } |
| 1152 | sub start_over_block { my $s = shift; $s->over_common_start ('block', @_) } |
| 1153 | sub end_over_bullet { $_[0]->over_common_end } |
| 1154 | sub end_over_number { $_[0]->over_common_end } |
| 1155 | sub end_over_text { $_[0]->over_common_end } |
| 1156 | sub end_over_block { $_[0]->over_common_end } |
| 1157 | |
| 1158 | # The common handler for all item commands. Takes the type of the item, the |
| 1159 | # attributes, and then the text of the item. |
| 1160 | # |
| 1161 | # Emit an index entry for anything that's interesting, but don't emit index |
| 1162 | # entries for things like bullets and numbers. Newlines in an item title are |
| 1163 | # turned into spaces since *roff can't handle them embedded. |
| 1164 | sub item_common { |
| 1165 | my ($self, $type, $attrs, $text) = @_; |
| 1166 | my $line = $$attrs{start_line}; |
| 1167 | DEBUG > 3 and print " $type item (line $line): $text\n"; |
| 1168 | |
| 1169 | # Clean up the text. We want to end up with two variables, one ($text) |
| 1170 | # which contains any body text after taking out the item portion, and |
| 1171 | # another ($item) which contains the actual item text. |
| 1172 | $text =~ s/\s+$//; |
| 1173 | my ($item, $index); |
| 1174 | if ($type eq 'bullet') { |
| 1175 | $item = "\\\(bu"; |
| 1176 | $text =~ s/\n*$/\n/; |
| 1177 | } elsif ($type eq 'number') { |
| 1178 | $item = $$attrs{number} . '.'; |
| 1179 | } else { |
| 1180 | $item = $text; |
| 1181 | $item =~ s/\s*\n\s*/ /g; |
| 1182 | $text = ''; |
| 1183 | $index = $item if ($item =~ /\w/); |
| 1184 | } |
| 1185 | |
| 1186 | # Take care of the indentation. If shifts and indents are equal, close |
| 1187 | # the top shift, since we're about to create an indentation with .IP. |
| 1188 | # Also output .PD 0 to turn off spacing between items if this item is |
| 1189 | # directly following another one. We only have to do that once for a |
| 1190 | # whole chain of items so do it for the second item in the change. Note |
| 1191 | # that makespace is what undoes this. |
| 1192 | if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) { |
| 1193 | $self->output (".RE\n"); |
| 1194 | pop @{ $$self{SHIFTS} }; |
| 1195 | } |
| 1196 | $self->output (".PD 0\n") if ($$self{ITEMS} == 1); |
| 1197 | |
| 1198 | # Now, output the item tag itself. |
| 1199 | $item = $self->textmapfonts ($item); |
| 1200 | $self->output ($self->switchquotes ('.IP', $item, $$self{INDENT})); |
| 1201 | $$self{NEEDSPACE} = 0; |
| 1202 | $$self{ITEMS}++; |
| 1203 | $$self{SHIFTWAIT} = 0; |
| 1204 | |
| 1205 | # If body text for this item was included, go ahead and output that now. |
| 1206 | if ($text) { |
| 1207 | $text =~ s/\s*$/\n/; |
| 1208 | $self->makespace; |
| 1209 | $self->output ($self->protect ($self->textmapfonts ($text))); |
| 1210 | $$self{NEEDSPACE} = 1; |
| 1211 | } |
| 1212 | $self->outindex ($index ? ('Item', $index) : ()); |
| 1213 | } |
| 1214 | |
| 1215 | # Dispatch the item commands to the appropriate place. |
| 1216 | sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) } |
| 1217 | sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) } |
| 1218 | sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) } |
| 1219 | sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) } |
| 1220 | |
| 1221 | ############################################################################## |
| 1222 | # Backward compatibility |
| 1223 | ############################################################################## |
| 1224 | |
| 1225 | # Reset the underlying Pod::Simple object between calls to parse_from_file so |
| 1226 | # that the same object can be reused to convert multiple pages. |
| 1227 | sub parse_from_file { |
| 1228 | my $self = shift; |
| 1229 | $self->reinit; |
| 1230 | |
| 1231 | # Fake the old cutting option to Pod::Parser. This fiddings with internal |
| 1232 | # Pod::Simple state and is quite ugly; we need a better approach. |
| 1233 | if (ref ($_[0]) eq 'HASH') { |
| 1234 | my $opts = shift @_; |
| 1235 | if (defined ($$opts{-cutting}) && !$$opts{-cutting}) { |
| 1236 | $$self{in_pod} = 1; |
| 1237 | $$self{last_was_blank} = 1; |
| 1238 | } |
| 1239 | } |
| 1240 | |
| 1241 | # Do the work. |
| 1242 | my $retval = $self->SUPER::parse_from_file (@_); |
| 1243 | |
| 1244 | # Flush output, since Pod::Simple doesn't do this. Ideally we should also |
| 1245 | # close the file descriptor if we had to open one, but we can't easily |
| 1246 | # figure this out. |
| 1247 | my $fh = $self->output_fh (); |
| 1248 | my $oldfh = select $fh; |
| 1249 | my $oldflush = $|; |
| 1250 | $| = 1; |
| 1251 | print $fh ''; |
| 1252 | $| = $oldflush; |
| 1253 | select $oldfh; |
| 1254 | return $retval; |
| 1255 | } |
| 1256 | |
| 1257 | # Pod::Simple failed to provide this backward compatibility function, so |
| 1258 | # implement it ourselves. File handles are one of the inputs that |
| 1259 | # parse_from_file supports. |
| 1260 | sub parse_from_filehandle { |
| 1261 | my $self = shift; |
| 1262 | $self->parse_from_file (@_); |
| 1263 | } |
| 1264 | |
| 1265 | ############################################################################## |
| 1266 | # Translation tables |
| 1267 | ############################################################################## |
| 1268 | |
| 1269 | # The following table is adapted from Tom Christiansen's pod2man. It assumes |
| 1270 | # that the standard preamble has already been printed, since that's what |
| 1271 | # defines all of the accent marks. We really want to do something better than |
| 1272 | # this when *roff actually supports other character sets itself, since these |
| 1273 | # results are pretty poor. |
| 1274 | # |
| 1275 | # This only works in an ASCII world. What to do in a non-ASCII world is very |
| 1276 | # unclear. |
| 1277 | @ESCAPES{0xA0 .. 0xFF} = ( |
| 1278 | "\\ ", undef, undef, undef, undef, undef, undef, undef, |
| 1279 | undef, undef, undef, undef, undef, "\\%", undef, undef, |
| 1280 | |
| 1281 | undef, undef, undef, undef, undef, undef, undef, undef, |
| 1282 | undef, undef, undef, undef, undef, undef, undef, undef, |
| 1283 | |
| 1284 | "A\\*`", "A\\*'", "A\\*^", "A\\*~", "A\\*:", "A\\*o", "\\*(AE", "C\\*,", |
| 1285 | "E\\*`", "E\\*'", "E\\*^", "E\\*:", "I\\*`", "I\\*'", "I\\*^", "I\\*:", |
| 1286 | |
| 1287 | "\\*(D-", "N\\*~", "O\\*`", "O\\*'", "O\\*^", "O\\*~", "O\\*:", undef, |
| 1288 | "O\\*/", "U\\*`", "U\\*'", "U\\*^", "U\\*:", "Y\\*'", "\\*(Th", "\\*8", |
| 1289 | |
| 1290 | "a\\*`", "a\\*'", "a\\*^", "a\\*~", "a\\*:", "a\\*o", "\\*(ae", "c\\*,", |
| 1291 | "e\\*`", "e\\*'", "e\\*^", "e\\*:", "i\\*`", "i\\*'", "i\\*^", "i\\*:", |
| 1292 | |
| 1293 | "\\*(d-", "n\\*~", "o\\*`", "o\\*'", "o\\*^", "o\\*~", "o\\*:", undef, |
| 1294 | "o\\*/" , "u\\*`", "u\\*'", "u\\*^", "u\\*:", "y\\*'", "\\*(th", "y\\*:", |
| 1295 | ) if ASCII; |
| 1296 | |
| 1297 | # Make sure that at least this works even outside of ASCII. |
| 1298 | $ESCAPES{ord("\\")} = "\\e"; |
| 1299 | |
| 1300 | ############################################################################## |
| 1301 | # Premable |
| 1302 | ############################################################################## |
| 1303 | |
| 1304 | # The following is the static preamble which starts all *roff output we |
| 1305 | # generate. It's completely static except for the font to use as a |
| 1306 | # fixed-width font, which is designed by @CFONT@, and the left and right |
| 1307 | # quotes to use for C<> text, designated by @LQOUTE@ and @RQUOTE@. |
| 1308 | sub preamble_template { |
| 1309 | return <<'----END OF PREAMBLE----'; |
| 1310 | .de Sh \" Subsection heading |
| 1311 | .br |
| 1312 | .if t .Sp |
| 1313 | .ne 5 |
| 1314 | .PP |
| 1315 | \fB\\$1\fR |
| 1316 | .PP |
| 1317 | .. |
| 1318 | .de Sp \" Vertical space (when we can't use .PP) |
| 1319 | .if t .sp .5v |
| 1320 | .if n .sp |
| 1321 | .. |
| 1322 | .de Vb \" Begin verbatim text |
| 1323 | .ft @CFONT@ |
| 1324 | .nf |
| 1325 | .ne \\$1 |
| 1326 | .. |
| 1327 | .de Ve \" End verbatim text |
| 1328 | .ft R |
| 1329 | .fi |
| 1330 | .. |
| 1331 | .\" Set up some character translations and predefined strings. \*(-- will |
| 1332 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| 1333 | .\" double quote, and \*(R" will give a right double quote. \*(C+ will |
| 1334 | .\" give a nicer C++. Capital omega is used to do unbreakable dashes and |
| 1335 | .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, |
| 1336 | .\" nothing in troff, for use with C<>. |
| 1337 | .tr \(*W- |
| 1338 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| 1339 | .ie n \{\ |
| 1340 | . ds -- \(*W- |
| 1341 | . ds PI pi |
| 1342 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| 1343 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| 1344 | . ds L" "" |
| 1345 | . ds R" "" |
| 1346 | . ds C` @LQUOTE@ |
| 1347 | . ds C' @RQUOTE@ |
| 1348 | 'br\} |
| 1349 | .el\{\ |
| 1350 | . ds -- \|\(em\| |
| 1351 | . ds PI \(*p |
| 1352 | . ds L" `` |
| 1353 | . ds R" '' |
| 1354 | 'br\} |
| 1355 | .\" |
| 1356 | .\" Escape single quotes in literal strings from groff's Unicode transform. |
| 1357 | .ie \n(.g .ds Aq \(aq |
| 1358 | .el .ds Aq ' |
| 1359 | .\" |
| 1360 | .\" If the F register is turned on, we'll generate index entries on stderr for |
| 1361 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
| 1362 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
| 1363 | .\" output yourself in some meaningful fashion. |
| 1364 | .ie \nF \{\ |
| 1365 | . de IX |
| 1366 | . tm Index:\\$1\t\\n%\t"\\$2" |
| 1367 | .. |
| 1368 | . nr % 0 |
| 1369 | . rr F |
| 1370 | .\} |
| 1371 | .el \{\ |
| 1372 | . de IX |
| 1373 | .. |
| 1374 | .\} |
| 1375 | .\" |
| 1376 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| 1377 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
| 1378 | . \" fudge factors for nroff and troff |
| 1379 | .if n \{\ |
| 1380 | . ds #H 0 |
| 1381 | . ds #V .8m |
| 1382 | . ds #F .3m |
| 1383 | . ds #[ \f1 |
| 1384 | . ds #] \fP |
| 1385 | .\} |
| 1386 | .if t \{\ |
| 1387 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| 1388 | . ds #V .6m |
| 1389 | . ds #F 0 |
| 1390 | . ds #[ \& |
| 1391 | . ds #] \& |
| 1392 | .\} |
| 1393 | . \" simple accents for nroff and troff |
| 1394 | .if n \{\ |
| 1395 | . ds ' \& |
| 1396 | . ds ` \& |
| 1397 | . ds ^ \& |
| 1398 | . ds , \& |
| 1399 | . ds ~ ~ |
| 1400 | . ds / |
| 1401 | .\} |
| 1402 | .if t \{\ |
| 1403 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| 1404 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| 1405 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| 1406 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| 1407 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| 1408 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| 1409 | .\} |
| 1410 | . \" troff and (daisy-wheel) nroff accents |
| 1411 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| 1412 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| 1413 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| 1414 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| 1415 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| 1416 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| 1417 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| 1418 | .ds ae a\h'-(\w'a'u*4/10)'e |
| 1419 | .ds Ae A\h'-(\w'A'u*4/10)'E |
| 1420 | . \" corrections for vroff |
| 1421 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| 1422 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| 1423 | . \" for low resolution devices (crt and lpr) |
| 1424 | .if \n(.H>23 .if \n(.V>19 \ |
| 1425 | \{\ |
| 1426 | . ds : e |
| 1427 | . ds 8 ss |
| 1428 | . ds o a |
| 1429 | . ds d- d\h'-1'\(ga |
| 1430 | . ds D- D\h'-1'\(hy |
| 1431 | . ds th \o'bp' |
| 1432 | . ds Th \o'LP' |
| 1433 | . ds ae ae |
| 1434 | . ds Ae AE |
| 1435 | .\} |
| 1436 | .rm #[ #] #H #V #F C |
| 1437 | ----END OF PREAMBLE---- |
| 1438 | #`# for cperl-mode |
| 1439 | } |
| 1440 | |
| 1441 | ############################################################################## |
| 1442 | # Module return value and documentation |
| 1443 | ############################################################################## |
| 1444 | |
| 1445 | 1; |
| 1446 | __END__ |
| 1447 | |
| 1448 | =head1 NAME |
| 1449 | |
| 1450 | Pod::Man - Convert POD data to formatted *roff input |
| 1451 | |
| 1452 | =head1 SYNOPSIS |
| 1453 | |
| 1454 | use Pod::Man; |
| 1455 | my $parser = Pod::Man->new (release => $VERSION, section => 8); |
| 1456 | |
| 1457 | # Read POD from STDIN and write to STDOUT. |
| 1458 | $parser->parse_file (\*STDIN); |
| 1459 | |
| 1460 | # Read POD from file.pod and write to file.1. |
| 1461 | $parser->parse_from_file ('file.pod', 'file.1'); |
| 1462 | |
| 1463 | =head1 DESCRIPTION |
| 1464 | |
| 1465 | Pod::Man is a module to convert documentation in the POD format (the |
| 1466 | preferred language for documenting Perl) into *roff input using the man |
| 1467 | macro set. The resulting *roff code is suitable for display on a terminal |
| 1468 | using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>. |
| 1469 | It is conventionally invoked using the driver script B<pod2man>, but it can |
| 1470 | also be used directly. |
| 1471 | |
| 1472 | As a derived class from Pod::Simple, Pod::Man supports the same methods and |
| 1473 | interfaces. See L<Pod::Simple> for all the details. |
| 1474 | |
| 1475 | new() can take options, in the form of key/value pairs that control the |
| 1476 | behavior of the parser. See below for details. |
| 1477 | |
| 1478 | If no options are given, Pod::Man uses the name of the input file with any |
| 1479 | trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to |
| 1480 | section 1 unless the file ended in C<.pm> in which case it defaults to |
| 1481 | section 3, to a centered title of "User Contributed Perl Documentation", to |
| 1482 | a centered footer of the Perl version it is run with, and to a left-hand |
| 1483 | footer of the modification date of its input (or the current date if given |
| 1484 | STDIN for input). |
| 1485 | |
| 1486 | Pod::Man assumes that your *roff formatters have a fixed-width font named |
| 1487 | CW. If yours is called something else (like CR), use the C<fixed> option to |
| 1488 | specify it. This generally only matters for troff output for printing. |
| 1489 | Similarly, you can set the fonts used for bold, italic, and bold italic |
| 1490 | fixed-width output. |
| 1491 | |
| 1492 | Besides the obvious pod conversions, Pod::Man also takes care of formatting |
| 1493 | func(), func(3), and simple variable references like $foo or @bar so you |
| 1494 | don't have to use code escapes for them; complex expressions like |
| 1495 | C<$fred{'stuff'}> will still need to be escaped, though. It also translates |
| 1496 | dashes that aren't used as hyphens into en dashes, makes long dashes--like |
| 1497 | this--into proper em dashes, fixes "paired quotes," makes C++ look right, |
| 1498 | puts a little space between double underbars, makes ALLCAPS a teeny bit |
| 1499 | smaller in B<troff>, and escapes stuff that *roff treats as special so that |
| 1500 | you don't have to. |
| 1501 | |
| 1502 | The recognized options to new() are as follows. All options take a single |
| 1503 | argument. |
| 1504 | |
| 1505 | =over 4 |
| 1506 | |
| 1507 | =item center |
| 1508 | |
| 1509 | Sets the centered page header to use instead of "User Contributed Perl |
| 1510 | Documentation". |
| 1511 | |
| 1512 | =item date |
| 1513 | |
| 1514 | Sets the left-hand footer. By default, the modification date of the input |
| 1515 | file will be used, or the current date if stat() can't find that file (the |
| 1516 | case if the input is from STDIN), and the date will be formatted as |
| 1517 | YYYY-MM-DD. |
| 1518 | |
| 1519 | =item fixed |
| 1520 | |
| 1521 | The fixed-width font to use for vertabim text and code. Defaults to CW. |
| 1522 | Some systems may want CR instead. Only matters for B<troff> output. |
| 1523 | |
| 1524 | =item fixedbold |
| 1525 | |
| 1526 | Bold version of the fixed-width font. Defaults to CB. Only matters for |
| 1527 | B<troff> output. |
| 1528 | |
| 1529 | =item fixeditalic |
| 1530 | |
| 1531 | Italic version of the fixed-width font (actually, something of a misnomer, |
| 1532 | since most fixed-width fonts only have an oblique version, not an italic |
| 1533 | version). Defaults to CI. Only matters for B<troff> output. |
| 1534 | |
| 1535 | =item fixedbolditalic |
| 1536 | |
| 1537 | Bold italic (probably actually oblique) version of the fixed-width font. |
| 1538 | Pod::Man doesn't assume you have this, and defaults to CB. Some systems |
| 1539 | (such as Solaris) have this font available as CX. Only matters for B<troff> |
| 1540 | output. |
| 1541 | |
| 1542 | =item name |
| 1543 | |
| 1544 | Set the name of the manual page. Without this option, the manual name is |
| 1545 | set to the uppercased base name of the file being converted unless the |
| 1546 | manual section is 3, in which case the path is parsed to see if it is a Perl |
| 1547 | module path. If it is, a path like C<.../lib/Pod/Man.pm> is converted into |
| 1548 | a name like C<Pod::Man>. This option, if given, overrides any automatic |
| 1549 | determination of the name. |
| 1550 | |
| 1551 | =item quotes |
| 1552 | |
| 1553 | Sets the quote marks used to surround CE<lt>> text. If the value is a |
| 1554 | single character, it is used as both the left and right quote; if it is two |
| 1555 | characters, the first character is used as the left quote and the second as |
| 1556 | the right quoted; and if it is four characters, the first two are used as |
| 1557 | the left quote and the second two as the right quote. |
| 1558 | |
| 1559 | This may also be set to the special value C<none>, in which case no quote |
| 1560 | marks are added around CE<lt>> text (but the font is still changed for troff |
| 1561 | output). |
| 1562 | |
| 1563 | =item release |
| 1564 | |
| 1565 | Set the centered footer. By default, this is the version of Perl you run |
| 1566 | Pod::Man under. Note that some system an macro sets assume that the |
| 1567 | centered footer will be a modification date and will prepend something like |
| 1568 | "Last modified: "; if this is the case, you may want to set C<release> to |
| 1569 | the last modified date and C<date> to the version number. |
| 1570 | |
| 1571 | =item section |
| 1572 | |
| 1573 | Set the section for the C<.TH> macro. The standard section numbering |
| 1574 | convention is to use 1 for user commands, 2 for system calls, 3 for |
| 1575 | functions, 4 for devices, 5 for file formats, 6 for games, 7 for |
| 1576 | miscellaneous information, and 8 for administrator commands. There is a lot |
| 1577 | of variation here, however; some systems (like Solaris) use 4 for file |
| 1578 | formats, 5 for miscellaneous information, and 7 for devices. Still others |
| 1579 | use 1m instead of 8, or some mix of both. About the only section numbers |
| 1580 | that are reliably consistent are 1, 2, and 3. |
| 1581 | |
| 1582 | By default, section 1 will be used unless the file ends in .pm in which case |
| 1583 | section 3 will be selected. |
| 1584 | |
| 1585 | =back |
| 1586 | |
| 1587 | The standard Pod::Simple method parse_file() takes one argument naming the |
| 1588 | POD file to read from. By default, the output is sent to STDOUT, but this |
| 1589 | can be changed with the output_fd() method. |
| 1590 | |
| 1591 | The standard Pod::Simple method parse_from_file() takes up to two |
| 1592 | arguments, the first being the input file to read POD from and the second |
| 1593 | being the file to write the formatted output to. |
| 1594 | |
| 1595 | You can also call parse_lines() to parse an array of lines or |
| 1596 | parse_string_document() to parse a document already in memory. To put the |
| 1597 | output into a string instead of a file handle, call the output_string() |
| 1598 | method. See L<Pod::Simple> for the specific details. |
| 1599 | |
| 1600 | =head1 DIAGNOSTICS |
| 1601 | |
| 1602 | =over 4 |
| 1603 | |
| 1604 | =item roff font should be 1 or 2 chars, not "%s" |
| 1605 | |
| 1606 | (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that |
| 1607 | wasn't either one or two characters. Pod::Man doesn't support *roff fonts |
| 1608 | longer than two characters, although some *roff extensions do (the canonical |
| 1609 | versions of B<nroff> and B<troff> don't either). |
| 1610 | |
| 1611 | =item Invalid quote specification "%s" |
| 1612 | |
| 1613 | (F) The quote specification given (the quotes option to the constructor) was |
| 1614 | invalid. A quote specification must be one, two, or four characters long. |
| 1615 | |
| 1616 | =back |
| 1617 | |
| 1618 | =head1 BUGS |
| 1619 | |
| 1620 | Eight-bit input data isn't handled at all well at present. The correct |
| 1621 | approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8 |
| 1622 | characters and then do a translation pass on the output according to the |
| 1623 | user-specified output character set. Unfortunately, we can't send eight-bit |
| 1624 | data directly to the output unless the user says this is okay, since some |
| 1625 | vendor *roff implementations can't handle eight-bit data. If the *roff |
| 1626 | implementation can, however, that's far superior to the current hacked |
| 1627 | characters that only work under troff. |
| 1628 | |
| 1629 | There is currently no way to turn off the guesswork that tries to format |
| 1630 | unmarked text appropriately, and sometimes it isn't wanted (particularly |
| 1631 | when using POD to document something other than Perl). Most of the work |
| 1632 | towards fixing this has now been done, however, and all that's still needed |
| 1633 | is a user interface. |
| 1634 | |
| 1635 | The NAME section should be recognized specially and index entries emitted |
| 1636 | for everything in that section. This would have to be deferred until the |
| 1637 | next section, since extraneous things in NAME tends to confuse various man |
| 1638 | page processors. Currently, no index entries are emitted for anything in |
| 1639 | NAME. |
| 1640 | |
| 1641 | Pod::Man doesn't handle font names longer than two characters. Neither do |
| 1642 | most B<troff> implementations, but GNU troff does as an extension. It would |
| 1643 | be nice to support as an option for those who want to use it. |
| 1644 | |
| 1645 | The preamble added to each output file is rather verbose, and most of it |
| 1646 | is only necessary in the presence of non-ASCII characters. It would |
| 1647 | ideally be nice if all of those definitions were only output if needed, |
| 1648 | perhaps on the fly as the characters are used. |
| 1649 | |
| 1650 | Pod::Man is excessively slow. |
| 1651 | |
| 1652 | =head1 CAVEATS |
| 1653 | |
| 1654 | The handling of hyphens and em dashes is somewhat fragile, and one may get |
| 1655 | the wrong one under some circumstances. This should only matter for |
| 1656 | B<troff> output. |
| 1657 | |
| 1658 | When and whether to use small caps is somewhat tricky, and Pod::Man doesn't |
| 1659 | necessarily get it right. |
| 1660 | |
| 1661 | Converting neutral double quotes to properly matched double quotes doesn't |
| 1662 | work unless there are no formatting codes between the quote marks. This |
| 1663 | only matters for troff output. |
| 1664 | |
| 1665 | =head1 AUTHOR |
| 1666 | |
| 1667 | Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original |
| 1668 | B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>. The modifications to |
| 1669 | work with Pod::Simple instead of Pod::Parser were originally contributed by |
| 1670 | Sean Burke (but I've since hacked them beyond recognition and all bugs are |
| 1671 | mine). |
| 1672 | |
| 1673 | =head1 COPYRIGHT AND LICENSE |
| 1674 | |
| 1675 | Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 |
| 1676 | by Russ Allbery <rra@stanford.edu>. |
| 1677 | |
| 1678 | This program is free software; you may redistribute it and/or modify it |
| 1679 | under the same terms as Perl itself. |
| 1680 | |
| 1681 | =head1 SEE ALSO |
| 1682 | |
| 1683 | L<Pod::Simple>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>, |
| 1684 | L<man(1)>, L<man(7)> |
| 1685 | |
| 1686 | Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual," |
| 1687 | Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is |
| 1688 | the best documentation of standard B<nroff> and B<troff>. At the time of |
| 1689 | this writing, it's available at |
| 1690 | L<http://www.cs.bell-labs.com/cm/cs/cstr.html>. |
| 1691 | |
| 1692 | The man page documenting the man macro set may be L<man(5)> instead of |
| 1693 | L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive |
| 1694 | documentation on writing manual pages if you've not done it before and |
| 1695 | aren't familiar with the conventions. |
| 1696 | |
| 1697 | The current version of this module is always available from its web site at |
| 1698 | L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the |
| 1699 | Perl core distribution as of 5.6.0. |
| 1700 | |
| 1701 | =cut |