Commit | Line | Data |
---|---|---|
9741dab0 | 1 | # Pod::Man -- Convert POD data to formatted *roff input. |
9741dab0 | 2 | # |
9d1cda07 | 3 | # Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
b7ae008f SP |
4 | # Russ Allbery <rra@stanford.edu> |
5 | # Substantial contributions by Sean Burke <sburke@cpan.org> | |
9741dab0 | 6 | # |
3c014959 | 7 | # This program is free software; you may redistribute it and/or modify it |
9741dab0 GS |
8 | # under the same terms as Perl itself. |
9 | # | |
b84d8b9e JH |
10 | # This module translates POD documentation into *roff markup using the man |
11 | # macro set, and is intended for converting POD documents written as Unix | |
12 | # manual pages to manual pages that can be read by the man(1) command. It is | |
13 | # a replacement for the pod2man command distributed with versions of Perl | |
14 | # prior to 5.6. | |
c9abbd5d GS |
15 | # |
16 | # Perl core hackers, please note that this module is also separately | |
17 | # maintained outside of the Perl core as part of the podlators. Please send | |
18 | # me any patches at the address above in addition to sending them to the | |
19 | # standard Perl mailing lists. | |
9741dab0 | 20 | |
3c014959 | 21 | ############################################################################## |
9741dab0 | 22 | # Modules and declarations |
3c014959 | 23 | ############################################################################## |
9741dab0 GS |
24 | |
25 | package Pod::Man; | |
26 | ||
b84d8b9e | 27 | require 5.005; |
9741dab0 | 28 | |
9741dab0 GS |
29 | use strict; |
30 | use subs qw(makespace); | |
31 | use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION); | |
32 | ||
b7ae008f SP |
33 | use Carp qw(croak); |
34 | use Pod::Simple (); | |
35 | use POSIX qw(strftime); | |
36 | ||
37 | @ISA = qw(Pod::Simple); | |
9741dab0 | 38 | |
9d1cda07 | 39 | $VERSION = '2.22'; |
b7ae008f SP |
40 | |
41 | # Set the debugging level. If someone has inserted a debug function into this | |
42 | # class already, use that. Otherwise, use any Pod::Simple debug function | |
43 | # that's defined, and failing that, define a debug level of 10. | |
44 | BEGIN { | |
45 | my $parent = defined (&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : undef; | |
46 | unless (defined &DEBUG) { | |
47 | *DEBUG = $parent || sub () { 10 }; | |
48 | } | |
49 | } | |
5cdeb5a2 | 50 | |
b7ae008f SP |
51 | # Import the ASCII constant from Pod::Simple. This is true iff we're in an |
52 | # ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is | |
53 | # generally only false for EBCDIC. | |
54 | BEGIN { *ASCII = \&Pod::Simple::ASCII } | |
9741dab0 | 55 | |
b7ae008f SP |
56 | # Pretty-print a data structure. Only used for debugging. |
57 | BEGIN { *pretty = \&Pod::Simple::pretty } | |
9741dab0 | 58 | |
3c014959 | 59 | ############################################################################## |
b7ae008f | 60 | # Object initialization |
3c014959 | 61 | ############################################################################## |
9741dab0 | 62 | |
b7ae008f SP |
63 | # Initialize the object and set various Pod::Simple options that we need. |
64 | # Here, we also process any additional options passed to the constructor or | |
65 | # set up defaults if none were given. Note that all internal object keys are | |
66 | # in all-caps, reserving all lower-case object keys for Pod::Simple and user | |
67 | # arguments. | |
68 | sub new { | |
69 | my $class = shift; | |
70 | my $self = $class->SUPER::new; | |
71 | ||
0e4e3f6e | 72 | # Tell Pod::Simple not to handle S<> by automatically inserting . |
b7ae008f SP |
73 | $self->nbsp_for_S (1); |
74 | ||
75 | # Tell Pod::Simple to keep whitespace whenever possible. | |
76 | if ($self->can ('preserve_whitespace')) { | |
77 | $self->preserve_whitespace (1); | |
78 | } else { | |
79 | $self->fullstop_space_harden (1); | |
80 | } | |
81 | ||
82 | # The =for and =begin targets that we accept. | |
83 | $self->accept_targets (qw/man MAN roff ROFF/); | |
84 | ||
85 | # Ensure that contiguous blocks of code are merged together. Otherwise, | |
86 | # some of the guesswork heuristics don't work right. | |
87 | $self->merge_text (1); | |
88 | ||
89 | # Pod::Simple doesn't do anything useful with our arguments, but we want | |
90 | # to put them in our object as hash keys and values. This could cause | |
91 | # problems if we ever clash with Pod::Simple's own internal class | |
92 | # variables. | |
93 | %$self = (%$self, @_); | |
94 | ||
bc9c7511 NC |
95 | # Send errors to stderr if requested. |
96 | if ($$self{stderr}) { | |
97 | $self->no_errata_section (1); | |
98 | $self->complain_stderr (1); | |
99 | delete $$self{stderr}; | |
100 | } | |
101 | ||
b7ae008f SP |
102 | # Initialize various other internal constants based on our arguments. |
103 | $self->init_fonts; | |
104 | $self->init_quotes; | |
105 | $self->init_page; | |
106 | ||
107 | # For right now, default to turning on all of the magic. | |
108 | $$self{MAGIC_CPP} = 1; | |
109 | $$self{MAGIC_EMDASH} = 1; | |
110 | $$self{MAGIC_FUNC} = 1; | |
111 | $$self{MAGIC_MANREF} = 1; | |
112 | $$self{MAGIC_SMALLCAPS} = 1; | |
113 | $$self{MAGIC_VARS} = 1; | |
114 | ||
115 | return $self; | |
c9abbd5d | 116 | } |
5cdeb5a2 | 117 | |
9741dab0 GS |
118 | # Translate a font string into an escape. |
119 | sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] } | |
120 | ||
b7ae008f SP |
121 | # Determine which fonts the user wishes to use and store them in the object. |
122 | # Regular, italic, bold, and bold-italic are constants, but the fixed width | |
123 | # fonts may be set by the user. Sets the internal hash key FONTS which is | |
124 | # used to map our internal font escapes to actual *roff sequences later. | |
125 | sub init_fonts { | |
126 | my ($self) = @_; | |
9741dab0 | 127 | |
3c014959 JH |
128 | # Figure out the fixed-width font. If user-supplied, make sure that they |
129 | # are the right length. | |
9741dab0 | 130 | for (qw/fixed fixedbold fixeditalic fixedbolditalic/) { |
b7ae008f SP |
131 | my $font = $$self{$_}; |
132 | if (defined ($font) && (length ($font) < 1 || length ($font) > 2)) { | |
133 | croak qq(roff font should be 1 or 2 chars, not "$font"); | |
9741dab0 GS |
134 | } |
135 | } | |
136 | ||
b7ae008f SP |
137 | # Set the default fonts. We can't be sure portably across different |
138 | # implementations what fixed bold-italic may be called (if it's even | |
139 | # available), so default to just bold. | |
9741dab0 GS |
140 | $$self{fixed} ||= 'CW'; |
141 | $$self{fixedbold} ||= 'CB'; | |
142 | $$self{fixeditalic} ||= 'CI'; | |
143 | $$self{fixedbolditalic} ||= 'CB'; | |
144 | ||
3c014959 JH |
145 | # Set up a table of font escapes. First number is fixed-width, second is |
146 | # bold, third is italic. | |
9741dab0 GS |
147 | $$self{FONTS} = { '000' => '\fR', '001' => '\fI', |
148 | '010' => '\fB', '011' => '\f(BI', | |
149 | '100' => toescape ($$self{fixed}), | |
150 | '101' => toescape ($$self{fixeditalic}), | |
151 | '110' => toescape ($$self{fixedbold}), | |
b7ae008f SP |
152 | '111' => toescape ($$self{fixedbolditalic}) }; |
153 | } | |
9741dab0 | 154 | |
b7ae008f SP |
155 | # Initialize the quotes that we'll be using for C<> text. This requires some |
156 | # special handling, both to parse the user parameter if given and to make sure | |
157 | # that the quotes will be safe against *roff. Sets the internal hash keys | |
158 | # LQUOTE and RQUOTE. | |
159 | sub init_quotes { | |
160 | my ($self) = (@_); | |
9741dab0 | 161 | |
5cdeb5a2 | 162 | $$self{quotes} ||= '"'; |
ab1f1d91 JH |
163 | if ($$self{quotes} eq 'none') { |
164 | $$self{LQUOTE} = $$self{RQUOTE} = ''; | |
165 | } elsif (length ($$self{quotes}) == 1) { | |
166 | $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes}; | |
167 | } elsif ($$self{quotes} =~ /^(.)(.)$/ | |
168 | || $$self{quotes} =~ /^(..)(..)$/) { | |
169 | $$self{LQUOTE} = $1; | |
170 | $$self{RQUOTE} = $2; | |
171 | } else { | |
b7ae008f | 172 | croak(qq(Invalid quote specification "$$self{quotes}")) |
ab1f1d91 JH |
173 | } |
174 | ||
b7ae008f SP |
175 | # Double the first quote; note that this should not be s///g as two double |
176 | # quotes is represented in *roff as three double quotes, not four. Weird, | |
177 | # I know. | |
178 | $$self{LQUOTE} =~ s/\"/\"\"/; | |
179 | $$self{RQUOTE} =~ s/\"/\"\"/; | |
180 | } | |
181 | ||
182 | # Initialize the page title information and indentation from our arguments. | |
183 | sub init_page { | |
184 | my ($self) = @_; | |
185 | ||
186 | # We used to try first to get the version number from a local binary, but | |
187 | # we shouldn't need that any more. Get the version from the running Perl. | |
188 | # Work a little magic to handle subversions correctly under both the | |
189 | # pre-5.6 and the post-5.6 version numbering schemes. | |
190 | my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/); | |
191 | $version[2] ||= 0; | |
192 | $version[2] *= 10 ** (3 - length $version[2]); | |
193 | for (@version) { $_ += 0 } | |
194 | my $version = join ('.', @version); | |
195 | ||
196 | # Set the defaults for page titles and indentation if the user didn't | |
197 | # override anything. | |
198 | $$self{center} = 'User Contributed Perl Documentation' | |
199 | unless defined $$self{center}; | |
200 | $$self{release} = 'perl v' . $version | |
201 | unless defined $$self{release}; | |
202 | $$self{indent} = 4 | |
203 | unless defined $$self{indent}; | |
204 | ||
205 | # Double quotes in things that will be quoted. | |
206 | for (qw/center release/) { | |
207 | $$self{$_} =~ s/\"/\"\"/g if $$self{$_}; | |
208 | } | |
209 | } | |
210 | ||
211 | ############################################################################## | |
212 | # Core parsing | |
213 | ############################################################################## | |
214 | ||
215 | # This is the glue that connects the code below with Pod::Simple itself. The | |
216 | # goal is to convert the event stream coming from the POD parser into method | |
217 | # calls to handlers once the complete content of a tag has been seen. Each | |
218 | # paragraph or POD command will have textual content associated with it, and | |
219 | # as soon as all of a paragraph or POD command has been seen, that content | |
220 | # will be passed in to the corresponding method for handling that type of | |
221 | # object. The exceptions are handlers for lists, which have opening tag | |
222 | # handlers and closing tag handlers that will be called right away. | |
223 | # | |
224 | # The internal hash key PENDING is used to store the contents of a tag until | |
225 | # all of it has been seen. It holds a stack of open tags, each one | |
226 | # represented by a tuple of the attributes hash for the tag, formatting | |
227 | # options for the tag (which are inherited), and the contents of the tag. | |
228 | ||
229 | # Add a block of text to the contents of the current node, formatting it | |
230 | # according to the current formatting instructions as we do. | |
231 | sub _handle_text { | |
232 | my ($self, $text) = @_; | |
233 | DEBUG > 3 and print "== $text\n"; | |
234 | my $tag = $$self{PENDING}[-1]; | |
235 | $$tag[2] .= $self->format_text ($$tag[1], $text); | |
236 | } | |
237 | ||
238 | # Given an element name, get the corresponding method name. | |
239 | sub method_for_element { | |
240 | my ($self, $element) = @_; | |
241 | $element =~ tr/-/_/; | |
242 | $element =~ tr/A-Z/a-z/; | |
243 | $element =~ tr/_a-z0-9//cd; | |
244 | return $element; | |
245 | } | |
246 | ||
247 | # Handle the start of a new element. If cmd_element is defined, assume that | |
248 | # we need to collect the entire tree for this element before passing it to the | |
249 | # element method, and create a new tree into which we'll collect blocks of | |
250 | # text and nested elements. Otherwise, if start_element is defined, call it. | |
251 | sub _handle_element_start { | |
252 | my ($self, $element, $attrs) = @_; | |
253 | DEBUG > 3 and print "++ $element (<", join ('> <', %$attrs), ">)\n"; | |
254 | my $method = $self->method_for_element ($element); | |
255 | ||
256 | # If we have a command handler, we need to accumulate the contents of the | |
257 | # tag before calling it. Turn off IN_NAME for any command other than | |
258 | # <Para> so that IN_NAME isn't still set for the first heading after the | |
259 | # NAME heading. | |
260 | if ($self->can ("cmd_$method")) { | |
261 | DEBUG > 2 and print "<$element> starts saving a tag\n"; | |
262 | $$self{IN_NAME} = 0 if ($element ne 'Para'); | |
263 | ||
264 | # How we're going to format embedded text blocks depends on the tag | |
265 | # and also depends on our parent tags. Thankfully, inside tags that | |
266 | # turn off guesswork and reformatting, nothing else can turn it back | |
267 | # on, so this can be strictly inherited. | |
268 | my $formatting = $$self{PENDING}[-1][1]; | |
269 | $formatting = $self->formatting ($formatting, $element); | |
270 | push (@{ $$self{PENDING} }, [ $attrs, $formatting, '' ]); | |
271 | DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; | |
272 | } elsif ($self->can ("start_$method")) { | |
273 | my $method = 'start_' . $method; | |
274 | $self->$method ($attrs, ''); | |
275 | } else { | |
276 | DEBUG > 2 and print "No $method start method, skipping\n"; | |
277 | } | |
278 | } | |
279 | ||
280 | # Handle the end of an element. If we had a cmd_ method for this element, | |
281 | # this is where we pass along the tree that we built. Otherwise, if we have | |
282 | # an end_ method for the element, call that. | |
283 | sub _handle_element_end { | |
284 | my ($self, $element) = @_; | |
285 | DEBUG > 3 and print "-- $element\n"; | |
286 | my $method = $self->method_for_element ($element); | |
287 | ||
288 | # If we have a command handler, pull off the pending text and pass it to | |
289 | # the handler along with the saved attribute hash. | |
290 | if ($self->can ("cmd_$method")) { | |
291 | DEBUG > 2 and print "</$element> stops saving a tag\n"; | |
292 | my $tag = pop @{ $$self{PENDING} }; | |
293 | DEBUG > 4 and print "Popped: [", pretty ($tag), "]\n"; | |
294 | DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; | |
295 | my $method = 'cmd_' . $method; | |
296 | my $text = $self->$method ($$tag[0], $$tag[2]); | |
297 | if (defined $text) { | |
298 | if (@{ $$self{PENDING} } > 1) { | |
299 | $$self{PENDING}[-1][2] .= $text; | |
300 | } else { | |
301 | $self->output ($text); | |
302 | } | |
303 | } | |
304 | } elsif ($self->can ("end_$method")) { | |
305 | my $method = 'end_' . $method; | |
8f202758 | 306 | $self->$method (); |
b7ae008f SP |
307 | } else { |
308 | DEBUG > 2 and print "No $method end method, skipping\n"; | |
309 | } | |
310 | } | |
311 | ||
312 | ############################################################################## | |
313 | # General formatting | |
314 | ############################################################################## | |
315 | ||
316 | # Return formatting instructions for a new block. Takes the current | |
317 | # formatting and the new element. Formatting inherits negatively, in the | |
318 | # sense that if the parent has turned off guesswork, all child elements should | |
319 | # leave it off. We therefore return a copy of the same formatting | |
320 | # instructions but possibly with more things turned off depending on the | |
321 | # element. | |
322 | sub formatting { | |
323 | my ($self, $current, $element) = @_; | |
324 | my %options; | |
325 | if ($current) { | |
326 | %options = %$current; | |
327 | } else { | |
328 | %options = (guesswork => 1, cleanup => 1, convert => 1); | |
329 | } | |
330 | if ($element eq 'Data') { | |
331 | $options{guesswork} = 0; | |
332 | $options{cleanup} = 0; | |
333 | $options{convert} = 0; | |
334 | } elsif ($element eq 'X') { | |
335 | $options{guesswork} = 0; | |
336 | $options{cleanup} = 0; | |
337 | } elsif ($element eq 'Verbatim' || $element eq 'C') { | |
338 | $options{guesswork} = 0; | |
40dcca8a | 339 | $options{literal} = 1; |
b7ae008f SP |
340 | } |
341 | return \%options; | |
342 | } | |
343 | ||
344 | # Format a text block. Takes a hash of formatting options and the text to | |
345 | # format. Currently, the only formatting options are guesswork, cleanup, and | |
346 | # convert, all of which are boolean. | |
347 | sub format_text { | |
348 | my ($self, $options, $text) = @_; | |
349 | my $guesswork = $$options{guesswork} && !$$self{IN_NAME}; | |
350 | my $cleanup = $$options{cleanup}; | |
351 | my $convert = $$options{convert}; | |
40dcca8a | 352 | my $literal = $$options{literal}; |
b7ae008f | 353 | |
b7ae008f | 354 | # Cleanup just tidies up a few things, telling *roff that the hyphens are |
55595e83 SP |
355 | # hard, putting a bit of space between consecutive underscores, and |
356 | # escaping backslashes. Be careful not to mangle our character | |
357 | # translations by doing this before processing character translation. | |
b7ae008f | 358 | if ($cleanup) { |
55595e83 | 359 | $text =~ s/\\/\\e/g; |
b7ae008f SP |
360 | $text =~ s/-/\\-/g; |
361 | $text =~ s/_(?=_)/_\\|/g; | |
362 | } | |
363 | ||
55595e83 SP |
364 | # Normally we do character translation, but we won't even do that in |
365 | # <Data> blocks or if UTF-8 output is desired. | |
366 | if ($convert && !$$self{utf8} && ASCII) { | |
367 | $text =~ s/([^\x00-\x7F])/$ESCAPES{ord ($1)} || "X"/eg; | |
368 | } | |
369 | ||
40dcca8a RGS |
370 | # Ensure that *roff doesn't convert literal quotes to UTF-8 single quotes, |
371 | # but don't mess up our accept escapes. | |
372 | if ($literal) { | |
373 | $text =~ s/(?<!\\\*)\'/\\*\(Aq/g; | |
374 | $text =~ s/(?<!\\\*)\`/\\\`/g; | |
375 | } | |
376 | ||
b7ae008f SP |
377 | # If guesswork is asked for, do that. This involves more substantial |
378 | # formatting based on various heuristics that may only be appropriate for | |
379 | # particular documents. | |
380 | if ($guesswork) { | |
381 | $text = $self->guesswork ($text); | |
382 | } | |
383 | ||
384 | return $text; | |
385 | } | |
386 | ||
387 | # Handles C<> text, deciding whether to put \*C` around it or not. This is a | |
388 | # whole bunch of messy heuristics to try to avoid overquoting, originally from | |
389 | # Barrie Slaymaker. This largely duplicates similar code in Pod::Text. | |
390 | sub quote_literal { | |
391 | my $self = shift; | |
392 | local $_ = shift; | |
393 | ||
394 | # A regex that matches the portion of a variable reference that's the | |
395 | # array or hash index, separated out just because we want to use it in | |
396 | # several places in the following regex. | |
397 | my $index = '(?: \[.*\] | \{.*\} )?'; | |
398 | ||
399 | # Check for things that we don't want to quote, and if we find any of | |
400 | # them, return the string with just a font change and no quoting. | |
401 | m{ | |
402 | ^\s* | |
403 | (?: | |
404 | ( [\'\`\"] ) .* \1 # already quoted | |
40dcca8a RGS |
405 | | \\\*\(Aq .* \\\*\(Aq # quoted and escaped |
406 | | \\?\` .* ( \' | \\\*\(Aq ) # `quoted' | |
b7ae008f SP |
407 | | \$+ [\#^]? \S $index # special ($^Foo, $") |
408 | | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func | |
409 | | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call | |
410 | | [-+]? ( \d[\d.]* | \.\d+ ) (?: [eE][-+]?\d+ )? # a number | |
411 | | 0x [a-fA-F\d]+ # a hex constant | |
412 | ) | |
413 | \s*\z | |
414 | }xso and return '\f(FS' . $_ . '\f(FE'; | |
415 | ||
416 | # If we didn't return, go ahead and quote the text. | |
417 | return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"; | |
418 | } | |
419 | ||
420 | # Takes a text block to perform guesswork on. Returns the text block with | |
421 | # formatting codes added. This is the code that marks up various Perl | |
422 | # constructs and things commonly used in man pages without requiring the user | |
423 | # to add any explicit markup, and is applied to all non-literal text. We're | |
424 | # guaranteed that the text we're applying guesswork to does not contain any | |
425 | # *roff formatting codes. Note that the inserted font sequences must be | |
426 | # treated later with mapfonts or textmapfonts. | |
427 | # | |
428 | # This method is very fragile, both in the regular expressions it uses and in | |
429 | # the ordering of those modifications. Care and testing is required when | |
430 | # modifying it. | |
431 | sub guesswork { | |
432 | my $self = shift; | |
433 | local $_ = shift; | |
434 | DEBUG > 5 and print " Guesswork called on [$_]\n"; | |
435 | ||
436 | # By the time we reach this point, all hypens will be escaped by adding a | |
42ae9e1d RGS |
437 | # backslash. We want to undo that escaping if they're part of regular |
438 | # words and there's only a single dash, since that's a real hyphen that | |
439 | # *roff gets to consider a possible break point. Make sure that a dash | |
440 | # after the first character of a word stays non-breaking, however. | |
b7ae008f SP |
441 | # |
442 | # Note that this is not user-controllable; we pretty much have to do this | |
443 | # transformation or *roff will mangle the output in unacceptable ways. | |
444 | s{ | |
42ae9e1d RGS |
445 | ( (?:\G|^|\s) [\(\"]* [a-zA-Z] ) ( \\- )? |
446 | ( (?: [a-zA-Z\']+ \\-)+ ) | |
447 | ( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|\Z|\\\ ) ) | |
b7ae008f SP |
448 | \b |
449 | } { | |
450 | my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4); | |
451 | $hyphen ||= ''; | |
452 | $main =~ s/\\-/-/g; | |
453 | $prefix . $hyphen . $main . $suffix; | |
454 | }egx; | |
455 | ||
456 | # Translate "--" into a real em-dash if it's used like one. This means | |
457 | # that it's either surrounded by whitespace, it follows a regular word, or | |
458 | # it occurs between two regular words. | |
459 | if ($$self{MAGIC_EMDASH}) { | |
460 | s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx; | |
461 | s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx; | |
462 | } | |
463 | ||
464 | # Make words in all-caps a little bit smaller; they look better that way. | |
465 | # However, we don't want to change Perl code (like @ARGV), nor do we want | |
466 | # to fix the MIME in MIME-Version since it looks weird with the | |
467 | # full-height V. | |
468 | # | |
469 | # We change only a string of all caps (2) either at the beginning of the | |
470 | # line or following regular punctuation (like quotes) or whitespace (1), | |
471 | # and followed by either similar punctuation, an em-dash, or the end of | |
472 | # the line (3). | |
473 | if ($$self{MAGIC_SMALLCAPS}) { | |
474 | s{ | |
475 | ( ^ | [\s\(\"\'\`\[\{<>] | \\\ ) # (1) | |
476 | ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* ) # (2) | |
477 | (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | \\\ | $ ) # (3) | |
478 | } { | |
479 | $1 . '\s-1' . $2 . '\s0' | |
480 | }egx; | |
481 | } | |
482 | ||
483 | # Note that from this point forward, we have to adjust for \s-1 and \s-0 | |
484 | # strings inserted around things that we've made small-caps if later | |
485 | # transforms should work on those strings. | |
486 | ||
487 | # Italize functions in the form func(), including functions that are in | |
488 | # all capitals, but don't italize if there's anything between the parens. | |
489 | # The function must start with an alphabetic character or underscore and | |
490 | # then consist of word characters or colons. | |
491 | if ($$self{MAGIC_FUNC}) { | |
492 | s{ | |
493 | ( \b | \\s-1 ) | |
494 | ( [A-Za-z_] ([:\w] | \\s-?[01])+ \(\) ) | |
495 | } { | |
496 | $1 . '\f(IS' . $2 . '\f(IE' | |
497 | }egx; | |
498 | } | |
499 | ||
500 | # Change references to manual pages to put the page name in italics but | |
501 | # the number in the regular font, with a thin space between the name and | |
502 | # the number. Only recognize func(n) where func starts with an alphabetic | |
503 | # character or underscore and contains only word characters, periods (for | |
504 | # configuration file man pages), or colons, and n is a single digit, | |
505 | # optionally followed by some number of lowercase letters. Note that this | |
506 | # does not recognize man page references like perl(l) or socket(3SOCKET). | |
507 | if ($$self{MAGIC_MANREF}) { | |
508 | s{ | |
509 | ( \b | \\s-1 ) | |
510 | ( [A-Za-z_] (?:[.:\w] | \\- | \\s-?[01])+ ) | |
511 | ( \( \d [a-z]* \) ) | |
512 | } { | |
513 | $1 . '\f(IS' . $2 . '\f(IE\|' . $3 | |
514 | }egx; | |
515 | } | |
516 | ||
517 | # Convert simple Perl variable references to a fixed-width font. Be | |
518 | # careful not to convert functions, though; there are too many subtleties | |
519 | # with them to want to perform this transformation. | |
520 | if ($$self{MAGIC_VARS}) { | |
521 | s{ | |
522 | ( ^ | \s+ ) | |
523 | ( [\$\@%] [\w:]+ ) | |
524 | (?! \( ) | |
525 | } { | |
526 | $1 . '\f(FS' . $2 . '\f(FE' | |
527 | }egx; | |
528 | } | |
529 | ||
530 | # Fix up double quotes. Unfortunately, we miss this transformation if the | |
531 | # quoted text contains any code with formatting codes and there's not much | |
532 | # we can effectively do about that, which makes it somewhat unclear if | |
533 | # this is really a good idea. | |
534 | s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx; | |
535 | ||
536 | # Make C++ into \*(C+, which is a squinched version. | |
537 | if ($$self{MAGIC_CPP}) { | |
538 | s{ \b C\+\+ } {\\*\(C+}gx; | |
539 | } | |
540 | ||
541 | # Done. | |
542 | DEBUG > 5 and print " Guesswork returning [$_]\n"; | |
543 | return $_; | |
544 | } | |
545 | ||
546 | ############################################################################## | |
547 | # Output | |
548 | ############################################################################## | |
549 | ||
550 | # When building up the *roff code, we don't use real *roff fonts. Instead, we | |
551 | # embed font codes of the form \f(<font>[SE] where <font> is one of B, I, or | |
552 | # F, S stands for start, and E stands for end. This method turns these into | |
553 | # the right start and end codes. | |
554 | # | |
555 | # We add this level of complexity because the old pod2man didn't get code like | |
556 | # B<someI<thing> else> right; after I<> it switched back to normal text rather | |
557 | # than bold. We take care of this by using variables that state whether bold, | |
558 | # italic, or fixed are turned on as a combined pointer to our current font | |
559 | # sequence, and set each to the number of current nestings of start tags for | |
560 | # that font. | |
561 | # | |
562 | # \fP changes to the previous font, but only one previous font is kept. We | |
563 | # don't know what the outside level font is; normally it's R, but if we're | |
564 | # inside a heading it could be something else. So arrange things so that the | |
565 | # outside font is always the "previous" font and end with \fP instead of \fR. | |
566 | # Idea from Zack Weinberg. | |
567 | sub mapfonts { | |
568 | my ($self, $text) = @_; | |
569 | my ($fixed, $bold, $italic) = (0, 0, 0); | |
570 | my %magic = (F => \$fixed, B => \$bold, I => \$italic); | |
571 | my $last = '\fR'; | |
572 | $text =~ s< | |
573 | \\f\((.)(.) | |
574 | > < | |
575 | my $sequence = ''; | |
576 | my $f; | |
577 | if ($last ne '\fR') { $sequence = '\fP' } | |
578 | ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; | |
579 | $f = $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; | |
580 | if ($f eq $last) { | |
581 | ''; | |
582 | } else { | |
583 | if ($f ne '\fR') { $sequence .= $f } | |
584 | $last = $f; | |
585 | $sequence; | |
586 | } | |
587 | >gxe; | |
588 | return $text; | |
589 | } | |
590 | ||
591 | # Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU | |
592 | # groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather | |
593 | # than R, presumably because \f(CW doesn't actually do a font change. To work | |
594 | # around this, use a separate textmapfonts for text blocks where the default | |
595 | # font is always R and only use the smart mapfonts for headings. | |
596 | sub textmapfonts { | |
597 | my ($self, $text) = @_; | |
598 | my ($fixed, $bold, $italic) = (0, 0, 0); | |
599 | my %magic = (F => \$fixed, B => \$bold, I => \$italic); | |
600 | $text =~ s< | |
601 | \\f\((.)(.) | |
602 | > < | |
603 | ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; | |
604 | $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; | |
605 | >gxe; | |
606 | return $text; | |
607 | } | |
608 | ||
609 | # Given a command and a single argument that may or may not contain double | |
610 | # quotes, handle double-quote formatting for it. If there are no double | |
611 | # quotes, just return the command followed by the argument in double quotes. | |
612 | # If there are double quotes, use an if statement to test for nroff, and for | |
613 | # nroff output the command followed by the argument in double quotes with | |
614 | # embedded double quotes doubled. For other formatters, remap paired double | |
615 | # quotes to LQUOTE and RQUOTE. | |
616 | sub switchquotes { | |
617 | my ($self, $command, $text, $extra) = @_; | |
618 | $text =~ s/\\\*\([LR]\"/\"/g; | |
619 | ||
620 | # We also have to deal with \*C` and \*C', which are used to add the | |
621 | # quotes around C<> text, since they may expand to " and if they do this | |
622 | # confuses the .SH macros and the like no end. Expand them ourselves. | |
623 | # Also separate troff from nroff if there are any fixed-width fonts in use | |
624 | # to work around problems with Solaris nroff. | |
625 | my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/); | |
626 | my $fixedpat = join '|', @{ $$self{FONTS} }{'100', '101', '110', '111'}; | |
627 | $fixedpat =~ s/\\/\\\\/g; | |
628 | $fixedpat =~ s/\(/\\\(/g; | |
629 | if ($text =~ m/\"/ || $text =~ m/$fixedpat/) { | |
630 | $text =~ s/\"/\"\"/g; | |
631 | my $nroff = $text; | |
632 | my $troff = $text; | |
633 | $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g; | |
634 | if ($c_is_quote and $text =~ m/\\\*\(C[\'\`]/) { | |
635 | $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g; | |
636 | $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g; | |
637 | $troff =~ s/\\\*\(C[\'\`]//g; | |
638 | } | |
639 | $nroff = qq("$nroff") . ($extra ? " $extra" : ''); | |
640 | $troff = qq("$troff") . ($extra ? " $extra" : ''); | |
641 | ||
642 | # Work around the Solaris nroff bug where \f(CW\fP leaves the font set | |
643 | # to Roman rather than the actual previous font when used in headings. | |
644 | # troff output may still be broken, but at least we can fix nroff by | |
645 | # just switching the font changes to the non-fixed versions. | |
55595e83 SP |
646 | $nroff =~ s/\Q$$self{FONTS}{100}\E(.*?)\\f[PR]/$1/g; |
647 | $nroff =~ s/\Q$$self{FONTS}{101}\E(.*?)\\f([PR])/\\fI$1\\f$2/g; | |
648 | $nroff =~ s/\Q$$self{FONTS}{110}\E(.*?)\\f([PR])/\\fB$1\\f$2/g; | |
649 | $nroff =~ s/\Q$$self{FONTS}{111}\E(.*?)\\f([PR])/\\f\(BI$1\\f$2/g; | |
b7ae008f SP |
650 | |
651 | # Now finally output the command. Bother with .ie only if the nroff | |
652 | # and troff output aren't the same. | |
653 | if ($nroff ne $troff) { | |
654 | return ".ie n $command $nroff\n.el $command $troff\n"; | |
655 | } else { | |
656 | return "$command $nroff\n"; | |
657 | } | |
658 | } else { | |
659 | $text = qq("$text") . ($extra ? " $extra" : ''); | |
660 | return "$command $text\n"; | |
661 | } | |
662 | } | |
663 | ||
664 | # Protect leading quotes and periods against interpretation as commands. Also | |
665 | # protect anything starting with a backslash, since it could expand or hide | |
666 | # something that *roff would interpret as a command. This is overkill, but | |
667 | # it's much simpler than trying to parse *roff here. | |
668 | sub protect { | |
669 | my ($self, $text) = @_; | |
670 | $text =~ s/^([.\'\\])/\\&$1/mg; | |
671 | return $text; | |
672 | } | |
673 | ||
674 | # Make vertical whitespace if NEEDSPACE is set, appropriate to the indentation | |
675 | # level the situation. This function is needed since in *roff one has to | |
676 | # create vertical whitespace after paragraphs and between some things, but | |
677 | # other macros create their own whitespace. Also close out a sequence of | |
678 | # repeated =items, since calling makespace means we're about to begin the item | |
679 | # body. | |
680 | sub makespace { | |
681 | my ($self) = @_; | |
682 | $self->output (".PD\n") if $$self{ITEMS} > 1; | |
683 | $$self{ITEMS} = 0; | |
684 | $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n") | |
685 | if $$self{NEEDSPACE}; | |
686 | } | |
687 | ||
688 | # Output any pending index entries, and optionally an index entry given as an | |
689 | # argument. Support multiple index entries in X<> separated by slashes, and | |
690 | # strip special escapes from index entries. | |
691 | sub outindex { | |
692 | my ($self, $section, $index) = @_; | |
693 | my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} }; | |
694 | return unless ($section || @entries); | |
695 | ||
696 | # We're about to output all pending entries, so clear our pending queue. | |
697 | $$self{INDEX} = []; | |
698 | ||
699 | # Build the output. Regular index entries are marked Xref, and headings | |
700 | # pass in their own section. Undo some *roff formatting on headings. | |
701 | my @output; | |
702 | if (@entries) { | |
703 | push @output, [ 'Xref', join (' ', @entries) ]; | |
704 | } | |
705 | if ($section) { | |
706 | $index =~ s/\\-/-/g; | |
707 | $index =~ s/\\(?:s-?\d|.\(..|.)//g; | |
708 | push @output, [ $section, $index ]; | |
709 | } | |
ab1f1d91 | 710 | |
b7ae008f SP |
711 | # Print out the .IX commands. |
712 | for (@output) { | |
713 | my ($type, $entry) = @$_; | |
714 | $entry =~ s/\"/\"\"/g; | |
715 | $self->output (".IX $type " . '"' . $entry . '"' . "\n"); | |
716 | } | |
9741dab0 GS |
717 | } |
718 | ||
b7ae008f SP |
719 | # Output some text, without any additional changes. |
720 | sub output { | |
721 | my ($self, @text) = @_; | |
722 | print { $$self{output_fh} } @text; | |
723 | } | |
9741dab0 | 724 | |
b7ae008f SP |
725 | ############################################################################## |
726 | # Document initialization | |
727 | ############################################################################## | |
bf202ccd | 728 | |
b7ae008f SP |
729 | # Handle the start of the document. Here we handle empty documents, as well |
730 | # as setting up our basic macros in a preamble and building the page title. | |
731 | sub start_document { | |
732 | my ($self, $attrs) = @_; | |
733 | if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { | |
734 | DEBUG and print "Document is contentless\n"; | |
735 | $$self{CONTENTLESS} = 1; | |
736 | return; | |
9741dab0 GS |
737 | } |
738 | ||
9f2f055a SH |
739 | # If we were given the utf8 option, set an output encoding on our file |
740 | # handle. Wrap in an eval in case we're using a version of Perl too old | |
741 | # to understand this. | |
742 | # | |
743 | # This is evil because it changes the global state of a file handle that | |
744 | # we may not own. However, we can't just blindly encode all output, since | |
745 | # there may be a pre-applied output encoding (such as from PERL_UNICODE) | |
746 | # and then we would double-encode. This seems to be the least bad | |
747 | # approach. | |
748 | if ($$self{utf8}) { | |
749 | eval { binmode ($$self{output_fh}, ':encoding(UTF-8)') }; | |
750 | } | |
751 | ||
b7ae008f SP |
752 | # Determine information for the preamble and then output it. |
753 | my ($name, $section); | |
754 | if (defined $$self{name}) { | |
755 | $name = $$self{name}; | |
756 | $section = $$self{section} || 1; | |
757 | } else { | |
758 | ($name, $section) = $self->devise_title; | |
9741dab0 | 759 | } |
b7ae008f SP |
760 | my $date = $$self{date} || $self->devise_date; |
761 | $self->preamble ($name, $section, $date) | |
762 | unless $self->bare_output or DEBUG > 9; | |
9741dab0 | 763 | |
b7ae008f | 764 | # Initialize a few per-document variables. |
b616daaf JH |
765 | $$self{INDENT} = 0; # Current indentation level. |
766 | $$self{INDENTS} = []; # Stack of indentations. | |
767 | $$self{INDEX} = []; # Index keys waiting to be printed. | |
2da3dd12 | 768 | $$self{IN_NAME} = 0; # Whether processing the NAME section. |
b616daaf | 769 | $$self{ITEMS} = 0; # The number of consecutive =items. |
4213be12 | 770 | $$self{ITEMTYPES} = []; # Stack of =item types, one per list. |
b616daaf JH |
771 | $$self{SHIFTWAIT} = 0; # Whether there is a shift waiting. |
772 | $$self{SHIFTS} = []; # Stack of .RS shifts. | |
b7ae008f | 773 | $$self{PENDING} = [[]]; # Pending output. |
9741dab0 GS |
774 | } |
775 | ||
b7ae008f SP |
776 | # Handle the end of the document. This does nothing but print out a final |
777 | # comment at the end of the document under debugging. | |
778 | sub end_document { | |
779 | my ($self) = @_; | |
780 | return if $self->bare_output; | |
781 | return if ($$self{CONTENTLESS} && !$$self{ALWAYS_EMIT_SOMETHING}); | |
782 | $self->output (q(.\" [End document]) . "\n") if DEBUG; | |
783 | } | |
9741dab0 | 784 | |
b7ae008f SP |
785 | # Try to figure out the name and section from the file name and return them as |
786 | # a list, returning an empty name and section 1 if we can't find any better | |
787 | # information. Uses File::Basename and File::Spec as necessary. | |
788 | sub devise_title { | |
789 | my ($self) = @_; | |
790 | my $name = $self->source_filename || ''; | |
791 | my $section = $$self{section} || 1; | |
792 | $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i); | |
793 | $name =~ s/\.p(od|[lm])\z//i; | |
794 | ||
795 | # If the section isn't 3, then the name defaults to just the basename of | |
796 | # the file. Otherwise, assume we're dealing with a module. We want to | |
797 | # figure out the full module name from the path to the file, but we don't | |
798 | # want to include too much of the path into the module name. Lose | |
799 | # anything up to the first off: | |
800 | # | |
801 | # */lib/*perl*/ standard or site_perl module | |
802 | # */*perl*/lib/ from -Dprefix=/opt/perl | |
803 | # */*perl*/ random module hierarchy | |
804 | # | |
805 | # which works. Also strip off a leading site, site_perl, or vendor_perl | |
806 | # component, any OS-specific component, and any version number component, | |
807 | # and strip off an initial component of "lib" or "blib/lib" since that's | |
808 | # what ExtUtils::MakeMaker creates. splitdir requires at least File::Spec | |
809 | # 0.8. | |
810 | if ($section !~ /^3/) { | |
811 | require File::Basename; | |
812 | $name = uc File::Basename::basename ($name); | |
3c014959 | 813 | } else { |
b7ae008f SP |
814 | require File::Spec; |
815 | my ($volume, $dirs, $file) = File::Spec->splitpath ($name); | |
816 | my @dirs = File::Spec->splitdir ($dirs); | |
817 | my $cut = 0; | |
818 | my $i; | |
40dcca8a RGS |
819 | for ($i = 0; $i < @dirs; $i++) { |
820 | if ($dirs[$i] =~ /perl/) { | |
b7ae008f | 821 | $cut = $i + 1; |
40dcca8a | 822 | $cut++ if ($dirs[$i + 1] && $dirs[$i + 1] eq 'lib'); |
b7ae008f SP |
823 | last; |
824 | } | |
825 | } | |
826 | if ($cut > 0) { | |
827 | splice (@dirs, 0, $cut); | |
828 | shift @dirs if ($dirs[0] =~ /^(site|vendor)(_perl)?$/); | |
829 | shift @dirs if ($dirs[0] =~ /^[\d.]+$/); | |
830 | shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/); | |
831 | } | |
832 | shift @dirs if $dirs[0] eq 'lib'; | |
833 | splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib'); | |
834 | ||
835 | # Remove empty directories when building the module name; they | |
836 | # occur too easily on Unix by doubling slashes. | |
837 | $name = join ('::', (grep { $_ ? $_ : () } @dirs), $file); | |
844b31e3 | 838 | } |
b7ae008f | 839 | return ($name, $section); |
9741dab0 GS |
840 | } |
841 | ||
b7ae008f SP |
842 | # Determine the modification date and return that, properly formatted in ISO |
843 | # format. If we can't get the modification date of the input, instead use the | |
fcf69717 SP |
844 | # current time. Pod::Simple returns a completely unuseful stringified file |
845 | # handle as the source_filename for input from a file handle, so we have to | |
846 | # deal with that as well. | |
b7ae008f SP |
847 | sub devise_date { |
848 | my ($self) = @_; | |
849 | my $input = $self->source_filename; | |
fcf69717 SP |
850 | my $time; |
851 | if ($input) { | |
852 | $time = (stat $input)[9] || time; | |
853 | } else { | |
854 | $time = time; | |
855 | } | |
b7ae008f | 856 | return strftime ('%Y-%m-%d', localtime $time); |
9741dab0 GS |
857 | } |
858 | ||
b7ae008f SP |
859 | # Print out the preamble and the title. The meaning of the arguments to .TH |
860 | # unfortunately vary by system; some systems consider the fourth argument to | |
861 | # be a "source" and others use it as a version number. Generally it's just | |
862 | # presented as the left-side footer, though, so it doesn't matter too much if | |
863 | # a particular system gives it another interpretation. | |
864 | # | |
865 | # The order of date and release used to be reversed in older versions of this | |
866 | # module, but this order is correct for both Solaris and Linux. | |
867 | sub preamble { | |
868 | my ($self, $name, $section, $date) = @_; | |
0e4e3f6e | 869 | my $preamble = $self->preamble_template (!$$self{utf8}); |
b7ae008f SP |
870 | |
871 | # Build the index line and make sure that it will be syntactically valid. | |
872 | my $index = "$name $section"; | |
873 | $index =~ s/\"/\"\"/g; | |
874 | ||
875 | # If name or section contain spaces, quote them (section really never | |
876 | # should, but we may as well be cautious). | |
877 | for ($name, $section) { | |
878 | if (/\s/) { | |
879 | s/\"/\"\"/g; | |
880 | $_ = '"' . $_ . '"'; | |
881 | } | |
882 | } | |
883 | ||
884 | # Double quotes in date, since it will be quoted. | |
885 | $date =~ s/\"/\"\"/g; | |
886 | ||
887 | # Substitute into the preamble the configuration options. | |
888 | $preamble =~ s/\@CFONT\@/$$self{fixed}/; | |
889 | $preamble =~ s/\@LQUOTE\@/$$self{LQUOTE}/; | |
890 | $preamble =~ s/\@RQUOTE\@/$$self{RQUOTE}/; | |
891 | chomp $preamble; | |
892 | ||
893 | # Get the version information. | |
894 | my $version = $self->version_report; | |
895 | ||
896 | # Finally output everything. | |
897 | $self->output (<<"----END OF HEADER----"); | |
898 | .\\" Automatically generated by $version | |
899 | .\\" | |
900 | .\\" Standard preamble: | |
901 | .\\" ======================================================================== | |
902 | $preamble | |
903 | .\\" ======================================================================== | |
904 | .\\" | |
905 | .IX Title "$index" | |
906 | .TH $name $section "$date" "$$self{release}" "$$self{center}" | |
42ae9e1d RGS |
907 | .\\" For nroff, turn off justification. Always turn off hyphenation; it makes |
908 | .\\" way too many mistakes in technical documents. | |
909 | .if n .ad l | |
910 | .nh | |
b7ae008f SP |
911 | ----END OF HEADER---- |
912 | $self->output (".\\\" [End of preamble]\n") if DEBUG; | |
913 | } | |
914 | ||
915 | ############################################################################## | |
916 | # Text blocks | |
917 | ############################################################################## | |
9741dab0 | 918 | |
b7ae008f SP |
919 | # Handle a basic block of text. The only tricky part of this is if this is |
920 | # the first paragraph of text after an =over, in which case we have to change | |
921 | # indentations for *roff. | |
922 | sub cmd_para { | |
923 | my ($self, $attrs, $text) = @_; | |
924 | my $line = $$attrs{start_line}; | |
bf202ccd JH |
925 | |
926 | # Output the paragraph. We also have to handle =over without =item. If | |
4213be12 HS |
927 | # there's an =over without =item, SHIFTWAIT will be set, and we need to |
928 | # handle creation of the indent here. Add the shift to SHIFTS so that it | |
929 | # will be cleaned up on =back. | |
5cdeb5a2 | 930 | $self->makespace; |
b616daaf | 931 | if ($$self{SHIFTWAIT}) { |
bf202ccd | 932 | $self->output (".RS $$self{INDENT}\n"); |
b616daaf JH |
933 | push (@{ $$self{SHIFTS} }, $$self{INDENT}); |
934 | $$self{SHIFTWAIT} = 0; | |
bf202ccd | 935 | } |
9741dab0 | 936 | |
b7ae008f SP |
937 | # Add the line number for debugging, but not in the NAME section just in |
938 | # case the comment would confuse apropos. | |
939 | $self->output (".\\\" [At source line $line]\n") | |
940 | if defined ($line) && DEBUG && !$$self{IN_NAME}; | |
9741dab0 | 941 | |
b7ae008f SP |
942 | # Force exactly one newline at the end and strip unwanted trailing |
943 | # whitespace at the end. | |
944 | $text =~ s/\s*$/\n/; | |
9741dab0 | 945 | |
b7ae008f SP |
946 | # Output the paragraph. |
947 | $self->output ($self->protect ($self->textmapfonts ($text))); | |
948 | $self->outindex; | |
949 | $$self{NEEDSPACE} = 1; | |
950 | return ''; | |
951 | } | |
5cdeb5a2 | 952 | |
b7ae008f SP |
953 | # Handle a verbatim paragraph. Put a null token at the beginning of each line |
954 | # to protect against commands and wrap in .Vb/.Ve (which we define in our | |
955 | # prelude). | |
956 | sub cmd_verbatim { | |
957 | my ($self, $attrs, $text) = @_; | |
958 | ||
959 | # Ignore an empty verbatim paragraph. | |
960 | return unless $text =~ /\S/; | |
961 | ||
962 | # Force exactly one newline at the end and strip unwanted trailing | |
963 | # whitespace at the end. | |
964 | $text =~ s/\s*$/\n/; | |
965 | ||
966 | # Get a count of the number of lines before the first blank line, which | |
967 | # we'll pass to .Vb as its parameter. This tells *roff to keep that many | |
968 | # lines together. We don't want to tell *roff to keep huge blocks | |
969 | # together. | |
970 | my @lines = split (/\n/, $text); | |
971 | my $unbroken = 0; | |
972 | for (@lines) { | |
973 | last if /^\s*$/; | |
974 | $unbroken++; | |
9741dab0 | 975 | } |
b7ae008f | 976 | $unbroken = 10 if ($unbroken > 12 && !$$self{MAGIC_VNOPAGEBREAK_LIMIT}); |
9741dab0 | 977 | |
b7ae008f SP |
978 | # Prepend a null token to each line. |
979 | $text =~ s/^/\\&/gm; | |
9741dab0 | 980 | |
b7ae008f SP |
981 | # Output the results. |
982 | $self->makespace; | |
983 | $self->output (".Vb $unbroken\n$text.Ve\n"); | |
984 | $$self{NEEDSPACE} = 1; | |
985 | return ''; | |
9741dab0 GS |
986 | } |
987 | ||
b7ae008f SP |
988 | # Handle literal text (produced by =for and similar constructs). Just output |
989 | # it with the minimum of changes. | |
990 | sub cmd_data { | |
991 | my ($self, $attrs, $text) = @_; | |
992 | $text =~ s/^\n+//; | |
993 | $text =~ s/\n{0,2}$/\n/; | |
994 | $self->output ($text); | |
995 | return ''; | |
996 | } | |
9741dab0 | 997 | |
3c014959 | 998 | ############################################################################## |
b7ae008f | 999 | # Headings |
3c014959 | 1000 | ############################################################################## |
9741dab0 | 1001 | |
b7ae008f SP |
1002 | # Common code for all headings. This is called before the actual heading is |
1003 | # output. It returns the cleaned up heading text (putting the heading all on | |
1004 | # one line) and may do other things, like closing bad =item blocks. | |
1005 | sub heading_common { | |
1006 | my ($self, $text, $line) = @_; | |
1007 | $text =~ s/\s+$//; | |
1008 | $text =~ s/\s*\n\s*/ /g; | |
9741dab0 | 1009 | |
b7ae008f SP |
1010 | # This should never happen; it means that we have a heading after =item |
1011 | # without an intervening =back. But just in case, handle it anyway. | |
5cdeb5a2 JH |
1012 | if ($$self{ITEMS} > 1) { |
1013 | $$self{ITEMS} = 0; | |
1014 | $self->output (".PD\n"); | |
1015 | } | |
b7ae008f SP |
1016 | |
1017 | # Output the current source line. | |
1018 | $self->output ( ".\\\" [At source line $line]\n" ) | |
1019 | if defined ($line) && DEBUG; | |
1020 | return $text; | |
1021 | } | |
1022 | ||
1023 | # First level heading. We can't output .IX in the NAME section due to a bug | |
1024 | # in some versions of catman, so don't output a .IX for that section. .SH | |
1025 | # already uses small caps, so remove \s0 and \s-1. Maintain IN_NAME as | |
1026 | # appropriate. | |
1027 | sub cmd_head1 { | |
1028 | my ($self, $attrs, $text) = @_; | |
1029 | $text =~ s/\\s-?\d//g; | |
1030 | $text = $self->heading_common ($text, $$attrs{start_line}); | |
1031 | my $isname = ($text eq 'NAME' || $text =~ /\(NAME\)/); | |
1032 | $self->output ($self->switchquotes ('.SH', $self->mapfonts ($text))); | |
1033 | $self->outindex ('Header', $text) unless $isname; | |
9741dab0 | 1034 | $$self{NEEDSPACE} = 0; |
b7ae008f SP |
1035 | $$self{IN_NAME} = $isname; |
1036 | return ''; | |
9741dab0 GS |
1037 | } |
1038 | ||
1039 | # Second level heading. | |
1040 | sub cmd_head2 { | |
b7ae008f SP |
1041 | my ($self, $attrs, $text) = @_; |
1042 | $text = $self->heading_common ($text, $$attrs{start_line}); | |
0e4e3f6e | 1043 | $self->output ($self->switchquotes ('.SS', $self->mapfonts ($text))); |
b7ae008f | 1044 | $self->outindex ('Subsection', $text); |
9741dab0 | 1045 | $$self{NEEDSPACE} = 0; |
b7ae008f | 1046 | return ''; |
9741dab0 GS |
1047 | } |
1048 | ||
b7ae008f SP |
1049 | # Third level heading. *roff doesn't have this concept, so just put the |
1050 | # heading in italics as a normal paragraph. | |
50a3fd2a | 1051 | sub cmd_head3 { |
b7ae008f SP |
1052 | my ($self, $attrs, $text) = @_; |
1053 | $text = $self->heading_common ($text, $$attrs{start_line}); | |
50a3fd2a | 1054 | $self->makespace; |
b7ae008f SP |
1055 | $self->output ($self->textmapfonts ('\f(IS' . $text . '\f(IE') . "\n"); |
1056 | $self->outindex ('Subsection', $text); | |
50a3fd2a | 1057 | $$self{NEEDSPACE} = 1; |
b7ae008f | 1058 | return ''; |
50a3fd2a RA |
1059 | } |
1060 | ||
b7ae008f SP |
1061 | # Fourth level heading. *roff doesn't have this concept, so just put the |
1062 | # heading as a normal paragraph. | |
50a3fd2a | 1063 | sub cmd_head4 { |
b7ae008f SP |
1064 | my ($self, $attrs, $text) = @_; |
1065 | $text = $self->heading_common ($text, $$attrs{start_line}); | |
50a3fd2a | 1066 | $self->makespace; |
b7ae008f SP |
1067 | $self->output ($self->textmapfonts ($text) . "\n"); |
1068 | $self->outindex ('Subsection', $text); | |
50a3fd2a | 1069 | $$self{NEEDSPACE} = 1; |
b7ae008f | 1070 | return ''; |
50a3fd2a RA |
1071 | } |
1072 | ||
b7ae008f SP |
1073 | ############################################################################## |
1074 | # Formatting codes | |
1075 | ############################################################################## | |
1076 | ||
1077 | # All of the formatting codes that aren't handled internally by the parser, | |
1078 | # other than L<> and X<>. | |
1079 | sub cmd_b { return '\f(BS' . $_[2] . '\f(BE' } | |
1080 | sub cmd_i { return '\f(IS' . $_[2] . '\f(IE' } | |
1081 | sub cmd_f { return '\f(IS' . $_[2] . '\f(IE' } | |
1082 | sub cmd_c { return $_[0]->quote_literal ($_[2]) } | |
1083 | ||
1084 | # Index entries are just added to the pending entries. | |
1085 | sub cmd_x { | |
1086 | my ($self, $attrs, $text) = @_; | |
1087 | push (@{ $$self{INDEX} }, $text); | |
1088 | return ''; | |
1089 | } | |
1090 | ||
1091 | # Links reduce to the text that we're given, wrapped in angle brackets if it's | |
1092 | # a URL. | |
1093 | sub cmd_l { | |
1094 | my ($self, $attrs, $text) = @_; | |
1095 | return $$attrs{type} eq 'url' ? "<$text>" : $text; | |
1096 | } | |
1097 | ||
1098 | ############################################################################## | |
1099 | # List handling | |
1100 | ############################################################################## | |
1101 | ||
1102 | # Handle the beginning of an =over block. Takes the type of the block as the | |
1103 | # first argument, and then the attr hash. This is called by the handlers for | |
1104 | # the four different types of lists (bullet, number, text, and block). | |
1105 | sub over_common_start { | |
1106 | my ($self, $type, $attrs) = @_; | |
1107 | my $line = $$attrs{start_line}; | |
1108 | my $indent = $$attrs{indent}; | |
1109 | DEBUG > 3 and print " Starting =over $type (line $line, indent ", | |
1110 | ($indent || '?'), "\n"; | |
1111 | ||
1112 | # Find the indentation level. | |
1113 | unless (defined ($indent) && $indent =~ /^[-+]?\d{1,4}\s*$/) { | |
1114 | $indent = $$self{indent}; | |
1115 | } | |
1116 | ||
1117 | # If we've gotten multiple indentations in a row, we need to emit the | |
1118 | # pending indentation for the last level that we saw and haven't acted on | |
1119 | # yet. SHIFTS is the stack of indentations that we've actually emitted | |
1120 | # code for. | |
b616daaf | 1121 | if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) { |
9741dab0 | 1122 | $self->output (".RS $$self{INDENT}\n"); |
b616daaf | 1123 | push (@{ $$self{SHIFTS} }, $$self{INDENT}); |
9741dab0 | 1124 | } |
b7ae008f SP |
1125 | |
1126 | # Now, do record-keeping. INDENTS is a stack of indentations that we've | |
1127 | # seen so far, and INDENT is the current level of indentation. ITEMTYPES | |
1128 | # is a stack of list types that we've seen. | |
9741dab0 | 1129 | push (@{ $$self{INDENTS} }, $$self{INDENT}); |
b7ae008f SP |
1130 | push (@{ $$self{ITEMTYPES} }, $type); |
1131 | $$self{INDENT} = $indent + 0; | |
b616daaf | 1132 | $$self{SHIFTWAIT} = 1; |
9741dab0 GS |
1133 | } |
1134 | ||
b7ae008f SP |
1135 | # End an =over block. Takes no options other than the class pointer. |
1136 | # Normally, once we close a block and therefore remove something from INDENTS, | |
1137 | # INDENTS will now be longer than SHIFTS, indicating that we also need to emit | |
1138 | # *roff code to close the indent. This isn't *always* true, depending on the | |
1139 | # circumstance. If we're still inside an indentation, we need to emit another | |
1140 | # .RE and then a new .RS to unconfuse *roff. | |
1141 | sub over_common_end { | |
1142 | my ($self) = @_; | |
1143 | DEBUG > 3 and print " Ending =over\n"; | |
9741dab0 | 1144 | $$self{INDENT} = pop @{ $$self{INDENTS} }; |
b7ae008f SP |
1145 | pop @{ $$self{ITEMTYPES} }; |
1146 | ||
1147 | # If we emitted code for that indentation, end it. | |
b616daaf | 1148 | if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) { |
9741dab0 | 1149 | $self->output (".RE\n"); |
b616daaf | 1150 | pop @{ $$self{SHIFTS} }; |
9741dab0 | 1151 | } |
b7ae008f SP |
1152 | |
1153 | # If we're still in an indentation, *roff will have now lost track of the | |
1154 | # right depth of that indentation, so fix that. | |
9741dab0 GS |
1155 | if (@{ $$self{INDENTS} } > 0) { |
1156 | $self->output (".RE\n"); | |
1157 | $self->output (".RS $$self{INDENT}\n"); | |
9741dab0 GS |
1158 | } |
1159 | $$self{NEEDSPACE} = 1; | |
b616daaf | 1160 | $$self{SHIFTWAIT} = 0; |
9741dab0 GS |
1161 | } |
1162 | ||
b7ae008f SP |
1163 | # Dispatch the start and end calls as appropriate. |
1164 | sub start_over_bullet { my $s = shift; $s->over_common_start ('bullet', @_) } | |
1165 | sub start_over_number { my $s = shift; $s->over_common_start ('number', @_) } | |
1166 | sub start_over_text { my $s = shift; $s->over_common_start ('text', @_) } | |
1167 | sub start_over_block { my $s = shift; $s->over_common_start ('block', @_) } | |
1168 | sub end_over_bullet { $_[0]->over_common_end } | |
1169 | sub end_over_number { $_[0]->over_common_end } | |
1170 | sub end_over_text { $_[0]->over_common_end } | |
1171 | sub end_over_block { $_[0]->over_common_end } | |
1172 | ||
1173 | # The common handler for all item commands. Takes the type of the item, the | |
1174 | # attributes, and then the text of the item. | |
1175 | # | |
1176 | # Emit an index entry for anything that's interesting, but don't emit index | |
1177 | # entries for things like bullets and numbers. Newlines in an item title are | |
1178 | # turned into spaces since *roff can't handle them embedded. | |
1179 | sub item_common { | |
1180 | my ($self, $type, $attrs, $text) = @_; | |
1181 | my $line = $$attrs{start_line}; | |
1182 | DEBUG > 3 and print " $type item (line $line): $text\n"; | |
1183 | ||
1184 | # Clean up the text. We want to end up with two variables, one ($text) | |
1185 | # which contains any body text after taking out the item portion, and | |
1186 | # another ($item) which contains the actual item text. | |
1187 | $text =~ s/\s+$//; | |
1188 | my ($item, $index); | |
1189 | if ($type eq 'bullet') { | |
1190 | $item = "\\\(bu"; | |
1191 | $text =~ s/\n*$/\n/; | |
1192 | } elsif ($type eq 'number') { | |
1193 | $item = $$attrs{number} . '.'; | |
1194 | } else { | |
1195 | $item = $text; | |
1196 | $item =~ s/\s*\n\s*/ /g; | |
1197 | $text = ''; | |
1198 | $index = $item if ($item =~ /\w/); | |
4213be12 | 1199 | } |
b7ae008f SP |
1200 | |
1201 | # Take care of the indentation. If shifts and indents are equal, close | |
1202 | # the top shift, since we're about to create an indentation with .IP. | |
1203 | # Also output .PD 0 to turn off spacing between items if this item is | |
1204 | # directly following another one. We only have to do that once for a | |
1205 | # whole chain of items so do it for the second item in the change. Note | |
1206 | # that makespace is what undoes this. | |
b616daaf | 1207 | if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) { |
9741dab0 | 1208 | $self->output (".RE\n"); |
b616daaf | 1209 | pop @{ $$self{SHIFTS} }; |
b7ae008f SP |
1210 | } |
1211 | $self->output (".PD 0\n") if ($$self{ITEMS} == 1); | |
3c014959 | 1212 | |
b7ae008f SP |
1213 | # Now, output the item tag itself. |
1214 | $item = $self->textmapfonts ($item); | |
1215 | $self->output ($self->switchquotes ('.IP', $item, $$self{INDENT})); | |
1216 | $$self{NEEDSPACE} = 0; | |
1217 | $$self{ITEMS}++; | |
1218 | $$self{SHIFTWAIT} = 0; | |
3c014959 | 1219 | |
b7ae008f SP |
1220 | # If body text for this item was included, go ahead and output that now. |
1221 | if ($text) { | |
1222 | $text =~ s/\s*$/\n/; | |
1223 | $self->makespace; | |
1224 | $self->output ($self->protect ($self->textmapfonts ($text))); | |
1225 | $$self{NEEDSPACE} = 1; | |
1226 | } | |
1227 | $self->outindex ($index ? ('Item', $index) : ()); | |
3c014959 JH |
1228 | } |
1229 | ||
b7ae008f SP |
1230 | # Dispatch the item commands to the appropriate place. |
1231 | sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) } | |
1232 | sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) } | |
1233 | sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) } | |
1234 | sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) } | |
9741dab0 | 1235 | |
3c014959 | 1236 | ############################################################################## |
8f202758 SP |
1237 | # Backward compatibility |
1238 | ############################################################################## | |
1239 | ||
1240 | # Reset the underlying Pod::Simple object between calls to parse_from_file so | |
1241 | # that the same object can be reused to convert multiple pages. | |
1242 | sub parse_from_file { | |
1243 | my $self = shift; | |
1244 | $self->reinit; | |
42ae9e1d RGS |
1245 | |
1246 | # Fake the old cutting option to Pod::Parser. This fiddings with internal | |
1247 | # Pod::Simple state and is quite ugly; we need a better approach. | |
1248 | if (ref ($_[0]) eq 'HASH') { | |
1249 | my $opts = shift @_; | |
1250 | if (defined ($$opts{-cutting}) && !$$opts{-cutting}) { | |
1251 | $$self{in_pod} = 1; | |
1252 | $$self{last_was_blank} = 1; | |
1253 | } | |
1254 | } | |
1255 | ||
1256 | # Do the work. | |
8f202758 | 1257 | my $retval = $self->SUPER::parse_from_file (@_); |
42ae9e1d RGS |
1258 | |
1259 | # Flush output, since Pod::Simple doesn't do this. Ideally we should also | |
1260 | # close the file descriptor if we had to open one, but we can't easily | |
1261 | # figure this out. | |
8f202758 SP |
1262 | my $fh = $self->output_fh (); |
1263 | my $oldfh = select $fh; | |
1264 | my $oldflush = $|; | |
1265 | $| = 1; | |
1266 | print $fh ''; | |
1267 | $| = $oldflush; | |
1268 | select $oldfh; | |
1269 | return $retval; | |
1270 | } | |
1271 | ||
fcf69717 SP |
1272 | # Pod::Simple failed to provide this backward compatibility function, so |
1273 | # implement it ourselves. File handles are one of the inputs that | |
1274 | # parse_from_file supports. | |
1275 | sub parse_from_filehandle { | |
1276 | my $self = shift; | |
1277 | $self->parse_from_file (@_); | |
1278 | } | |
1279 | ||
8f202758 | 1280 | ############################################################################## |
b7ae008f | 1281 | # Translation tables |
3c014959 | 1282 | ############################################################################## |
9741dab0 | 1283 | |
b7ae008f SP |
1284 | # The following table is adapted from Tom Christiansen's pod2man. It assumes |
1285 | # that the standard preamble has already been printed, since that's what | |
1286 | # defines all of the accent marks. We really want to do something better than | |
1287 | # this when *roff actually supports other character sets itself, since these | |
1288 | # results are pretty poor. | |
1289 | # | |
1290 | # This only works in an ASCII world. What to do in a non-ASCII world is very | |
0e4e3f6e | 1291 | # unclear -- hopefully we can assume UTF-8 and just leave well enough alone. |
b7ae008f SP |
1292 | @ESCAPES{0xA0 .. 0xFF} = ( |
1293 | "\\ ", undef, undef, undef, undef, undef, undef, undef, | |
1294 | undef, undef, undef, undef, undef, "\\%", undef, undef, | |
9741dab0 | 1295 | |
b7ae008f SP |
1296 | undef, undef, undef, undef, undef, undef, undef, undef, |
1297 | undef, undef, undef, undef, undef, undef, undef, undef, | |
9741dab0 | 1298 | |
b7ae008f SP |
1299 | "A\\*`", "A\\*'", "A\\*^", "A\\*~", "A\\*:", "A\\*o", "\\*(AE", "C\\*,", |
1300 | "E\\*`", "E\\*'", "E\\*^", "E\\*:", "I\\*`", "I\\*'", "I\\*^", "I\\*:", | |
9741dab0 | 1301 | |
b7ae008f SP |
1302 | "\\*(D-", "N\\*~", "O\\*`", "O\\*'", "O\\*^", "O\\*~", "O\\*:", undef, |
1303 | "O\\*/", "U\\*`", "U\\*'", "U\\*^", "U\\*:", "Y\\*'", "\\*(Th", "\\*8", | |
50a3fd2a | 1304 | |
b7ae008f SP |
1305 | "a\\*`", "a\\*'", "a\\*^", "a\\*~", "a\\*:", "a\\*o", "\\*(ae", "c\\*,", |
1306 | "e\\*`", "e\\*'", "e\\*^", "e\\*:", "i\\*`", "i\\*'", "i\\*^", "i\\*:", | |
3c014959 | 1307 | |
b7ae008f SP |
1308 | "\\*(d-", "n\\*~", "o\\*`", "o\\*'", "o\\*^", "o\\*~", "o\\*:", undef, |
1309 | "o\\*/" , "u\\*`", "u\\*'", "u\\*^", "u\\*:", "y\\*'", "\\*(th", "y\\*:", | |
1310 | ) if ASCII; | |
3c014959 | 1311 | |
b7ae008f SP |
1312 | ############################################################################## |
1313 | # Premable | |
1314 | ############################################################################## | |
1315 | ||
1316 | # The following is the static preamble which starts all *roff output we | |
0e4e3f6e SH |
1317 | # generate. Most is static except for the font to use as a fixed-width font, |
1318 | # which is designed by @CFONT@, and the left and right quotes to use for C<> | |
1319 | # text, designated by @LQOUTE@ and @RQUOTE@. However, the second part, which | |
1320 | # defines the accent marks, is only used if $escapes is set to true. | |
b7ae008f | 1321 | sub preamble_template { |
0e4e3f6e SH |
1322 | my ($self, $accents) = @_; |
1323 | my $preamble = <<'----END OF PREAMBLE----'; | |
b7ae008f SP |
1324 | .de Sp \" Vertical space (when we can't use .PP) |
1325 | .if t .sp .5v | |
1326 | .if n .sp | |
1327 | .. | |
1328 | .de Vb \" Begin verbatim text | |
1329 | .ft @CFONT@ | |
1330 | .nf | |
1331 | .ne \\$1 | |
1332 | .. | |
1333 | .de Ve \" End verbatim text | |
1334 | .ft R | |
1335 | .fi | |
1336 | .. | |
1337 | .\" Set up some character translations and predefined strings. \*(-- will | |
1338 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
42ae9e1d RGS |
1339 | .\" double quote, and \*(R" will give a right double quote. \*(C+ will |
1340 | .\" give a nicer C++. Capital omega is used to do unbreakable dashes and | |
1341 | .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, | |
1342 | .\" nothing in troff, for use with C<>. | |
1343 | .tr \(*W- | |
b7ae008f SP |
1344 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
1345 | .ie n \{\ | |
1346 | . ds -- \(*W- | |
1347 | . ds PI pi | |
1348 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
1349 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
1350 | . ds L" "" | |
1351 | . ds R" "" | |
1352 | . ds C` @LQUOTE@ | |
1353 | . ds C' @RQUOTE@ | |
1354 | 'br\} | |
1355 | .el\{\ | |
1356 | . ds -- \|\(em\| | |
1357 | . ds PI \(*p | |
1358 | . ds L" `` | |
1359 | . ds R" '' | |
1360 | 'br\} | |
1361 | .\" | |
40dcca8a RGS |
1362 | .\" Escape single quotes in literal strings from groff's Unicode transform. |
1363 | .ie \n(.g .ds Aq \(aq | |
1364 | .el .ds Aq ' | |
1365 | .\" | |
b7ae008f | 1366 | .\" If the F register is turned on, we'll generate index entries on stderr for |
0e4e3f6e | 1367 | .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index |
b7ae008f SP |
1368 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
1369 | .\" output yourself in some meaningful fashion. | |
40dcca8a | 1370 | .ie \nF \{\ |
b7ae008f SP |
1371 | . de IX |
1372 | . tm Index:\\$1\t\\n%\t"\\$2" | |
1373 | .. | |
1374 | . nr % 0 | |
1375 | . rr F | |
1376 | .\} | |
40dcca8a RGS |
1377 | .el \{\ |
1378 | . de IX | |
1379 | .. | |
1380 | .\} | |
0e4e3f6e SH |
1381 | ----END OF PREAMBLE---- |
1382 | ||
1383 | if ($accents) { | |
1384 | $preamble .= <<'----END OF PREAMBLE----' | |
b7ae008f | 1385 | .\" |
b7ae008f SP |
1386 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
1387 | .\" Fear. Run. Save yourself. No user-serviceable parts. | |
1388 | . \" fudge factors for nroff and troff | |
1389 | .if n \{\ | |
1390 | . ds #H 0 | |
1391 | . ds #V .8m | |
1392 | . ds #F .3m | |
1393 | . ds #[ \f1 | |
1394 | . ds #] \fP | |
1395 | .\} | |
1396 | .if t \{\ | |
1397 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
1398 | . ds #V .6m | |
1399 | . ds #F 0 | |
1400 | . ds #[ \& | |
1401 | . ds #] \& | |
1402 | .\} | |
1403 | . \" simple accents for nroff and troff | |
1404 | .if n \{\ | |
1405 | . ds ' \& | |
1406 | . ds ` \& | |
1407 | . ds ^ \& | |
1408 | . ds , \& | |
1409 | . ds ~ ~ | |
1410 | . ds / | |
1411 | .\} | |
1412 | .if t \{\ | |
1413 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
1414 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
1415 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
1416 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
1417 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
1418 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
1419 | .\} | |
1420 | . \" troff and (daisy-wheel) nroff accents | |
1421 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
1422 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
1423 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
1424 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
1425 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
1426 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
1427 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
1428 | .ds ae a\h'-(\w'a'u*4/10)'e | |
1429 | .ds Ae A\h'-(\w'A'u*4/10)'E | |
1430 | . \" corrections for vroff | |
1431 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
1432 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
1433 | . \" for low resolution devices (crt and lpr) | |
1434 | .if \n(.H>23 .if \n(.V>19 \ | |
1435 | \{\ | |
1436 | . ds : e | |
1437 | . ds 8 ss | |
1438 | . ds o a | |
1439 | . ds d- d\h'-1'\(ga | |
1440 | . ds D- D\h'-1'\(hy | |
1441 | . ds th \o'bp' | |
1442 | . ds Th \o'LP' | |
1443 | . ds ae ae | |
1444 | . ds Ae AE | |
1445 | .\} | |
1446 | .rm #[ #] #H #V #F C | |
1447 | ----END OF PREAMBLE---- | |
1448 | #`# for cperl-mode | |
0e4e3f6e SH |
1449 | } |
1450 | return $preamble; | |
50a3fd2a RA |
1451 | } |
1452 | ||
3c014959 | 1453 | ############################################################################## |
5e2effed | 1454 | # Module return value and documentation |
3c014959 | 1455 | ############################################################################## |
9741dab0 | 1456 | |
5e2effed JH |
1457 | 1; |
1458 | __END__ | |
1459 | ||
9741dab0 GS |
1460 | =head1 NAME |
1461 | ||
1462 | Pod::Man - Convert POD data to formatted *roff input | |
1463 | ||
0e4e3f6e | 1464 | =for stopwords |
bc9c7511 | 1465 | en em ALLCAPS teeny fixedbold fixeditalic fixedbolditalic stderr utf8 |
2504ae52 SH |
1466 | UTF-8 Allbery Sean Burke Ossanna Solaris formatters troff uppercased |
1467 | Christiansen | |
0e4e3f6e | 1468 | |
9741dab0 GS |
1469 | =head1 SYNOPSIS |
1470 | ||
1471 | use Pod::Man; | |
1472 | my $parser = Pod::Man->new (release => $VERSION, section => 8); | |
1473 | ||
1474 | # Read POD from STDIN and write to STDOUT. | |
b7ae008f | 1475 | $parser->parse_file (\*STDIN); |
9741dab0 GS |
1476 | |
1477 | # Read POD from file.pod and write to file.1. | |
1478 | $parser->parse_from_file ('file.pod', 'file.1'); | |
1479 | ||
1480 | =head1 DESCRIPTION | |
1481 | ||
1482 | Pod::Man is a module to convert documentation in the POD format (the | |
1483 | preferred language for documenting Perl) into *roff input using the man | |
1484 | macro set. The resulting *roff code is suitable for display on a terminal | |
bf202ccd JH |
1485 | using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>. |
1486 | It is conventionally invoked using the driver script B<pod2man>, but it can | |
1487 | also be used directly. | |
9741dab0 | 1488 | |
b7ae008f SP |
1489 | As a derived class from Pod::Simple, Pod::Man supports the same methods and |
1490 | interfaces. See L<Pod::Simple> for all the details. | |
9741dab0 GS |
1491 | |
1492 | new() can take options, in the form of key/value pairs that control the | |
1493 | behavior of the parser. See below for details. | |
1494 | ||
1495 | If no options are given, Pod::Man uses the name of the input file with any | |
1496 | trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to | |
1497 | section 1 unless the file ended in C<.pm> in which case it defaults to | |
1498 | section 3, to a centered title of "User Contributed Perl Documentation", to | |
1499 | a centered footer of the Perl version it is run with, and to a left-hand | |
1500 | footer of the modification date of its input (or the current date if given | |
0e4e3f6e | 1501 | C<STDIN> for input). |
9741dab0 GS |
1502 | |
1503 | Pod::Man assumes that your *roff formatters have a fixed-width font named | |
0e4e3f6e SH |
1504 | C<CW>. If yours is called something else (like C<CR>), use the C<fixed> |
1505 | option to specify it. This generally only matters for troff output for | |
1506 | printing. Similarly, you can set the fonts used for bold, italic, and | |
1507 | bold italic fixed-width output. | |
1508 | ||
1509 | Besides the obvious pod conversions, Pod::Man also takes care of | |
1510 | formatting func(), func(3), and simple variable references like $foo or | |
1511 | @bar so you don't have to use code escapes for them; complex expressions | |
1512 | like C<$fred{'stuff'}> will still need to be escaped, though. It also | |
1513 | translates dashes that aren't used as hyphens into en dashes, makes long | |
1514 | dashes--like this--into proper em dashes, fixes "paired quotes," makes C++ | |
1515 | look right, puts a little space between double underscores, makes ALLCAPS | |
1516 | a teeny bit smaller in B<troff>, and escapes stuff that *roff treats as | |
1517 | special so that you don't have to. | |
9741dab0 GS |
1518 | |
1519 | The recognized options to new() are as follows. All options take a single | |
1520 | argument. | |
1521 | ||
1522 | =over 4 | |
1523 | ||
1524 | =item center | |
1525 | ||
1526 | Sets the centered page header to use instead of "User Contributed Perl | |
1527 | Documentation". | |
1528 | ||
1529 | =item date | |
1530 | ||
1531 | Sets the left-hand footer. By default, the modification date of the input | |
1532 | file will be used, or the current date if stat() can't find that file (the | |
0e4e3f6e SH |
1533 | case if the input is from C<STDIN>), and the date will be formatted as |
1534 | C<YYYY-MM-DD>. | |
9741dab0 GS |
1535 | |
1536 | =item fixed | |
1537 | ||
0e4e3f6e SH |
1538 | The fixed-width font to use for verbatim text and code. Defaults to |
1539 | C<CW>. Some systems may want C<CR> instead. Only matters for B<troff> | |
1540 | output. | |
9741dab0 GS |
1541 | |
1542 | =item fixedbold | |
1543 | ||
0e4e3f6e SH |
1544 | Bold version of the fixed-width font. Defaults to C<CB>. Only matters |
1545 | for B<troff> output. | |
9741dab0 GS |
1546 | |
1547 | =item fixeditalic | |
1548 | ||
1549 | Italic version of the fixed-width font (actually, something of a misnomer, | |
1550 | since most fixed-width fonts only have an oblique version, not an italic | |
0e4e3f6e | 1551 | version). Defaults to C<CI>. Only matters for B<troff> output. |
9741dab0 GS |
1552 | |
1553 | =item fixedbolditalic | |
1554 | ||
1555 | Bold italic (probably actually oblique) version of the fixed-width font. | |
0e4e3f6e SH |
1556 | Pod::Man doesn't assume you have this, and defaults to C<CB>. Some |
1557 | systems (such as Solaris) have this font available as C<CX>. Only matters | |
1558 | for B<troff> output. | |
9741dab0 | 1559 | |
bf202ccd JH |
1560 | =item name |
1561 | ||
1562 | Set the name of the manual page. Without this option, the manual name is | |
1563 | set to the uppercased base name of the file being converted unless the | |
1564 | manual section is 3, in which case the path is parsed to see if it is a Perl | |
1565 | module path. If it is, a path like C<.../lib/Pod/Man.pm> is converted into | |
1566 | a name like C<Pod::Man>. This option, if given, overrides any automatic | |
1567 | determination of the name. | |
1568 | ||
ab1f1d91 JH |
1569 | =item quotes |
1570 | ||
1571 | Sets the quote marks used to surround CE<lt>> text. If the value is a | |
1572 | single character, it is used as both the left and right quote; if it is two | |
1573 | characters, the first character is used as the left quote and the second as | |
1574 | the right quoted; and if it is four characters, the first two are used as | |
1575 | the left quote and the second two as the right quote. | |
1576 | ||
1577 | This may also be set to the special value C<none>, in which case no quote | |
1578 | marks are added around CE<lt>> text (but the font is still changed for troff | |
1579 | output). | |
1580 | ||
9741dab0 GS |
1581 | =item release |
1582 | ||
1583 | Set the centered footer. By default, this is the version of Perl you run | |
bf202ccd | 1584 | Pod::Man under. Note that some system an macro sets assume that the |
9741dab0 GS |
1585 | centered footer will be a modification date and will prepend something like |
1586 | "Last modified: "; if this is the case, you may want to set C<release> to | |
1587 | the last modified date and C<date> to the version number. | |
1588 | ||
1589 | =item section | |
1590 | ||
1591 | Set the section for the C<.TH> macro. The standard section numbering | |
1592 | convention is to use 1 for user commands, 2 for system calls, 3 for | |
1593 | functions, 4 for devices, 5 for file formats, 6 for games, 7 for | |
1594 | miscellaneous information, and 8 for administrator commands. There is a lot | |
1595 | of variation here, however; some systems (like Solaris) use 4 for file | |
1596 | formats, 5 for miscellaneous information, and 7 for devices. Still others | |
1597 | use 1m instead of 8, or some mix of both. About the only section numbers | |
1598 | that are reliably consistent are 1, 2, and 3. | |
1599 | ||
0e4e3f6e SH |
1600 | By default, section 1 will be used unless the file ends in C<.pm> in which |
1601 | case section 3 will be selected. | |
9741dab0 | 1602 | |
bc9c7511 NC |
1603 | =item stderr |
1604 | ||
1605 | Send error messages about invalid POD to standard error instead of | |
1606 | appending a POD ERRORS section to the generated *roff output. | |
1607 | ||
55595e83 SP |
1608 | =item utf8 |
1609 | ||
1610 | By default, Pod::Man produces the most conservative possible *roff output | |
1611 | to try to ensure that it will work with as many different *roff | |
1612 | implementations as possible. Many *roff implementations cannot handle | |
1613 | non-ASCII characters, so this means all non-ASCII characters are converted | |
1614 | either to a *roff escape sequence that tries to create a properly accented | |
1615 | character (at least for troff output) or to C<X>. | |
1616 | ||
1617 | If this option is set, Pod::Man will instead output UTF-8. If your *roff | |
1618 | implementation can handle it, this is the best output format to use and | |
1619 | avoids corruption of documents containing non-ASCII characters. However, | |
1620 | be warned that *roff source with literal UTF-8 characters is not supported | |
1621 | by many implementations and may even result in segfaults and other bad | |
1622 | behavior. | |
1623 | ||
9f2f055a SH |
1624 | Be aware that, when using this option, the input encoding of your POD |
1625 | source must be properly declared unless it is US-ASCII or Latin-1. POD | |
1626 | input without an C<=encoding> command will be assumed to be in Latin-1, | |
1627 | and if it's actually in UTF-8, the output will be double-encoded. See | |
1628 | L<perlpod(1)> for more information on the C<=encoding> command. | |
1629 | ||
9741dab0 GS |
1630 | =back |
1631 | ||
b7ae008f | 1632 | The standard Pod::Simple method parse_file() takes one argument naming the |
0e4e3f6e SH |
1633 | POD file to read from. By default, the output is sent to C<STDOUT>, but |
1634 | this can be changed with the output_fd() method. | |
b7ae008f SP |
1635 | |
1636 | The standard Pod::Simple method parse_from_file() takes up to two | |
1637 | arguments, the first being the input file to read POD from and the second | |
1638 | being the file to write the formatted output to. | |
1639 | ||
1640 | You can also call parse_lines() to parse an array of lines or | |
1641 | parse_string_document() to parse a document already in memory. To put the | |
1642 | output into a string instead of a file handle, call the output_string() | |
1643 | method. See L<Pod::Simple> for the specific details. | |
9741dab0 GS |
1644 | |
1645 | =head1 DIAGNOSTICS | |
1646 | ||
1647 | =over 4 | |
1648 | ||
ab1f1d91 | 1649 | =item roff font should be 1 or 2 chars, not "%s" |
9741dab0 GS |
1650 | |
1651 | (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that | |
1652 | wasn't either one or two characters. Pod::Man doesn't support *roff fonts | |
1653 | longer than two characters, although some *roff extensions do (the canonical | |
bf202ccd | 1654 | versions of B<nroff> and B<troff> don't either). |
9741dab0 | 1655 | |
ab1f1d91 JH |
1656 | =item Invalid quote specification "%s" |
1657 | ||
1658 | (F) The quote specification given (the quotes option to the constructor) was | |
1659 | invalid. A quote specification must be one, two, or four characters long. | |
1660 | ||
9741dab0 GS |
1661 | =back |
1662 | ||
1663 | =head1 BUGS | |
1664 | ||
9f2f055a SH |
1665 | Encoding handling assumes that PerlIO is available and does not work |
1666 | properly if it isn't. The C<utf8> option is therefore not supported | |
1667 | unless Perl is built with PerlIO support. | |
1668 | ||
b4558dc4 JH |
1669 | There is currently no way to turn off the guesswork that tries to format |
1670 | unmarked text appropriately, and sometimes it isn't wanted (particularly | |
b7ae008f | 1671 | when using POD to document something other than Perl). Most of the work |
9f2f055a | 1672 | toward fixing this has now been done, however, and all that's still needed |
b7ae008f | 1673 | is a user interface. |
9741dab0 GS |
1674 | |
1675 | The NAME section should be recognized specially and index entries emitted | |
1676 | for everything in that section. This would have to be deferred until the | |
1677 | next section, since extraneous things in NAME tends to confuse various man | |
b7ae008f SP |
1678 | page processors. Currently, no index entries are emitted for anything in |
1679 | NAME. | |
9741dab0 | 1680 | |
9741dab0 | 1681 | Pod::Man doesn't handle font names longer than two characters. Neither do |
bf202ccd | 1682 | most B<troff> implementations, but GNU troff does as an extension. It would |
9741dab0 GS |
1683 | be nice to support as an option for those who want to use it. |
1684 | ||
b7ae008f SP |
1685 | The preamble added to each output file is rather verbose, and most of it |
1686 | is only necessary in the presence of non-ASCII characters. It would | |
1687 | ideally be nice if all of those definitions were only output if needed, | |
1688 | perhaps on the fly as the characters are used. | |
9741dab0 | 1689 | |
9741dab0 GS |
1690 | Pod::Man is excessively slow. |
1691 | ||
b4558dc4 JH |
1692 | =head1 CAVEATS |
1693 | ||
9f2f055a SH |
1694 | If Pod::Man is given the C<utf8> option, the encoding of its output file |
1695 | handle will be forced to UTF-8 if possible, overriding any existing | |
1696 | encoding. This will be done even if the file handle is not created by | |
1697 | Pod::Man and was passed in from outside. This maintains consistency | |
1698 | regardless of PERL_UNICODE and other settings. | |
1699 | ||
b4558dc4 JH |
1700 | The handling of hyphens and em dashes is somewhat fragile, and one may get |
1701 | the wrong one under some circumstances. This should only matter for | |
1702 | B<troff> output. | |
1703 | ||
1704 | When and whether to use small caps is somewhat tricky, and Pod::Man doesn't | |
1705 | necessarily get it right. | |
1706 | ||
b7ae008f SP |
1707 | Converting neutral double quotes to properly matched double quotes doesn't |
1708 | work unless there are no formatting codes between the quote marks. This | |
1709 | only matters for troff output. | |
1710 | ||
1711 | =head1 AUTHOR | |
1712 | ||
1713 | Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original | |
1714 | B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>. The modifications to | |
1715 | work with Pod::Simple instead of Pod::Parser were originally contributed by | |
1716 | Sean Burke (but I've since hacked them beyond recognition and all bugs are | |
1717 | mine). | |
1718 | ||
1719 | =head1 COPYRIGHT AND LICENSE | |
1720 | ||
9d1cda07 | 1721 | Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
0e4e3f6e | 1722 | Russ Allbery <rra@stanford.edu>. |
b7ae008f SP |
1723 | |
1724 | This program is free software; you may redistribute it and/or modify it | |
1725 | under the same terms as Perl itself. | |
1726 | ||
9741dab0 GS |
1727 | =head1 SEE ALSO |
1728 | ||
b7ae008f | 1729 | L<Pod::Simple>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>, |
bf202ccd | 1730 | L<man(1)>, L<man(7)> |
9741dab0 GS |
1731 | |
1732 | Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual," | |
1733 | Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is | |
bf202ccd JH |
1734 | the best documentation of standard B<nroff> and B<troff>. At the time of |
1735 | this writing, it's available at | |
1736 | L<http://www.cs.bell-labs.com/cm/cs/cstr.html>. | |
9741dab0 | 1737 | |
bf202ccd JH |
1738 | The man page documenting the man macro set may be L<man(5)> instead of |
1739 | L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive | |
1740 | documentation on writing manual pages if you've not done it before and | |
1741 | aren't familiar with the conventions. | |
9741dab0 | 1742 | |
fd20da51 JH |
1743 | The current version of this module is always available from its web site at |
1744 | L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the | |
1745 | Perl core distribution as of 5.6.0. | |
1746 | ||
9741dab0 | 1747 | =cut |