This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate change #18420 from maint-5.8:
[perl5.git] / lib / Pod / Text.pm
CommitLineData
6055f9d4 1# Pod::Text -- Convert POD data to formatted ASCII text.
11f72409 2# $Id: Text.pm,v 2.21 2002/08/04 03:34:58 eagle Exp $
6055f9d4 3#
f011ec7d 4# Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
6055f9d4 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
6055f9d4
GS
7# under the same terms as Perl itself.
8#
5ec554fb
JH
9# This module converts POD to formatted text. It replaces the old Pod::Text
10# module that came with versions of Perl prior to 5.6.0 and attempts to match
11# its output except for some specific circumstances where other decisions
12# seemed to produce better output. It uses Pod::Parser and is designed to be
13# very easy to subclass.
3c014959
JH
14#
15# Perl core hackers, please note that this module is also separately
16# maintained outside of the Perl core as part of the podlators. Please send
17# me any patches at the address above in addition to sending them to the
18# standard Perl mailing lists.
6055f9d4 19
3c014959 20##############################################################################
6055f9d4 21# Modules and declarations
3c014959 22##############################################################################
69e00e79 23
6055f9d4 24package Pod::Text;
69e00e79 25
6055f9d4
GS
26require 5.004;
27
27f805f4 28use Carp qw(carp croak);
2e20e14f 29use Exporter ();
bf202ccd 30use Pod::ParseLink qw(parselink);
27f805f4 31use Pod::Select ();
6055f9d4
GS
32
33use strict;
2e20e14f 34use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
6055f9d4 35
3c014959
JH
36# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
37# Pod::Usage.
2e20e14f 38@ISA = qw(Pod::Select Exporter);
6055f9d4 39
2e20e14f
GS
40# We have to export pod2text for backward compatibility.
41@EXPORT = qw(pod2text);
42
3c014959
JH
43# Don't use the CVS revision as the version, since this module is also in Perl
44# core and too many things could munge CVS magic revision strings. This
45# number should ideally be the same as the CVS revision in podlators, however.
11f72409 46$VERSION = 2.21;
6055f9d4
GS
47
48
3c014959 49##############################################################################
6055f9d4 50# Table of supported E<> escapes
3c014959 51##############################################################################
6055f9d4 52
3c014959
JH
# Names accepted inside E<> and the character each one produces.  The table
# is taken near verbatim from Pod::PlainText in Pod::Parser, which got it near
# verbatim from the original Pod::Text, so it is credited to Tom Christiansen.
# "iexcl" through "divide" were added by Tim Jenness.  Values above 0x7F are
# ISO 8859-1 code points.
%ESCAPES = (
    # Characters that are awkward to write literally in POD.
    'amp'      => '&',          # ampersand
    'apos'     => "'",          # apostrophe
    'lt'       => '<',          # left chevron, less-than
    'gt'       => '>',          # right chevron, greater-than
    'quot'     => '"',          # double quote
    'sol'      => '/',          # solidus (forward slash)
    'verbar'   => '|',          # vertical bar

    # Accented and special letters.
    'Aacute'   => "\xC1",       # capital A, acute accent
    'aacute'   => "\xE1",       # small a, acute accent
    'Acirc'    => "\xC2",       # capital A, circumflex accent
    'acirc'    => "\xE2",       # small a, circumflex accent
    'AElig'    => "\xC6",       # capital AE diphthong (ligature)
    'aelig'    => "\xE6",       # small ae diphthong (ligature)
    'Agrave'   => "\xC0",       # capital A, grave accent
    'agrave'   => "\xE0",       # small a, grave accent
    'Aring'    => "\xC5",       # capital A, ring
    'aring'    => "\xE5",       # small a, ring
    'Atilde'   => "\xC3",       # capital A, tilde
    'atilde'   => "\xE3",       # small a, tilde
    'Auml'     => "\xC4",       # capital A, dieresis or umlaut mark
    'auml'     => "\xE4",       # small a, dieresis or umlaut mark
    'Ccedil'   => "\xC7",       # capital C, cedilla
    'ccedil'   => "\xE7",       # small c, cedilla
    'Eacute'   => "\xC9",       # capital E, acute accent
    'eacute'   => "\xE9",       # small e, acute accent
    'Ecirc'    => "\xCA",       # capital E, circumflex accent
    'ecirc'    => "\xEA",       # small e, circumflex accent
    'Egrave'   => "\xC8",       # capital E, grave accent
    'egrave'   => "\xE8",       # small e, grave accent
    'ETH'      => "\xD0",       # capital Eth, Icelandic
    'eth'      => "\xF0",       # small eth, Icelandic
    'Euml'     => "\xCB",       # capital E, dieresis or umlaut mark
    'euml'     => "\xEB",       # small e, dieresis or umlaut mark
    'Iacute'   => "\xCD",       # capital I, acute accent
    'iacute'   => "\xED",       # small i, acute accent
    'Icirc'    => "\xCE",       # capital I, circumflex accent
    'icirc'    => "\xEE",       # small i, circumflex accent
    'Igrave'   => "\xCC",       # capital I, grave accent
    'igrave'   => "\xEC",       # small i, grave accent
    'Iuml'     => "\xCF",       # capital I, dieresis or umlaut mark
    'iuml'     => "\xEF",       # small i, dieresis or umlaut mark
    'Ntilde'   => "\xD1",       # capital N, tilde
    'ntilde'   => "\xF1",       # small n, tilde
    'Oacute'   => "\xD3",       # capital O, acute accent
    'oacute'   => "\xF3",       # small o, acute accent
    'Ocirc'    => "\xD4",       # capital O, circumflex accent
    'ocirc'    => "\xF4",       # small o, circumflex accent
    'Ograve'   => "\xD2",       # capital O, grave accent
    'ograve'   => "\xF2",       # small o, grave accent
    'Oslash'   => "\xD8",       # capital O, slash
    'oslash'   => "\xF8",       # small o, slash
    'Otilde'   => "\xD5",       # capital O, tilde
    'otilde'   => "\xF5",       # small o, tilde
    'Ouml'     => "\xD6",       # capital O, dieresis or umlaut mark
    'ouml'     => "\xF6",       # small o, dieresis or umlaut mark
    'szlig'    => "\xDF",       # small sharp s, German (sz ligature)
    'THORN'    => "\xDE",       # capital THORN, Icelandic
    'thorn'    => "\xFE",       # small thorn, Icelandic
    'Uacute'   => "\xDA",       # capital U, acute accent
    'uacute'   => "\xFA",       # small u, acute accent
    'Ucirc'    => "\xDB",       # capital U, circumflex accent
    'ucirc'    => "\xFB",       # small u, circumflex accent
    'Ugrave'   => "\xD9",       # capital U, grave accent
    'ugrave'   => "\xF9",       # small u, grave accent
    'Uuml'     => "\xDC",       # capital U, dieresis or umlaut mark
    'uuml'     => "\xFC",       # small u, dieresis or umlaut mark
    'Yacute'   => "\xDD",       # capital Y, acute accent
    'yacute'   => "\xFD",       # small y, acute accent
    'yuml'     => "\xFF",       # small y, dieresis or umlaut mark

    # Angle quotation marks, with their older synonyms.
    'laquo'    => "\xAB",       # left pointing double angle quotation mark
    'lchevron' => "\xAB",       # synonym (backwards compatibility)
    'raquo'    => "\xBB",       # right pointing double angle quotation mark
    'rchevron' => "\xBB",       # synonym (backwards compatibility)

    # Symbols and punctuation.
    'iexcl'    => "\xA1",       # inverted exclamation mark
    'cent'     => "\xA2",       # cent sign
    'pound'    => "\xA3",       # (UK) pound sign
    'curren'   => "\xA4",       # currency sign
    'yen'      => "\xA5",       # yen sign
    'brvbar'   => "\xA6",       # broken vertical bar
    'sect'     => "\xA7",       # section sign
    'uml'      => "\xA8",       # dieresis
    'copy'     => "\xA9",       # copyright symbol
    'ordf'     => "\xAA",       # feminine ordinal indicator
    'not'      => "\xAC",       # not sign
    'shy'      => '',           # soft (discretionary) hyphen, rendered as nothing
    'reg'      => "\xAE",       # registered trademark
    'macr'     => "\xAF",       # macron, overline
    'deg'      => "\xB0",       # degree sign
    'plusmn'   => "\xB1",       # plus-minus sign
    'sup2'     => "\xB2",       # superscript 2
    'sup3'     => "\xB3",       # superscript 3
    'acute'    => "\xB4",       # acute accent
    'micro'    => "\xB5",       # micro sign
    'para'     => "\xB6",       # pilcrow sign = paragraph sign
    'middot'   => "\xB7",       # middle dot = Georgian comma
    'cedil'    => "\xB8",       # cedilla
    'sup1'     => "\xB9",       # superscript 1
    'ordm'     => "\xBA",       # masculine ordinal indicator
    'frac14'   => "\xBC",       # vulgar fraction one quarter
    'frac12'   => "\xBD",       # vulgar fraction one half
    'frac34'   => "\xBE",       # vulgar fraction three quarters
    'iquest'   => "\xBF",       # inverted question mark
    'times'    => "\xD7",       # multiplication sign
    'divide'   => "\xF7",       # division sign

    # Non-breaking space uses the \x01 placeholder that output() later maps
    # back to an ordinary space.
    'nbsp'     => "\x01",       # non-breaking space
);
69e00e79 168
69e00e79 169
3c014959 170##############################################################################
6055f9d4 171# Initialization
3c014959 172##############################################################################
69e00e79 173
6055f9d4
GS
# Initialize the object: fill in a default for every formatting option that
# was not supplied, work out the quote marks used around C<> text, reset the
# indentation state, and then call our parent initializer (which must always
# happen).  Croaks if the quotes option is not "none" or one, two, or four
# characters long.
sub initialize {
    my ($self) = @_;

    # Defaults for the options the caller left unset.
    my %default = (
        alt      => 0,
        indent   => 4,
        margin   => 0,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default) {
        $$self{$option} = $default{$option} unless defined $$self{$option};
    }

    # Figure out what quotes we'll be using for C<> text.
    $$self{quotes} ||= '"';
    my $quotes = $$self{quotes};
    if ($quotes eq 'none') {
        @$self{'LQUOTE', 'RQUOTE'} = ('', '');
    } elsif (length ($quotes) == 1) {
        @$self{'LQUOTE', 'RQUOTE'} = ($quotes, $quotes);
    } elsif ($quotes =~ /^(.)(.)$/ || $quotes =~ /^(..)(..)$/) {
        @$self{'LQUOTE', 'RQUOTE'} = ($1, $2);
    } else {
        croak qq(Invalid quote specification "$$self{quotes}");
    }

    # Start with an empty stack of indentations and a left margin made up of
    # the page margin plus the regular text indent.
    $$self{INDENTS} = [];
    $$self{MARGIN}  = $$self{indent} + $$self{margin};

    $self->SUPER::initialize;

    # Tell Pod::Parser that we want the non-POD stuff too if code was set.
    $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
}
69e00e79 210
69e00e79 211
3c014959 212##############################################################################
6055f9d4 213# Core overrides
3c014959 214##############################################################################
6055f9d4
GS
215
# Called for each command paragraph.  Gets the command, the associated
# paragraph, the line number, and a Pod::Paragraph object.  The command is
# dispatched to a method named cmd_<command> if we have one; otherwise a
# warning naming the file and line is issued and the paragraph is dropped.
# =pod is ignored, and while inside an excluded =begin block everything but
# =end is ignored.  =cut is handled internally by Pod::Parser.
sub command {
    my ($self, $command) = splice (@_, 0, 2);
    return if $command eq 'pod';
    return if ($$self{EXCLUDE} && $command ne 'end');

    my $method = 'cmd_' . $command;
    if ($self->can ($method)) {
        $self->$method (@_);
    } else {
        my ($text, $line, $paragraph) = @_;

        # Report the position recorded in the paragraph object rather than
        # the line number we were handed.
        my ($file, $where) = $paragraph->file_line;
        $text =~ s/\n+\z//;
        $text = " $text" if ($text =~ /^\S/);
        warn qq($file:$where: Unknown command paragraph: =$command$text\n);
        return;
    }
}
69e00e79 238
3c014959
JH
# Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
# Pod::Paragraph object.  Any pending =item tag is flushed first, and then the
# paragraph is output as-is apart from being shifted right by the current
# left margin.  Paragraphs consisting only of whitespace produce nothing.
sub verbatim {
    my ($self, $text) = @_;
    return if $$self{EXCLUDE};
    $self->item if defined $$self{ITEM};
    return if $text =~ /^\s*$/;

    # Prefix the margin to every line that has something on it.
    my $margin = ' ' x $$self{MARGIN};
    $text =~ s/^(\s*\S+)/$margin$1/gm;
    $self->output ($text);
}
69e00e79 251
3c014959
JH
# Called for a regular text block.  Gets the paragraph, the line number, and a
# Pod::Paragraph object.  Inside a =begin text block the paragraph is output
# untouched; otherwise formatting codes are interpolated and the result is
# either attached to the pending =item tag or reformatted and output.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    if ($$self{VERBATIM}) {
        $self->output ($_[0]);
        return;
    }
    my ($text, $line) = @_;

    # Interpolate, then normalize the paragraph to end in exactly one blank
    # line.
    $text = $self->interpolate ($text, $line);
    $text =~ s/\s+$/\n/;
    $text .= "\n";

    if (defined $$self{ITEM}) {
        $self->item ($text);
    } else {
        $self->output ($self->reformat ($text));
    }
}
69e00e79 270
# Called for a formatting code.  Gets the command, argument, and a
# Pod::InteriorSequence object and is expected to return the resulting text.
# Calls methods for code, bold, italic, file, and link to handle those types
# of codes, and handles S<>, E<>, X<>, and Z<> directly.  An unrecognized
# code produces a warning and its contents are returned without any
# formatting applied.
sub interior_sequence {
    local $_;
    my ($self, $command, $seq);
    ($self, $command, $_, $seq) = @_;

    # We have to defer processing of the inside of an L<> formatting code.  If
    # this code is nested inside an L<> code, return the literal raw text of
    # it.
    my $parent = $seq->nested;
    while (defined $parent) {
        return $seq->raw_text if ($parent->cmd_name eq 'L');
        $parent = $parent->nested;
    }

    # Index entries are ignored in plain text.
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, warning if invalid.  An
    # unknown escape is left in the output literally.
    if ($command eq 'E') {
        if (/^\d+$/) {
            return chr;
        } else {
            return $ESCAPES{$_} if defined $ESCAPES{$_};
            my ($file, $line) = $seq->file_line;
            warn "$file:$line: Unknown escape: E<$_>\n";
            return "E<$_>";
        }
    }

    # For all the other formatting codes, empty content produces no output.
    return if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s+/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }
    else {
        my ($file, $line) = $seq->file_line;
        warn "$file:$line: Unknown formatting code: $command<$_>\n";

        # Return the contents explicitly.  Falling off the end of the sub
        # here would return the true value of warn, which would end up in
        # the output as a stray "1" in place of the text.
        return $_;
    }
}
f02a87df 326
# Called for each paragraph before any other processing.  We take advantage
# of this opportunity to untabify the input, expanding tabs to eight-column
# tab stops.  If the parser reports that it is currently cutting (outside of
# POD), the paragraph is also passed to output_code so that it can be output
# directly.  Returns the untabified paragraph.
sub preprocess_paragraph {
    my ($self, $text) = @_;

    # Expand one run of tabs per pass until none are left on any line.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
    }

    $self->output_code ($text) if $self->cutting;
    return $text;
}
3ec07288 338
69e00e79 339
3c014959 340##############################################################################
6055f9d4 341# Command paragraphs
3c014959 342##############################################################################
f2506fb2 343
6055f9d4 344# All command paragraphs take the paragraph and the line number.
69e00e79 345
6055f9d4
GS
# First level heading: no extra indentation, "====" as the alt-style marker.
sub cmd_head1 {
    my $self = shift;
    my ($text, $line) = @_;
    return $self->heading ($text, $line, 0, '====');
}
69e00e79 351
6055f9d4
GS
# Second level heading: indented by half the regular text indent.
sub cmd_head2 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} / 2;
    return $self->heading ($text, $line, $indent, '== ');
}
69e00e79 357
50a3fd2a
RA
# Third level heading: indented by two thirds of the regular text indent,
# plus a half.
sub cmd_head3 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} * 2 / 3 + 0.5;
    return $self->heading ($text, $line, $indent, '= ');
}
363
# Fourth level heading: indented by three quarters of the regular text
# indent, plus a half.
sub cmd_head4 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} * 3 / 4 + 0.5;
    return $self->heading ($text, $line, $indent, '- ');
}
369
6055f9d4
GS
# Start a list.  Any pending =item tag is flushed, the current left margin is
# saved on the indentation stack, and the margin grows by the amount given to
# =over (or by the regular indent if the argument is not a number).
sub cmd_over {
    my ($self, $amount) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $amount = $$self{indent} unless $amount =~ /^[-+]?\d+\s+$/;
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($amount + 0);
}
69e00e79 379
6055f9d4
GS
# End a list.  Any pending =item tag is flushed and the left margin saved by
# the matching =over is restored.  If there is no saved margin, warn about the
# unmatched =back and fall back to the regular indent.
sub cmd_back {
    my ($self, $text, $line, $paragraph) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    my $previous = pop @{ $$self{INDENTS} };
    $$self{MARGIN} = $previous;
    unless (defined $previous) {
        my ($file, $where) = $paragraph->file_line;
        warn "$file:$where: Unmatched =back\n";
        $$self{MARGIN} = $$self{indent};
    }
}
392
6055f9d4
GS
# An individual list item.  Any previous tag that is still pending is flushed
# first; then the new tag is remembered in ITEM until we see the paragraph
# that goes with it.  An =item with no text gets "*" as its tag.
sub cmd_item {
    my ($self, $tag, $line) = @_;
    $self->item if defined $$self{ITEM};
    $tag = '' unless defined $tag;
    $tag =~ s/\s+$//;

    # Test the length rather than the truth of the tag so that "=item 0" is
    # kept as "0" instead of being mistaken for an empty tag.
    $$self{ITEM} = length ($tag) ? $self->interpolate ($tag, $line) : '*';
}
69e00e79 401
27f805f4
GS
# Begin a block for a particular translator.  A block meant for "text" sets
# VERBATIM, which triggers special handling in textblock(); a block for any
# other translator sets EXCLUDE so that its contents are skipped.  Does
# nothing if no translator name is given.
sub cmd_begin {
    my ($self, $text) = @_;
    my ($kind) = $text =~ /^(\S+)/ or return;
    my $state = ($kind eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$state} = 1;
}
f2506fb2 414
# End a block for a particular translator by clearing both block states.  We
# assume that all =begin/=end pairs are properly closed.
sub cmd_end {
    my ($self) = @_;
    $$self{EXCLUDE} = $$self{VERBATIM} = 0;
}
6055f9d4
GS
422
# One paragraph for a particular translator.  Ignore it unless it's intended
# for text, in which case the leading "text" word is stripped and the rest is
# treated as a verbatim text block.
sub cmd_for {
    my ($self, $text, $line) = @_;
    if ($text =~ s/^text\b[ \t]*\n?//) {
        $self->verbatim ($text, $line);
    } else {
        return;
    }
}
f2506fb2 432
69e00e79 433
3c014959 434##############################################################################
5ec554fb 435# Formatting codes
3c014959 436##############################################################################
69e00e79 437
5ec554fb
JH
438# The simple ones. These are here mostly so that subclasses can override them
439# and do more complicated things.
# Bold text: wrapped in ``'' in alt mode, otherwise left unchanged.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $$self{alt};
    return "``" . $text . "''";
}
# File names: wrapped in double quotes in alt mode, otherwise left unchanged.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $$self{alt};
    return '"' . $text . '"';
}
# Italic text: always surrounded by asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
3c014959
JH
443
# Format C<> text.  Apply a whole bunch of messy heuristics to not quote
# things that don't benefit from being quoted; anything else is wrapped in
# ``'' in alt mode or in the configured left and right quote marks.  The
# heuristics originally come from Barrie Slaymaker and largely duplicate code
# in Pod::Man.
sub seq_c {
    my ($self, $code) = @_;

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Check for things that we don't want to quote, and if we find any of
    # them, return the string unchanged.
    if ($code =~ m{
        ^\s*
        (?:
            ( [\'\`\"] ) .* \1                                  # already quoted
          | \` .* \'                                            # `quoted'
          | \$+ [\#^]? \S $index                                # special ($^Foo, $")
          | [\$\@%&*]+ \#? [:\'\w]+ $index                      # plain var or func
          | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\)      # 0/1-arg func call
          | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )?      # a number
          | 0x [a-fA-F\d]+                                      # a hex constant
        )
        \s*\z
    }xo) {
        return $code;
    }

    # Nothing matched, so go ahead and quote the text.
    return "``" . $code . "''" if $$self{alt};
    return $$self{LQUOTE} . $code . $$self{RQUOTE};
}
69e00e79 475
bf202ccd
JH
# Handle links.  Since this is plain text, we can't actually make any real
# links, so this is all to figure out what text we print out.  Most of the
# work is done by Pod::ParseLink; we take the text it gives us, interpolate
# any formatting codes in it, and put angle brackets around URLs.
sub seq_l {
    my ($self, $link, $seq) = @_;
    my @parsed = parselink ($link);
    my ($text, $type) = @parsed[1, 4];
    my ($file, $line) = $seq->file_line;
    $text = $self->interpolate ($text, $line);
    if ($type eq 'url') {
        $text = "<$text>";
    }
    return $text ? $text : '';
}
487
6055f9d4 488
3c014959 489##############################################################################
b616daaf
JH
490# Header handling
491##############################################################################
492
# The common code for handling all headers.  Takes the header text, the line
# number, the indentation, and the surrounding marker for the alt formatting
# method.  Any pending =item tag is flushed, formatting codes in the text are
# interpolated, and the heading is output either between the marker and its
# mirror image (alt mode) or indented by the page margin plus the given
# indentation.
sub heading {
    my ($self, $text, $line, $indent, $marker) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $text =~ s/\s+$//;
    $text = $self->interpolate ($text, $line);

    my $heading;
    if ($$self{alt}) {
        # The closing marker is the opening one reversed.
        my $closemark = scalar reverse ($marker);
        my $margin = ' ' x $$self{margin};
        $heading = "\n" . "$margin$marker $text $closemark" . "\n\n";
    } else {
        # In loose mode the heading is followed by a blank line.
        $text .= "\n" if $$self{loose};
        my $margin = ' ' x ($$self{margin} + $indent);
        $heading = $margin . $text . "\n";
    }
    $self->output ($heading);
}
511
512
513##############################################################################
6055f9d4 514# List handling
3c014959
JH
515##############################################################################
516
# This method is called whenever an =item command is complete (in other words,
# we've seen its associated paragraph or know for certain that it doesn't have
# one).  It gets the paragraph associated with the item as an argument.  If
# that argument is empty or only whitespace, or the tag doesn't fit in the
# space between the enclosing margin and the text margin, the tag is output on
# a line of its own followed by the paragraph (if any).  Otherwise the tag is
# written into the left margin of the first line of the paragraph.
sub item {
    my ($self, $text) = @_;
    my $tag = $$self{ITEM};
    unless (defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $$self{ITEM};

    # The tag is placed at the margin saved by the enclosing =over, or at the
    # regular indent if no margin has been saved.
    my $indent = $$self{INDENTS}[-1];
    $indent = $$self{indent} unless defined $indent;
    my $margin = ' ' x $$self{margin};

    my $room = $$self{MARGIN} - $indent;
    if (!$text || $text =~ /^\s+$/ || $room < length ($tag) + 1) {
        # Format the tag by itself at the outer margin.
        my $realindent = $$self{MARGIN};
        $$self{MARGIN} = $indent;
        my $output = $self->reformat ($tag);
        $output =~ s/^$margin /$margin:/ if ($$self{alt} && $indent > 0);
        $output =~ s/\n*$/\n/;

        # If the text is just whitespace, we have an empty item paragraph;
        # this can result from =over/=item/=back without any intermixed
        # paragraphs.  Insert some whitespace to keep the =item from merging
        # into the next paragraph.
        $output .= "\n" if $text && $text =~ /^\s*$/;

        $self->output ($output);
        $$self{MARGIN} = $realindent;
        $self->output ($self->reformat ($text)) if $text && $text =~ /\S/;
    } else {
        # Format the paragraph and then overwrite the blank space in front of
        # its first line with the tag.
        my $space = ' ' x $indent;
        $space =~ s/^$margin /$margin:/ if $$self{alt};
        $text = $self->reformat ($text);
        $text =~ s/^$margin /$margin:/ if ($$self{alt} && $indent > 0);
        my $tagspace = ' ' x length $tag;
        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($text);
    }
}
562
69e00e79 563
3c014959 564##############################################################################
6055f9d4 565# Output formatting
3c014959 566##############################################################################
69e00e79 567
3c014959
JH
# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
# because it plays games with tabs.  We can't use formline, even though we'd
# really like to, because it screws up non-printing characters.  So we have to
# do the wrapping ourselves.  Takes the text to wrap and returns it with every
# line prefixed by the margin and trailing whitespace replaced by a blank
# line.
sub wrap {
    my ($self, $text) = @_;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{width} - $$self{MARGIN};

    # Only wrap when there is room for at least one character per line.  With
    # a usable width of zero the fallback pattern below matches the empty
    # string without consuming any text, so the loop would never end.
    while ($width > 0 && length ($text) > $width) {
        # Break at the last whitespace that fits, or failing that split an
        # overlong word at the full width.
        if ($text =~ s/^([^\n]{0,$width})\s+//
            || $text =~ s/^([^\n]{$width})//) {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $text;
    $output =~ s/\s+$/\n\n/;
    return $output;
}
589
6055f9d4
GS
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text.
sub reformat {
    my ($self, $text) = @_;

    # If we're trying to preserve two spaces after sentences, do some munging
    # to support that.  Otherwise, smash all repeated whitespace.
    if ($$self{sentence}) {
        # Drop trailing spaces, give a sentence that ends its line the same
        # two-space break as one in the middle of a line once lines are
        # joined, and squeeze only runs of three or more spaces (down to two)
        # so that those breaks survive.
        $text =~ s/ +$//mg;
        $text =~ s/\.\n/. \n/g;
        $text =~ s/\n/ /g;
        $text =~ s/ {3,}/  /g;
    } else {
        $text =~ s/\s+/ /g;
    }
    return $self->wrap ($text);
}
608
6055f9d4
GS
# Output text to the output device.  The \01 placeholders used for
# non-breaking spaces are turned back into real spaces first; this is done in
# place, so the caller's copy of the text is changed as well.
sub output {
    my $self = $_[0];
    $_[1] =~ tr/\01/ /;
    my $handle = $self->output_handle;
    print {$handle} $_[1];
}
69e00e79 611
59548eca
JH
# Output a block of code (something that isn't part of the POD text).  Called
# by preprocess_paragraph while the parser is cutting.  It simply passes the
# text to output, and exists here only so that it can be overridden by
# subclasses.
sub output_code {
    my $self = shift;
    return $self->output ($_[0]);
}
616
69e00e79 617
3c014959 618##############################################################################
27f805f4 619# Backwards compatibility
3c014959 620##############################################################################
27f805f4
GS
621
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.  Takes optional
# leading flags (-a for the alt format, -<number> for the width), then the
# input file, then optionally an output file handle.  Croaks if an input file
# that has to be opened here can't be opened.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  Stop quietly when we run
    # out of arguments instead of matching against an undefined value.
    while (@_ && defined $_[0] && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1) }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which means
    # we need to turn the first argument into a file handle.
    if (defined $_[1]) {
        my @fhs = @_;
        local *IN;

        # NOTE: the two-argument open is deliberate.  Magic open handles the
        # <&STDIN case automagically, which legacy callers rely on, but it
        # also means the file name must never come from untrusted input.
        unless (open (IN, $fhs[0])) {
            croak ("Can't open $fhs[0] for reading: $!\n");
        }
        $fhs[0] = \*IN;
        return $parser->parse_from_filehandle (@fhs);
    } else {
        return $parser->parse_from_file (@_);
    }
}
660
661
3c014959 662##############################################################################
6055f9d4 663# Module return value and documentation
3c014959 664##############################################################################
69e00e79 665
6055f9d4
GS
6661;
667__END__
69e00e79 668
6055f9d4 669=head1 NAME
69e00e79 670
6055f9d4 671Pod::Text - Convert POD data to formatted ASCII text
69e00e79 672
6055f9d4 673=head1 SYNOPSIS
69e00e79 674
6055f9d4
GS
675 use Pod::Text;
676 my $parser = Pod::Text->new (sentence => 0, width => 78);
69e00e79 677
6055f9d4
GS
678 # Read POD from STDIN and write to STDOUT.
679 $parser->parse_from_filehandle;
69e00e79 680
6055f9d4
GS
681 # Read POD from file.pod and write to file.txt.
682 $parser->parse_from_file ('file.pod', 'file.txt');
69e00e79 683
6055f9d4 684=head1 DESCRIPTION
5491a304 685
27f805f4
GS
686Pod::Text is a module that can convert documentation in the POD format (the
687preferred language for documenting Perl) into formatted ASCII. It uses no
688special formatting controls or codes whatsoever, and its output is therefore
689suitable for nearly any device.
69e00e79 690
27f805f4
GS
691As a derived class from Pod::Parser, Pod::Text supports the same methods and
692interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
bf202ccd 693new parser with C<< Pod::Text->new() >> and then calls either
27f805f4 694parse_from_filehandle() or parse_from_file().
6055f9d4 695
27f805f4 696new() can take options, in the form of key/value pairs, that control the
6055f9d4
GS
697behavior of the parser. The currently recognized options are:
698
699=over 4
700
701=item alt
702
703If set to a true value, selects an alternate output format that, among other
704things, uses a different heading style and marks C<=item> entries with a
705colon in the left margin. Defaults to false.
706
59548eca
JH
707=item code
708
709If set to a true value, the non-POD parts of the input file will be included
710in the output. Useful for viewing code documented with POD blocks with the
711POD rendered and the code left intact.
712
6055f9d4
GS
713=item indent
714
715The number of spaces to indent regular text, and the default indentation for
716C<=over> blocks. Defaults to 4.
717
718=item loose
719
720If set to a true value, a blank line is printed after a C<=head1> heading.
721If set to false (the default), no blank line is printed after C<=head1>,
722although one is still printed after C<=head2>. This is the default because
723it's the expected formatting for manual pages; if you're formatting
724arbitrary text documents, setting this to true may result in more pleasing
725output.
726
11f72409
RA
727=item margin
728
729The width of the left margin in spaces. Defaults to 0. This is the margin
730for all text, including headings, not the amount by which regular text is
731indented; for the latter, see the I<indent> option. To set the right
732margin, see the I<width> option.
733
ab1f1d91
JH
734=item quotes
735
736Sets the quote marks used to surround CE<lt>> text. If the value is a
737single character, it is used as both the left and right quote; if it is two
738characters, the first character is used as the left quote and the second as
the right quote; and if it is four characters, the first two are used as
740the left quote and the second two as the right quote.
741
742This may also be set to the special value C<none>, in which case no quote
743marks are added around CE<lt>> text.
744
6055f9d4
GS
745=item sentence
746
27f805f4
GS
747If set to a true value, Pod::Text will assume that each sentence ends in two
748spaces, and will try to preserve that spacing. If set to false, all
6055f9d4
GS
749consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to false.
751
752=item width
753
754The column at which to wrap text on the right-hand side. Defaults to 76.
755
756=back
757
27f805f4 758The standard Pod::Parser method parse_from_filehandle() takes up to two
6055f9d4
GS
759arguments, the first being the file handle to read POD from and the second
760being the file handle to write the formatted output to. The first defaults
761to STDIN if not given, and the second defaults to STDOUT. The method
27f805f4
GS
762parse_from_file() is almost identical, except that its two arguments are the
763input and output disk files instead. See L<Pod::Parser> for the specific
764details.
6055f9d4
GS
765
766=head1 DIAGNOSTICS
767
768=over 4
769
27f805f4
GS
770=item Bizarre space in item
771
59548eca
JH
772=item Item called without tag
773
774(W) Something has gone wrong in internal C<=item> processing. These
775messages indicate a bug in Pod::Text; you should never see them.
27f805f4
GS
776
777=item Can't open %s for reading: %s
778
779(F) Pod::Text was invoked via the compatibility mode pod2text() interface
780and the input file it was given could not be opened.
781
ab1f1d91
JH
782=item Invalid quote specification "%s"
783
784(F) The quote specification given (the quotes option to the constructor) was
785invalid. A quote specification must be one, two, or four characters long.
786
2da3dd12 787=item %s:%d: Unknown command paragraph: %s
ab1f1d91
JH
788
789(W) The POD source contained a non-standard command paragraph (something of
the form C<=command args>) that Pod::Text didn't know about. It was ignored.
791
59548eca 792=item %s:%d: Unknown escape: %s
6055f9d4 793
27f805f4
GS
794(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
795know about.
6055f9d4 796
5ec554fb 797=item %s:%d: Unknown formatting code: %s
6055f9d4 798
5ec554fb 799(W) The POD source contained a non-standard formatting code (something of
27f805f4 800the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
6055f9d4 801
59548eca 802=item %s:%d: Unmatched =back
6055f9d4 803
27f805f4 804(W) Pod::Text encountered a C<=back> command that didn't correspond to an
6055f9d4
GS
805C<=over> command.
806
807=back
808
27f805f4
GS
809=head1 RESTRICTIONS
810
811Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
812output, due to an internal implementation detail.
813
6055f9d4
GS
814=head1 NOTES
815
27f805f4
GS
816This is a replacement for an earlier Pod::Text module written by Tom
817Christiansen. It has a revamped interface, since it now uses Pod::Parser,
818but an interface roughly compatible with the old Pod::Text::pod2text()
819function is still available. Please change to the new calling convention,
820though.
6055f9d4
GS
821
822The original Pod::Text contained code to do formatting via termcap
823sequences, although it wasn't turned on by default and it was problematic to
27f805f4 824get it to work at all. This rewrite doesn't even try to do that, but a
bf202ccd 825subclass of it does. Look for L<Pod::Text::Termcap>.
6055f9d4
GS
826
827=head1 SEE ALSO
828
bf202ccd 829L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
6055f9d4 830
fd20da51
JH
831The current version of this module is always available from its web site at
832L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the
833Perl core distribution as of 5.6.0.
834
6055f9d4
GS
835=head1 AUTHOR
836
bf202ccd
JH
837Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
838Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
839Pod::Parser by Brad Appleton <bradapp@enteract.com>.
6055f9d4 840
3c014959
JH
841=head1 COPYRIGHT AND LICENSE
842
f011ec7d 843Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
3c014959
JH
844
845This program is free software; you may redistribute it and/or modify it
846under the same terms as Perl itself.
847
6055f9d4 848=cut