This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to podlators 1.22.
[perl5.git] / lib / Pod / Text.pm
CommitLineData
6055f9d4 1# Pod::Text -- Convert POD data to formatted ASCII text.
f011ec7d 2# $Id: Text.pm,v 2.19 2002/06/23 19:16:21 eagle Exp $
6055f9d4 3#
f011ec7d 4# Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
6055f9d4 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
6055f9d4
GS
7# under the same terms as Perl itself.
8#
5ec554fb
JH
9# This module converts POD to formatted text. It replaces the old Pod::Text
10# module that came with versions of Perl prior to 5.6.0 and attempts to match
11# its output except for some specific circumstances where other decisions
12# seemed to produce better output. It uses Pod::Parser and is designed to be
13# very easy to subclass.
3c014959
JH
14#
15# Perl core hackers, please note that this module is also separately
16# maintained outside of the Perl core as part of the podlators. Please send
17# me any patches at the address above in addition to sending them to the
18# standard Perl mailing lists.
6055f9d4 19
3c014959 20##############################################################################
6055f9d4 21# Modules and declarations
3c014959 22##############################################################################
69e00e79 23
6055f9d4 24package Pod::Text;
69e00e79 25
6055f9d4
GS
26require 5.004;
27
27f805f4 28use Carp qw(carp croak);
2e20e14f 29use Exporter ();
bf202ccd 30use Pod::ParseLink qw(parselink);
27f805f4 31use Pod::Select ();
6055f9d4
GS
32
33use strict;
2e20e14f 34use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
6055f9d4 35
3c014959
JH
36# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
37# Pod::Usage.
2e20e14f 38@ISA = qw(Pod::Select Exporter);
6055f9d4 39
2e20e14f
GS
40# We have to export pod2text for backward compatibility.
41@EXPORT = qw(pod2text);
42
3c014959
JH
43# Don't use the CVS revision as the version, since this module is also in Perl
44# core and too many things could munge CVS magic revision strings. This
45# number should ideally be the same as the CVS revision in podlators, however.
f011ec7d 46$VERSION = 2.19;
6055f9d4
GS
47
48
3c014959 49##############################################################################
6055f9d4 50# Table of supported E<> escapes
3c014959 51##############################################################################
6055f9d4 52
3c014959
JH
# Lookup table mapping the name inside an E<> escape to the character it
# produces.  The table comes almost verbatim from Pod::PlainText in
# Pod::Parser, which in turn took it almost verbatim from the original
# Pod::Text, so the credit belongs to Tom Christiansen.  The entries from
# "iexcl" through "divide" were added by Tim Jenness.
%ESCAPES = (
    # Characters that are special in POD or markup.
    'amp'      => '&',      # ampersand
    'apos'     => "'",      # apostrophe
    'lt'       => '<',      # left chevron, less-than
    'gt'       => '>',      # right chevron, greater-than
    'quot'     => '"',      # double quote
    'sol'      => '/',      # solidus (forward slash)
    'verbar'   => '|',      # vertical bar

    # Accented and other Latin-1 letters.
    'Aacute'   => "\xC1",   # capital A, acute accent
    'aacute'   => "\xE1",   # small a, acute accent
    'Acirc'    => "\xC2",   # capital A, circumflex accent
    'acirc'    => "\xE2",   # small a, circumflex accent
    'AElig'    => "\xC6",   # capital AE diphthong (ligature)
    'aelig'    => "\xE6",   # small ae diphthong (ligature)
    'Agrave'   => "\xC0",   # capital A, grave accent
    'agrave'   => "\xE0",   # small a, grave accent
    'Aring'    => "\xC5",   # capital A, ring
    'aring'    => "\xE5",   # small a, ring
    'Atilde'   => "\xC3",   # capital A, tilde
    'atilde'   => "\xE3",   # small a, tilde
    'Auml'     => "\xC4",   # capital A, dieresis or umlaut mark
    'auml'     => "\xE4",   # small a, dieresis or umlaut mark
    'Ccedil'   => "\xC7",   # capital C, cedilla
    'ccedil'   => "\xE7",   # small c, cedilla
    'Eacute'   => "\xC9",   # capital E, acute accent
    'eacute'   => "\xE9",   # small e, acute accent
    'Ecirc'    => "\xCA",   # capital E, circumflex accent
    'ecirc'    => "\xEA",   # small e, circumflex accent
    'Egrave'   => "\xC8",   # capital E, grave accent
    'egrave'   => "\xE8",   # small e, grave accent
    'ETH'      => "\xD0",   # capital Eth, Icelandic
    'eth'      => "\xF0",   # small eth, Icelandic
    'Euml'     => "\xCB",   # capital E, dieresis or umlaut mark
    'euml'     => "\xEB",   # small e, dieresis or umlaut mark
    'Iacute'   => "\xCD",   # capital I, acute accent
    'iacute'   => "\xED",   # small i, acute accent
    'Icirc'    => "\xCE",   # capital I, circumflex accent
    'icirc'    => "\xEE",   # small i, circumflex accent
    'Igrave'   => "\xCC",   # capital I, grave accent
    'igrave'   => "\xEC",   # small i, grave accent
    'Iuml'     => "\xCF",   # capital I, dieresis or umlaut mark
    'iuml'     => "\xEF",   # small i, dieresis or umlaut mark
    'Ntilde'   => "\xD1",   # capital N, tilde
    'ntilde'   => "\xF1",   # small n, tilde
    'Oacute'   => "\xD3",   # capital O, acute accent
    'oacute'   => "\xF3",   # small o, acute accent
    'Ocirc'    => "\xD4",   # capital O, circumflex accent
    'ocirc'    => "\xF4",   # small o, circumflex accent
    'Ograve'   => "\xD2",   # capital O, grave accent
    'ograve'   => "\xF2",   # small o, grave accent
    'Oslash'   => "\xD8",   # capital O, slash
    'oslash'   => "\xF8",   # small o, slash
    'Otilde'   => "\xD5",   # capital O, tilde
    'otilde'   => "\xF5",   # small o, tilde
    'Ouml'     => "\xD6",   # capital O, dieresis or umlaut mark
    'ouml'     => "\xF6",   # small o, dieresis or umlaut mark
    'szlig'    => "\xDF",   # small sharp s, German (sz ligature)
    'THORN'    => "\xDE",   # capital THORN, Icelandic
    'thorn'    => "\xFE",   # small thorn, Icelandic
    'Uacute'   => "\xDA",   # capital U, acute accent
    'uacute'   => "\xFA",   # small u, acute accent
    'Ucirc'    => "\xDB",   # capital U, circumflex accent
    'ucirc'    => "\xFB",   # small u, circumflex accent
    'Ugrave'   => "\xD9",   # capital U, grave accent
    'ugrave'   => "\xF9",   # small u, grave accent
    'Uuml'     => "\xDC",   # capital U, dieresis or umlaut mark
    'uuml'     => "\xFC",   # small u, dieresis or umlaut mark
    'Yacute'   => "\xDD",   # capital Y, acute accent
    'yacute'   => "\xFD",   # small y, acute accent
    'yuml'     => "\xFF",   # small y, dieresis or umlaut mark

    # Double angle quotation marks and their older names.
    'laquo'    => "\xAB",   # left pointing double angle quotation mark
    'lchevron' => "\xAB",   # synonym (backwards compatibility)
    'raquo'    => "\xBB",   # right pointing double angle quotation mark
    'rchevron' => "\xBB",   # synonym (backwards compatibility)

    # Latin-1 symbols and punctuation.
    'iexcl'    => "\xA1",   # inverted exclamation mark
    'cent'     => "\xA2",   # cent sign
    'pound'    => "\xA3",   # (UK) pound sign
    'curren'   => "\xA4",   # currency sign
    'yen'      => "\xA5",   # yen sign
    'brvbar'   => "\xA6",   # broken vertical bar
    'sect'     => "\xA7",   # section sign
    'uml'      => "\xA8",   # dieresis
    'copy'     => "\xA9",   # Copyright symbol
    'ordf'     => "\xAA",   # feminine ordinal indicator
    'not'      => "\xAC",   # not sign
    'shy'      => '',       # soft (discretionary) hyphen
    'reg'      => "\xAE",   # registered trademark
    'macr'     => "\xAF",   # macron, overline
    'deg'      => "\xB0",   # degree sign
    'plusmn'   => "\xB1",   # plus-minus sign
    'sup2'     => "\xB2",   # superscript 2
    'sup3'     => "\xB3",   # superscript 3
    'acute'    => "\xB4",   # acute accent
    'micro'    => "\xB5",   # micro sign
    'para'     => "\xB6",   # pilcrow sign = paragraph sign
    'middot'   => "\xB7",   # middle dot = Georgian comma
    'cedil'    => "\xB8",   # cedilla
    'sup1'     => "\xB9",   # superscript 1
    'ordm'     => "\xBA",   # masculine ordinal indicator
    'frac14'   => "\xBC",   # vulgar fraction one quarter
    'frac12'   => "\xBD",   # vulgar fraction one half
    'frac34'   => "\xBE",   # vulgar fraction three quarters
    'iquest'   => "\xBF",   # inverted question mark
    'times'    => "\xD7",   # multiplication sign
    'divide'   => "\xF7",   # division sign

    # Ctrl-A, the same placeholder S<> uses; output() turns it into a space.
    'nbsp'     => "\x01",   # non-breaking space
);
69e00e79 168
69e00e79 169
3c014959 170##############################################################################
6055f9d4 171# Initialization
3c014959 172##############################################################################
69e00e79 173
6055f9d4
GS
# Set up a freshly constructed formatter.  Fills in the default for every
# option the caller left undefined, works out the quote marks used around C<>
# text, resets the margin state, and then hands off to the parent class
# initializer (which must always be called).
sub initialize {
    my $self = shift;

    # Defaults for the user-visible options; only undefined ones are set.
    my %default = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default) {
        next if defined $self->{$option};
        $self->{$option} = $default{$option};
    }

    # Figure out what quotes we'll be using for C<> text.  The specification
    # is the word "none" or a string of one, two, or four characters.
    $self->{quotes} ||= '"';
    my $spec = $self->{quotes};
    my ($left, $right);
    if ($spec eq 'none') {
        ($left, $right) = ('', '');
    } elsif (length ($spec) == 1) {
        ($left, $right) = ($spec, $spec);
    } elsif ($spec =~ /^(.)(.)$/ || $spec =~ /^(..)(..)$/) {
        ($left, $right) = ($1, $2);
    } else {
        croak qq(Invalid quote specification "$spec");
    }
    $self->{LQUOTE} = $left;
    $self->{RQUOTE} = $right;

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    $self->SUPER::initialize;

    # Tell Pod::Parser that we want the non-POD stuff too if code was set.
    $self->parseopts ('-want_nonPODs' => 1) if $self->{code};
}
69e00e79 206
69e00e79 207
3c014959 208##############################################################################
6055f9d4 209# Core overrides
3c014959 210##############################################################################
6055f9d4
GS
211
# Called for each command paragraph with the command name, the paragraph
# text, the line number, and a Pod::Paragraph object.  The work is delegated
# to a method named cmd_<command> when the object has one; otherwise a warning
# about the unknown command is issued.  =pod is ignored here, and while an
# excluded =begin block is open everything except =end is ignored.  (=cut is
# handled internally by Pod::Parser.)
sub command {
    my $self = shift;
    my $name = shift;
    return if $name eq 'pod';
    return if ($self->{EXCLUDE} && $name ne 'end');

    # Known command: hand the remaining arguments straight to its handler.
    my $handler = 'cmd_' . $name;
    return $self->$handler (@_) if $self->can ($handler);

    # Unknown command: report it using the location stored in the paragraph
    # object rather than the line number we were passed.
    my ($text, $line, $paragraph) = @_;
    (my $file, $line) = $paragraph->file_line;
    $text =~ s/\n+\z//;
    $text = " $text" if ($text =~ /^\S/);
    warn qq($file:$line: Unknown command paragraph: =$name$text\n);
    return;
}
69e00e79 234
3c014959
JH
# Called for a verbatim paragraph with the paragraph text, the line number,
# and a Pod::Paragraph object.  The text is output as-is apart from being
# shifted right by the current margin.  Nothing is output inside an excluded
# block or for a paragraph that is entirely whitespace.
sub verbatim {
    my ($self, $text) = @_;
    return if $self->{EXCLUDE};

    # Flush any pending =item tag before checking the paragraph itself.
    $self->item if defined $self->{ITEM};
    return if $text =~ /^\s*$/;

    # Prefix each line that has content with the margin.
    my $margin = ' ' x $self->{MARGIN};
    $text =~ s/^(\s*\S+)/$margin . $1/gme;
    $self->output ($text);
}
69e00e79 247
3c014959
JH
# Called for an ordinary text paragraph with the paragraph text, the line
# number, and a Pod::Paragraph object.  Inside a "=begin text" block the
# paragraph is passed through untouched; otherwise formatting codes are
# interpolated and the result is either attached to a pending =item tag or
# reformatted and output.
sub textblock {
    my $self = shift;
    return if $self->{EXCLUDE};
    if ($self->{VERBATIM}) {
        $self->output ($_[0]);
        return;
    }
    my ($text, $line) = @_;

    # Interpolate, then normalize trailing whitespace to one blank line.
    $text = $self->interpolate ($text, $line);
    $text =~ s/\s+$/\n/;
    $text .= "\n";
    if (defined $self->{ITEM}) {
        $self->item ($text);
    } else {
        $self->output ($self->reformat ($text));
    }
}
69e00e79 266
# Called for a formatting code.  Gets the command, argument, and a
# Pod::InteriorSequence object and is expected to return the resulting text.
# Calls methods for code, bold, italic, file, and link to handle those types
# of codes, and handles S<>, E<>, X<>, and Z<> directly.  An unrecognized
# formatting code produces a warning and is replaced by its bare content.
sub interior_sequence {
    local $_;
    my ($self, $command, $seq);
    ($self, $command, $_, $seq) = @_;

    # We have to defer processing of the inside of an L<> formatting code.  If
    # this code is nested inside an L<> code, return the literal raw text of
    # it.
    my $parent = $seq->nested;
    while (defined $parent) {
        return $seq->raw_text if ($parent->cmd_name eq 'L');
        $parent = $parent->nested;
    }

    # Index entries and zero-width codes produce nothing in plain text.
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, warning if invalid.  An
    # all-digit escape is taken as a decimal character code; anything else
    # is looked up by name in %ESCAPES.
    if ($command eq 'E') {
        if (/^\d+$/) {
            return chr;
        } else {
            return $ESCAPES{$_} if defined $ESCAPES{$_};
            my ($file, $line) = $seq->file_line;
            warn "$file:$line: Unknown escape: E<$_>\n";
            return "E<$_>";
        }
    }

    # For all the other formatting codes, empty content produces no output.
    return if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s+/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }

    # Unknown formatting code.  Return the content explicitly: without this
    # the sub would fall off the end and return the true value of warn,
    # which the caller would splice into the text as a stray "1".
    my ($file, $line) = $seq->file_line;
    warn "$file:$line: Unknown formatting code: $command<$_>\n";
    return $_;
}
f02a87df 322
# Called with the raw text of each paragraph.  Expands every tab to spaces,
# assuming tab stops every eight columns, and returns the result.  When the
# parser reports (via cutting) that it is outside POD, the expanded text is
# also passed to output_code.
sub preprocess_paragraph {
    my ($self, $text) = @_;

    # Expand the first run of tabs on a line, repeating until none remain.
    # Each run is padded out to the next multiple of eight columns.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
    }

    # $text itself is passed (not a copy) so that any in-place change made
    # while outputting is reflected in the return value, as before.
    $self->output_code ($text) if $self->cutting;
    return $text;
}
3ec07288 334
69e00e79 335
3c014959 336##############################################################################
6055f9d4 337# Command paragraphs
3c014959 338##############################################################################
f2506fb2 339
6055f9d4 340# All command paragraphs take the paragraph and the line number.
69e00e79 341
6055f9d4
GS
# First level heading: no indentation, '====' as the alt-format marker.
sub cmd_head1 {
    my $self = shift;
    my ($text, $line) = @_;
    return $self->heading ($text, $line, 0, '====');
}
69e00e79 347
6055f9d4
GS
# Second level heading: indented by half the configured indent.
sub cmd_head2 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $self->{indent} / 2;
    return $self->heading ($text, $line, $indent, '== ');
}
69e00e79 353
50a3fd2a
RA
# Third level heading: indented by two thirds of the configured indent (the
# added 0.5 rounds to the nearest column once the value is truncated).
sub cmd_head3 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $self->{indent} * 2 / 3 + 0.5;
    return $self->heading ($text, $line, $indent, '= ');
}
359
# Fourth level heading: indented by three quarters of the configured indent
# (the added 0.5 rounds to the nearest column once the value is truncated).
sub cmd_head4 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $self->{indent} * 3 / 4 + 0.5;
    return $self->heading ($text, $line, $indent, '- ');
}
365
6055f9d4
GS
# Start a list.  Takes the text following =over, which should be the number
# of columns to indent; anything that is not an optionally signed integer
# (with optional trailing whitespace) falls back to the default indent.  The
# current margin is saved on the INDENTS stack so that =back can restore it,
# and the new margin is returned.
sub cmd_over {
    my $self = shift;
    local $_ = shift;

    # Flush a pending =item that never got a paragraph of its own.
    $self->item ("\n\n") if defined $$self{ITEM};

    # Trailing whitespace is optional: requiring it meant that an argument
    # arriving without its newline was silently replaced by the default.
    unless (defined $_ && /^[-+]?\d+\s*$/) { $_ = $$self{indent} }
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($_ + 0);
}
69e00e79 375
6055f9d4
GS
# End a list.  Restores the margin that was in effect before the matching
# =over.  If there was no matching =over, warns and resets the margin to the
# default indent.
sub cmd_back {
    my ($self, $text, $line, $paragraph) = @_;

    # Flush a pending =item that never got a paragraph of its own.
    $self->item ("\n\n") if defined $self->{ITEM};

    my $previous = pop @{ $self->{INDENTS} };
    $self->{MARGIN} = $previous;
    unless (defined $previous) {
        (my $file, $line) = $paragraph->file_line;
        warn "$file:$line: Unmatched =back\n";
        $self->{MARGIN} = $self->{indent};
    }
}
388
6055f9d4
GS
# An individual list item.  Takes the text following =item and stores the
# interpolated tag in ITEM until the item's paragraph arrives (or until we
# know it has none).  An =item with no text at all gets a '*' bullet.
sub cmd_item {
    my $self = shift;

    # A previous =item that never got a paragraph must be flushed first.
    if (defined $$self{ITEM}) { $self->item }
    local $_ = shift;
    $_ = '' unless defined $_;
    s/\s+$//;

    # Test the length rather than the truth of the tag: "=item 0" is a
    # perfectly good tag and must not be replaced by the default bullet.
    $$self{ITEM} = length ($_) ? $self->interpolate ($_) : '*';
}
69e00e79 397
27f805f4
GS
# Begin a block for a particular translator.  The first word of the argument
# names the format: "text" sets VERBATIM, which triggers special handling in
# textblock(), and any other format sets EXCLUDE so that the block is
# skipped.  An argument with no format name is ignored.
sub cmd_begin {
    my ($self, $spec) = @_;
    return unless $spec =~ /^(\S+)/;
    my $flag = ($1 eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $self->{$flag} = 1;
}
f2506fb2 410
# End a block for a particular translator by clearing both block flags.  No
# attempt is made to match the =end against its =begin; we assume that all
# =begin/=end pairs are properly closed.
sub cmd_end {
    my ($self) = @_;
    $self->{EXCLUDE}  = 0;
    $self->{VERBATIM} = 0;
}
6055f9d4
GS
418
# One paragraph for a particular translator.  Ignored unless it is addressed
# to "text", in which case the format name is stripped and the remainder is
# treated as a verbatim text block.
sub cmd_for {
    my ($self, $text, $line) = @_;
    return unless $text =~ s/^text\b[ \t]*\n?//;
    $self->verbatim ($text, $line);
}
f2506fb2 428
69e00e79 429
3c014959 430##############################################################################
5ec554fb 431# Formatting codes
3c014959 432##############################################################################
69e00e79 433
5ec554fb
JH
434# The simple ones. These are here mostly so that subclasses can override them
435# and do more complicated things.
# B<> text: wrapped in ``...'' in the alt format, otherwise left unchanged.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return "``$text''";
}
# F<> text: double-quoted in the alt format, otherwise left unchanged.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return '"' . $text . '"';
}
# I<> text: always surrounded by asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
3c014959
JH
439
# Format C<> text.  Text that would not benefit from quoting (it is already
# quoted, or looks like a variable, a simple function call, or a number) is
# returned unchanged; everything else is wrapped in the configured quote
# marks, or in ``...'' in the alt format.  The heuristics originally come
# from Barrie Slaymaker and largely duplicate code in Pod::Man.
sub seq_c {
    my ($self, $code) = @_;

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Things that we don't want to quote; if the whole text is one of them,
    # hand it back exactly as it came in.
    return $code if $code =~ m{
        ^\s*
        (?:
            ( [\'\`\"] ) .* \1                              # already quoted
          | \` .* \'                                        # `quoted'
          | \$+ [\#^]? \S $index                            # special ($^Foo, $")
          | [\$\@%&*]+ \#? [:\'\w]+ $index                  # plain var or func
          | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\)  # 0/1-arg func call
          | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )?  # a number
          | 0x [a-fA-F\d]+                                  # a hex constant
        )
        \s*\z
    }xo;

    # Nothing matched, so go ahead and quote the text.
    return "``$code''" if $self->{alt};
    return "$self->{LQUOTE}$code$self->{RQUOTE}";
}
69e00e79 471
bf202ccd
JH
# Handle links.  Plain text cannot carry a real link, so all we do is decide
# what text to show: the second element returned by Pod::ParseLink's
# parselink, interpolated, and wrapped in angle brackets when the link is a
# URL.  Returns the empty string if that leaves nothing to show.
sub seq_l {
    my ($self, $link, $seq) = @_;
    my @parsed = parselink ($link);
    my $text = $parsed[1];
    my $type = $parsed[4];
    my $line = ($seq->file_line)[1];
    $text = $self->interpolate ($text, $line);
    $text = '<' . $text . '>' if $type eq 'url';
    return $text || '';
}
483
6055f9d4 484
3c014959 485##############################################################################
b616daaf
JH
486# Header handling
487##############################################################################
488
# The common code for handling all headers.  Takes the raw header text, the
# line number, the indentation, and the surrounding marker for the alt
# formatting method.  The text is interpolated here.  In the alt format the
# heading is framed by the marker and its mirror image; otherwise it is
# indented, with an extra blank line after it when the loose option is set.
sub heading {
    my ($self, $text, $line, $indent, $marker) = @_;

    # Flush a pending =item that never got a paragraph of its own.
    $self->item ("\n\n") if defined $self->{ITEM};

    $text =~ s/\s+$//;
    $text = $self->interpolate ($text, $line);
    if ($self->{alt}) {
        my $closemark = scalar reverse ($marker);
        $self->output ("\n$marker $text $closemark\n\n");
    } else {
        $text .= "\n" if $self->{loose};
        $self->output ((' ' x $indent) . $text . "\n");
    }
}
505
506
507##############################################################################
6055f9d4 508# List handling
3c014959
JH
509##############################################################################
510
# This method is called whenever an =item command is complete (in other
# words, we've seen its associated paragraph or know for certain that it
# doesn't have one).  It gets the paragraph associated with the item as an
# argument.  If the paragraph is empty or all whitespace, or the tag does not
# fit in the space between the list's indentation and the current margin, the
# tag goes on a line of its own followed by the paragraph.  Otherwise the tag
# is written into the left margin of the paragraph's first line.
sub item {
    my ($self, $text) = @_;
    my $tag = $self->{ITEM};
    unless (defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $self->{ITEM};

    # The tag is placed at the indentation of the enclosing list, or at the
    # default indent if we are somehow not inside a list.
    my $indent = $self->{INDENTS}[-1];
    $indent = $self->{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $self->{alt};

    my $room = $self->{MARGIN} - $indent;
    if (!$text || $text =~ /^\s+$/ || ($room < length ($tag) + 1)) {
        # Tag on its own line, formatted with the margin temporarily pulled
        # back to the list indentation.
        my $margin = $self->{MARGIN};
        $self->{MARGIN} = $indent;
        my $output = $self->reformat ($tag);
        $output =~ s/\n*$/\n/;

        # If the text is just whitespace, we have an empty item paragraph;
        # this can result from =over/=item/=back without any intermixed
        # paragraphs.  Insert some whitespace to keep the =item from merging
        # into the next paragraph.
        $output .= "\n" if $text && $text =~ /^\s*$/;

        $self->output ($output);
        $self->{MARGIN} = $margin;
        $self->output ($self->reformat ($text)) if $text && $text =~ /\S/;
    } else {
        # Tag fits: overwrite part of the first line's margin with it.
        $text = $self->reformat ($text);
        $text =~ s/^ /:/ if ($self->{alt} && $indent > 0);
        my $tagspace = ' ' x length $tag;
        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($text);
    }
}
554
69e00e79 555
3c014959 556##############################################################################
6055f9d4 557# Output formatting
3c014959 558##############################################################################
69e00e79 559
3c014959
JH
# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
# because it plays games with tabs.  We can't use formline, even though we'd
# really like to, because it screws up non-printing characters.  So we have to
# do the wrapping ourselves.
#
# Takes the text to wrap and returns it with every line prefixed by MARGIN
# spaces and broken at whitespace so that it fits in the configured width
# where possible; a word longer than the available width is split.  Trailing
# whitespace in the result is replaced by a blank line.
sub wrap {
    my $self = shift;
    my $text = shift;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{width} - $$self{MARGIN};

    # Only wrap when there is room for at least one character per line.  With
    # a width of exactly zero the fallback substitution below would match the
    # empty string on every pass and the loop would never finish; in that
    # case the text is emitted unwrapped, as already happened whenever the
    # margin exceeded the width.
    if ($width > 0) {
        while (length ($text) > $width) {
            # Prefer to break at whitespace; failing that, chop the line at
            # exactly $width characters.
            if ($text =~ s/^([^\n]{0,$width})\s+//
                || $text =~ s/^([^\n]{$width})//) {
                $output .= $spaces . $1 . "\n";
            } else {
                last;
            }
        }
    }
    $output .= $spaces . $text;
    $output =~ s/\s+$/\n\n/;
    $output;
}
581
6055f9d4
GS
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text as produced by wrap().
sub reformat {
    my $self = shift;
    local $_ = shift;

    # If we're trying to preserve two spaces after sentences, do some munging
    # to support that.  Otherwise, smash all repeated whitespace.
    if ($$self{sentence}) {
        s/ +$//mg;          # drop trailing spaces on every line
        s/\.\n/. \n/g;      # a sentence ending a line gets an extra space...
        s/\n/ /g;           # ...so it has two once the lines are joined

        # Cut runs of three or more spaces down to two.  Runs of exactly two
        # must survive here; collapsing every run to a single space would
        # throw away the very sentence spacing this option exists to keep.
        s/   +/  /g;
    } else {
        s/\s+/ /g;
    }
    $self->wrap ($_);
}
600
6055f9d4
GS
# Output text to the output device.  The Ctrl-A placeholders left by S<> and
# E<nbsp> are turned back into spaces first; this is done in place on the
# caller's argument, which is why it is accessed through @_ and not copied.
sub output {
    my $self = $_[0];
    $_[1] =~ tr/\01/ /;
    my $handle = $self->output_handle;
    print $handle $_[1];
}
69e00e79 603
59548eca
JH
# Output a block of code (something that isn't part of the POD text).  Called
# by preprocess_paragraph for paragraphs seen outside POD.  It simply forwards
# to output and exists only so that it can be overridden by subclasses.
sub output_code {
    my $self = shift;
    $self->output ($_[0]);
}
608
69e00e79 609
3c014959 610##############################################################################
27f805f4 611# Backwards compatibility
3c014959 612##############################################################################
27f805f4
GS
613
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
#
# Takes optional leading flags (-a for the alt format, -<number> for the
# width), then an input file, then optionally an output file handle.  Returns
# whatever the underlying parse method returns and croaks if the input cannot
# be opened.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  The definedness check
    # keeps a call with no arguments from warning about an undefined value.
    while (@_ && defined $_[0] && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1)    }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which means
    # we need to turn the first argument into a file handle.
    if (defined $_[1]) {
        my @fhs = @_;
        local *IN;

        # NOTE(review): the two-argument open is deliberate.  Magic open is
        # what makes the legacy "<&STDIN" form work, but it also means the
        # name is interpreted for mode characters, so untrusted file names
        # should not be passed to this compatibility interface.
        unless (open (IN, $fhs[0])) {
            croak ("Can't open $fhs[0] for reading: $!\n");
        }
        $fhs[0] = \*IN;
        return $parser->parse_from_filehandle (@fhs);
    } else {
        return $parser->parse_from_file (@_);
    }
}
652
653
3c014959 654##############################################################################
6055f9d4 655# Module return value and documentation
3c014959 656##############################################################################
69e00e79 657
6055f9d4
GS
6581;
659__END__
69e00e79 660
6055f9d4 661=head1 NAME
69e00e79 662
6055f9d4 663Pod::Text - Convert POD data to formatted ASCII text
69e00e79 664
6055f9d4 665=head1 SYNOPSIS
69e00e79 666
6055f9d4
GS
667 use Pod::Text;
668 my $parser = Pod::Text->new (sentence => 0, width => 78);
69e00e79 669
6055f9d4
GS
670 # Read POD from STDIN and write to STDOUT.
671 $parser->parse_from_filehandle;
69e00e79 672
6055f9d4
GS
673 # Read POD from file.pod and write to file.txt.
674 $parser->parse_from_file ('file.pod', 'file.txt');
69e00e79 675
6055f9d4 676=head1 DESCRIPTION
5491a304 677
27f805f4
GS
678Pod::Text is a module that can convert documentation in the POD format (the
679preferred language for documenting Perl) into formatted ASCII. It uses no
680special formatting controls or codes whatsoever, and its output is therefore
681suitable for nearly any device.
69e00e79 682
27f805f4
GS
683As a derived class from Pod::Parser, Pod::Text supports the same methods and
684interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
bf202ccd 685new parser with C<< Pod::Text->new() >> and then calls either
27f805f4 686parse_from_filehandle() or parse_from_file().
6055f9d4 687
27f805f4 688new() can take options, in the form of key/value pairs, that control the
6055f9d4
GS
689behavior of the parser. The currently recognized options are:
690
691=over 4
692
693=item alt
694
695If set to a true value, selects an alternate output format that, among other
696things, uses a different heading style and marks C<=item> entries with a
697colon in the left margin. Defaults to false.
698
59548eca
JH
699=item code
700
701If set to a true value, the non-POD parts of the input file will be included
702in the output. Useful for viewing code documented with POD blocks with the
703POD rendered and the code left intact.
704
6055f9d4
GS
705=item indent
706
707The number of spaces to indent regular text, and the default indentation for
708C<=over> blocks. Defaults to 4.
709
710=item loose
711
712If set to a true value, a blank line is printed after a C<=head1> heading.
713If set to false (the default), no blank line is printed after C<=head1>,
714although one is still printed after C<=head2>. This is the default because
715it's the expected formatting for manual pages; if you're formatting
716arbitrary text documents, setting this to true may result in more pleasing
717output.
718
ab1f1d91
JH
719=item quotes
720
721Sets the quote marks used to surround CE<lt>> text. If the value is a
722single character, it is used as both the left and right quote; if it is two
723characters, the first character is used as the left quote and the second as
724the right quote; and if it is four characters, the first two are used as
725the left quote and the second two as the right quote.
726
727This may also be set to the special value C<none>, in which case no quote
728marks are added around CE<lt>> text.
729
6055f9d4
GS
730=item sentence
731
27f805f4
GS
732If set to a true value, Pod::Text will assume that each sentence ends in two
733spaces, and will try to preserve that spacing. If set to false, all
6055f9d4
GS
734consecutive whitespace in non-verbatim paragraphs is compressed into a
735single space. Defaults to false.
736
737=item width
738
739The column at which to wrap text on the right-hand side. Defaults to 76.
740
741=back
742
27f805f4 743The standard Pod::Parser method parse_from_filehandle() takes up to two
6055f9d4
GS
744arguments, the first being the file handle to read POD from and the second
745being the file handle to write the formatted output to. The first defaults
746to STDIN if not given, and the second defaults to STDOUT. The method
27f805f4
GS
747parse_from_file() is almost identical, except that its two arguments are the
748input and output disk files instead. See L<Pod::Parser> for the specific
749details.
6055f9d4
GS
750
751=head1 DIAGNOSTICS
752
753=over 4
754
27f805f4
GS
755=item Bizarre space in item
756
59548eca
JH
757=item Item called without tag
758
759(W) Something has gone wrong in internal C<=item> processing. These
760messages indicate a bug in Pod::Text; you should never see them.
27f805f4
GS
761
762=item Can't open %s for reading: %s
763
764(F) Pod::Text was invoked via the compatibility mode pod2text() interface
765and the input file it was given could not be opened.
766
ab1f1d91
JH
767=item Invalid quote specification "%s"
768
769(F) The quote specification given (the quotes option to the constructor) was
770invalid. A quote specification must be one, two, or four characters long.
771
2da3dd12 772=item %s:%d: Unknown command paragraph: %s
ab1f1d91
JH
773
774(W) The POD source contained a non-standard command paragraph (something of
775the form C<=command args>) that Pod::Text didn't know about. It was ignored.
776
59548eca 777=item %s:%d: Unknown escape: %s
6055f9d4 778
27f805f4
GS
779(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
780know about.
6055f9d4 781
5ec554fb 782=item %s:%d: Unknown formatting code: %s
6055f9d4 783
5ec554fb 784(W) The POD source contained a non-standard formatting code (something of
27f805f4 785the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
6055f9d4 786
59548eca 787=item %s:%d: Unmatched =back
6055f9d4 788
27f805f4 789(W) Pod::Text encountered a C<=back> command that didn't correspond to an
6055f9d4
GS
790C<=over> command.
791
792=back
793
27f805f4
GS
794=head1 RESTRICTIONS
795
796Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
797output, due to an internal implementation detail.
798
6055f9d4
GS
799=head1 NOTES
800
27f805f4
GS
801This is a replacement for an earlier Pod::Text module written by Tom
802Christiansen. It has a revamped interface, since it now uses Pod::Parser,
803but an interface roughly compatible with the old Pod::Text::pod2text()
804function is still available. Please change to the new calling convention,
805though.
6055f9d4
GS
806
807The original Pod::Text contained code to do formatting via termcap
808sequences, although it wasn't turned on by default and it was problematic to
27f805f4 809get it to work at all. This rewrite doesn't even try to do that, but a
bf202ccd 810subclass of it does. Look for L<Pod::Text::Termcap>.
6055f9d4
GS
811
812=head1 SEE ALSO
813
bf202ccd 814L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
6055f9d4
GS
815
816=head1 AUTHOR
817
bf202ccd
JH
818Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
819Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
820Pod::Parser by Brad Appleton <bradapp@enteract.com>.
6055f9d4 821
3c014959
JH
822=head1 COPYRIGHT AND LICENSE
823
f011ec7d 824Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
3c014959
JH
825
826This program is free software; you may redistribute it and/or modify it
827under the same terms as Perl itself.
828
6055f9d4 829=cut