This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] 5.004_04 or 5.004_64: Benchmark.pm: add run-for-some-time
[perl5.git] / lib / Pod / Text.pm
CommitLineData
69e00e79 1package Pod::Text;
2
69e00e79 3=head1 NAME
4
5Pod::Text - convert POD data to formatted ASCII text
6
7=head1 SYNOPSIS
8
9 use Pod::Text;
10
11 pod2text("perlfunc.pod");
12
13Also:
14
f2506fb2 15 pod2text [B<-a>] [B<->I<width>] < input.pod
69e00e79 16
17=head1 DESCRIPTION
18
19Pod::Text is a module that can convert documentation in the POD format (such
20as can be found throughout the Perl distribution) into formatted ASCII.
21Termcap is optionally supported for boldface/underline, and can enabled via
22C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
23will be used to simulate bold and underlined text.
24
25A separate F<pod2text> program is included that is primarily a wrapper for
26Pod::Text.
27
f2506fb2
HF
28The single function C<pod2text()> can take the optional options B<-a>
29for an alternative output format, then a B<->I<width> option with the
30max terminal width, followed by one or two arguments. The first
1fef88e7 31should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
69e00e79 32STDIN. A second argument, if provided, should be a filehandle glob where
33output should be sent.
34
35=head1 AUTHOR
36
1fef88e7 37Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt>
69e00e79 38
39=head1 TODO
40
41Cleanup work. The input and output locations need to be more flexible,
42termcap shouldn't be a global variable, and the terminal speed needs to
43be properly calculated.
44
45=cut
46
47use Term::Cap;
48require Exporter;
49@ISA = Exporter;
50@EXPORT = qw(pod2text);
51
f02a87df 52use vars qw($VERSION);
f2506fb2 53$VERSION = "1.0203";
f02a87df 54
69e00e79 55$termcap=0;
56
f2506fb2
HF
57$opt_alt_format = 0;
58
69e00e79 59#$use_format=1;
60
61$UNDL = "\x1b[4m";
62$INV = "\x1b[7m";
63$BOLD = "\x1b[1m";
64$NORM = "\x1b[0m";
65
66sub pod2text {
f2506fb2 67shift if $opt_alt_format = ($_[0] eq '-a');
69e00e79 68
69if($termcap and !$setuptermcap) {
70 $setuptermcap=1;
71
72 my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
73 $UNDL = $term->{'_us'};
74 $INV = $term->{'_mr'};
75 $BOLD = $term->{'_md'};
76 $NORM = $term->{'_me'};
77}
78
79$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
69e00e79 80 || $ENV{COLUMNS}
36477c24 81 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
39e571d4 82 || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
69e00e79 83 || 72;
84
f2506fb2
HF
85@_ = ("<&STDIN") unless @_;
86local($file,*OUTPUT) = @_;
87*OUTPUT = *STDOUT if @_<2;
88
89local $: = $:;
90$: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''.
91
69e00e79 92$/ = "";
93
94$FANCY = 0;
95
96$cutting = 1;
97$DEF_INDENT = 4;
98$indent = $DEF_INDENT;
99$needspace = 0;
8c634b6e 100$begun = "";
69e00e79 101
2e917f57 102open(IN, $file) || die "Couldn't open $file: $!";
69e00e79 103
104POD_DIRECTIVE: while (<IN>) {
105 if ($cutting) {
106 next unless /^=/;
107 $cutting = 0;
108 }
8c634b6e
KA
109 if ($begun) {
110 if (/^=end\s+$begun/) {
111 $begun = "";
112 }
113 elsif ($begun eq "text") {
78ff9ed7 114 print OUTPUT $_;
8c634b6e
KA
115 }
116 next;
117 }
69e00e79 118 1 while s{^(.*?)(\t+)(.*)$}{
119 $1
120 . (' ' x (length($2) * 8 - length($1) % 8))
121 . $3
122 }me;
123 # Translate verbatim paragraph
124 if (/^\s/) {
69e00e79 125 output($_);
126 next;
127 }
128
f02a87df 129 if (/^=for\s+(\S+)\s*(.*)/s) {
8c634b6e 130 if ($1 eq "text") {
78ff9ed7 131 print OUTPUT $2,"";
8c634b6e
KA
132 } else {
133 # ignore unknown for
134 }
135 next;
136 }
f02a87df 137 elsif (/^=begin\s+(\S+)\s*(.*)/s) {
8c634b6e
KA
138 $begun = $1;
139 if ($1 eq "text") {
78ff9ed7 140 print OUTPUT $2."";
8c634b6e
KA
141 }
142 next;
143 }
144
69e00e79 145sub prepare_for_output {
146
147 s/\s*$/\n/;
148 &init_noremap;
149
150 # need to hide E<> first; they're processed in clear_noremap
151 s/(E<[^<>]+>)/noremap($1)/ge;
152 $maxnest = 10;
153 while ($maxnest-- && /[A-Z]</) {
154 unless ($FANCY) {
f2506fb2
HF
155 if ($opt_alt_format) {
156 s/[BC]<(.*?)>/``$1''/sg;
157 s/F<(.*?)>/"$1"/sg;
158 } else {
159 s/C<(.*?)>/`$1'/sg;
160 }
69e00e79 161 } else {
55497cff 162 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
69e00e79 163 }
164 # s/[IF]<(.*?)>/italic($1)/ge;
55497cff 165 s/I<(.*?)>/*$1*/sg;
69e00e79 166 # s/[CB]<(.*?)>/bold($1)/ge;
55497cff 167 s/X<.*?>//sg;
b74bceb9
AB
168
169 # LREF: a la HREF L<show this text|man/section>
170 s:L<([^|>]+)\|[^>]+>:$1:g;
171
69e00e79 172 # LREF: a manpage(3f)
173 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
174 # LREF: an =item on another manpage
175 s{
176 L<
177 ([^/]+)
178 /
179 (
180 [:\w]+
181 (\(\))?
182 )
183 >
184 } {the "$2" entry in the $1 manpage}gx;
185
186 # LREF: an =item on this manpage
187 s{
188 ((?:
189 L<
190 /
191 (
192 [:\w]+
193 (\(\))?
194 )
195 >
196 (,?\s+(and\s+)?)?
197 )+)
198 } { internal_lrefs($1) }gex;
199
200 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
201 # the "func" can disambiguate
202 s{
203 L<
204 (?:
205 ([a-zA-Z]\S+?) /
206 )?
207 "?(.*?)"?
208 >
209 }{
210 do {
211 $1 # if no $1, assume it means on this page.
212 ? "the section on \"$2\" in the $1 manpage"
213 : "the section on \"$2\""
214 }
55497cff 215 }sgex;
69e00e79 216
55497cff 217 s/[A-Z]<(.*?)>/$1/sg;
69e00e79 218 }
219 clear_noremap(1);
220}
221
222 &prepare_for_output;
223
224 if (s/^=//) {
225 # $needspace = 0; # Assume this.
226 # s/\n/ /g;
227 ($Cmd, $_) = split(' ', $_, 2);
228 # clear_noremap(1);
229 if ($Cmd eq 'cut') {
230 $cutting = 1;
231 }
78ff9ed7
RS
232 elsif ($Cmd eq 'pod') {
233 $cutting = 0;
234 }
69e00e79 235 elsif ($Cmd eq 'head1') {
236 makespace();
f2506fb2
HF
237 if ($opt_alt_format) {
238 print OUTPUT "\n";
239 s/^(.+?)[ \t]*$/==== $1 ====/;
240 }
69e00e79 241 print OUTPUT;
242 # print OUTPUT uc($_);
f2506fb2 243 $needspace = $opt_alt_format;
69e00e79 244 }
245 elsif ($Cmd eq 'head2') {
246 makespace();
247 # s/(\w+)/\u\L$1/g;
248 #print ' ' x $DEF_INDENT, $_;
249 # print "\xA7";
250 s/(\w)/\xA7 $1/ if $FANCY;
f2506fb2
HF
251 if ($opt_alt_format) {
252 s/^(.+?)[ \t]*$/== $1 ==/;
253 print OUTPUT "\n", $_;
254 } else {
255 print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
256 }
257 $needspace = $opt_alt_format;
69e00e79 258 }
259 elsif ($Cmd eq 'over') {
260 push(@indent,$indent);
261 $indent += ($_ + 0) || $DEF_INDENT;
262 }
263 elsif ($Cmd eq 'back') {
264 $indent = pop(@indent);
265 warn "Unmatched =back\n" unless defined $indent;
69e00e79 266 }
267 elsif ($Cmd eq 'item') {
268 makespace();
269 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
270 # s/^(\s*\*\s+)/$1 /;
271 {
272 if (length() + 3 < $indent) {
273 my $paratag = $_;
2e917f57 274 $_ = <IN>;
69e00e79 275 if (/^=/) { # tricked!
276 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
277 output($paratag);
278 redo POD_DIRECTIVE;
279 }
280 &prepare_for_output;
281 IP_output($paratag, $_);
282 } else {
283 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
f2506fb2 284 output($_, 0);
69e00e79 285 }
286 }
287 }
288 else {
289 warn "Unrecognized directive: $Cmd\n";
290 }
291 }
292 else {
293 # clear_noremap(1);
294 makespace();
295 output($_, 1);
296 }
297}
298
299close(IN);
300
301}
302
303#########################################################################
304
305sub makespace {
306 if ($needspace) {
307 print OUTPUT "\n";
308 $needspace = 0;
309 }
310}
311
312sub bold {
313 my $line = shift;
314 return $line if $use_format;
315 if($termcap) {
316 $line = "$BOLD$line$NORM";
317 } else {
318 $line =~ s/(.)/$1\b$1/g;
319 }
320# $line = "$BOLD$line$NORM" if $ansify;
321 return $line;
322}
323
324sub italic {
325 my $line = shift;
326 return $line if $use_format;
327 if($termcap) {
328 $line = "$UNDL$line$NORM";
329 } else {
330 $line =~ s/(.)/$1\b_/g;
331 }
332# $line = "$UNDL$line$NORM" if $ansify;
333 return $line;
334}
335
336# Fill a paragraph including underlined and overstricken chars.
337# It's not perfect for words longer than the margin, and it's probably
338# slow, but it works.
339sub fill {
340 local $_ = shift;
341 my $par = "";
342 my $indent_space = " " x $indent;
343 my $marg = $SCREEN-$indent;
344 my $line = $indent_space;
345 my $line_length;
346 foreach (split) {
347 my $word_length = length;
348 $word_length -= 2 while /\010/g; # Subtract backspaces
349
350 if ($line_length + $word_length > $marg) {
351 $par .= $line . "\n";
352 $line= $indent_space . $_;
353 $line_length = $word_length;
354 }
355 else {
356 if ($line_length) {
357 $line_length++;
358 $line .= " ";
359 }
360 $line_length += $word_length;
361 $line .= $_;
362 }
363 }
364 $par .= "$line\n" if $line;
365 $par .= "\n";
366 return $par;
367}
368
369sub IP_output {
370 local($tag, $_) = @_;
371 local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
372 $tag_cols = $SCREEN - $tag_indent;
373 $cols = $SCREEN - $indent;
374 $tag =~ s/\s*$//;
375 s/\s+/ /g;
376 s/^ //;
377 $str = "format OUTPUT = \n"
f2506fb2
HF
378 . (($opt_alt_format && $tag_indent > 1)
379 ? ":" . " " x ($tag_indent - 1)
380 : " " x ($tag_indent))
69e00e79 381 . '@' . ('<' x ($indent - $tag_indent - 1))
382 . "^" . ("<" x ($cols - 1)) . "\n"
383 . '$tag, $_'
384 . "\n~~"
385 . (" " x ($indent-2))
386 . "^" . ("<" x ($cols - 5)) . "\n"
387 . '$_' . "\n\n.\n1";
388 #warn $str; warn "tag is $tag, _ is $_";
389 eval $str || die;
390 write OUTPUT;
391}
392
393sub output {
394 local($_, $reformat) = @_;
395 if ($reformat) {
396 $cols = $SCREEN - $indent;
397 s/\s+/ /g;
398 s/^ //;
399 $str = "format OUTPUT = \n~~"
400 . (" " x ($indent-2))
401 . "^" . ("<" x ($cols - 5)) . "\n"
402 . '$_' . "\n\n.\n1";
403 eval $str || die;
404 write OUTPUT;
405 } else {
406 s/^/' ' x $indent/gem;
407 s/^\s+\n$/\n/gm;
f2506fb2 408 s/^ /: /s if defined($reformat) && $opt_alt_format;
69e00e79 409 print OUTPUT;
410 }
411}
412
413sub noremap {
414 local($thing_to_hide) = shift;
415 $thing_to_hide =~ tr/\000-\177/\200-\377/;
416 return $thing_to_hide;
417}
418
419sub init_noremap {
420 die "unmatched init" if $mapready++;
26fb054b 421 #mask off high bit characters in input stream
422 s/([\200-\377])/"E<".ord($1).">"/ge;
69e00e79 423}
424
425sub clear_noremap {
426 my $ready_to_print = $_[0];
427 die "unmatched clear" unless $mapready--;
428 tr/\200-\377/\000-\177/;
429 # now for the E<>s, which have been hidden until now
430 # otherwise the interative \w<> processing would have
431 # been hosed by the E<gt>
432 s {
26fb054b 433 E<
434 (
435 ( \d+ )
436 | ( [A-Za-z]+ )
437 )
69e00e79 438 >
439 } {
440 do {
26fb054b 441 defined $2
442 ? chr($2)
443 :
444 defined $HTML_Escapes{$3}
445 ? do { $HTML_Escapes{$3} }
69e00e79 446 : do {
f02a87df 447 warn "Unknown escape: E<$1> in $_";
69e00e79 448 "E<$1>";
449 }
450 }
451 }egx if $ready_to_print;
452}
453
454sub internal_lrefs {
455 local($_) = shift;
456 s{L</([^>]+)>}{$1}g;
457 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
458 my $retstr = "the ";
459 my $i;
460 for ($i = 0; $i <= $#items; $i++) {
461 $retstr .= "C<$items[$i]>";
462 $retstr .= ", " if @items > 2 && $i != $#items;
463 $retstr .= " and " if $i+2 == @items;
464 }
465
466 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
467 . " elsewhere in this document ";
468
469 return $retstr;
470
471}
472
473BEGIN {
474
475%HTML_Escapes = (
476 'amp' => '&', # ampersand
477 'lt' => '<', # left chevron, less-than
478 'gt' => '>', # right chevron, greater-than
479 'quot' => '"', # double quote
480
481 "Aacute" => "\xC1", # capital A, acute accent
482 "aacute" => "\xE1", # small a, acute accent
483 "Acirc" => "\xC2", # capital A, circumflex accent
484 "acirc" => "\xE2", # small a, circumflex accent
485 "AElig" => "\xC6", # capital AE diphthong (ligature)
486 "aelig" => "\xE6", # small ae diphthong (ligature)
487 "Agrave" => "\xC0", # capital A, grave accent
488 "agrave" => "\xE0", # small a, grave accent
489 "Aring" => "\xC5", # capital A, ring
490 "aring" => "\xE5", # small a, ring
491 "Atilde" => "\xC3", # capital A, tilde
492 "atilde" => "\xE3", # small a, tilde
493 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
494 "auml" => "\xE4", # small a, dieresis or umlaut mark
495 "Ccedil" => "\xC7", # capital C, cedilla
496 "ccedil" => "\xE7", # small c, cedilla
497 "Eacute" => "\xC9", # capital E, acute accent
498 "eacute" => "\xE9", # small e, acute accent
499 "Ecirc" => "\xCA", # capital E, circumflex accent
500 "ecirc" => "\xEA", # small e, circumflex accent
501 "Egrave" => "\xC8", # capital E, grave accent
502 "egrave" => "\xE8", # small e, grave accent
503 "ETH" => "\xD0", # capital Eth, Icelandic
504 "eth" => "\xF0", # small eth, Icelandic
505 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
506 "euml" => "\xEB", # small e, dieresis or umlaut mark
507 "Iacute" => "\xCD", # capital I, acute accent
508 "iacute" => "\xED", # small i, acute accent
509 "Icirc" => "\xCE", # capital I, circumflex accent
510 "icirc" => "\xEE", # small i, circumflex accent
511 "Igrave" => "\xCD", # capital I, grave accent
512 "igrave" => "\xED", # small i, grave accent
513 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
514 "iuml" => "\xEF", # small i, dieresis or umlaut mark
515 "Ntilde" => "\xD1", # capital N, tilde
516 "ntilde" => "\xF1", # small n, tilde
517 "Oacute" => "\xD3", # capital O, acute accent
518 "oacute" => "\xF3", # small o, acute accent
519 "Ocirc" => "\xD4", # capital O, circumflex accent
520 "ocirc" => "\xF4", # small o, circumflex accent
521 "Ograve" => "\xD2", # capital O, grave accent
522 "ograve" => "\xF2", # small o, grave accent
523 "Oslash" => "\xD8", # capital O, slash
524 "oslash" => "\xF8", # small o, slash
525 "Otilde" => "\xD5", # capital O, tilde
526 "otilde" => "\xF5", # small o, tilde
527 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
528 "ouml" => "\xF6", # small o, dieresis or umlaut mark
529 "szlig" => "\xDF", # small sharp s, German (sz ligature)
530 "THORN" => "\xDE", # capital THORN, Icelandic
531 "thorn" => "\xFE", # small thorn, Icelandic
532 "Uacute" => "\xDA", # capital U, acute accent
533 "uacute" => "\xFA", # small u, acute accent
534 "Ucirc" => "\xDB", # capital U, circumflex accent
535 "ucirc" => "\xFB", # small u, circumflex accent
536 "Ugrave" => "\xD9", # capital U, grave accent
537 "ugrave" => "\xF9", # small u, grave accent
538 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
539 "uuml" => "\xFC", # small u, dieresis or umlaut mark
540 "Yacute" => "\xDD", # capital Y, acute accent
541 "yacute" => "\xFD", # small y, acute accent
542 "yuml" => "\xFF", # small y, dieresis or umlaut mark
543
544 "lchevron" => "\xAB", # left chevron (double less than)
545 "rchevron" => "\xBB", # right chevron (double greater than)
546);
547}
548
5491;