Commit | Line | Data |
---|---|---|
69e00e79 | 1 | package Pod::Text; |
2 | ||
69e00e79 | 3 | =head1 NAME |
4 | ||
5 | Pod::Text - convert POD data to formatted ASCII text | |
6 | ||
7 | =head1 SYNOPSIS | |
8 | ||
9 | use Pod::Text; | |
10 | ||
11 | pod2text("perlfunc.pod"); | |
12 | ||
13 | Also: | |
14 | ||
f2506fb2 | 15 | pod2text [B<-a>] [B<->I<width>] < input.pod |
69e00e79 | 16 | |
17 | =head1 DESCRIPTION | |
18 | ||
19 | Pod::Text is a module that can convert documentation in the POD format (such | |
20 | as can be found throughout the Perl distribution) into formatted ASCII. | |
21 | Termcap is optionally supported for boldface/underline, and can enabled via | |
22 | C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces | |
23 | will be used to simulate bold and underlined text. | |
24 | ||
25 | A separate F<pod2text> program is included that is primarily a wrapper for | |
26 | Pod::Text. | |
27 | ||
f2506fb2 HF |
28 | The single function C<pod2text()> can take the optional options B<-a> |
29 | for an alternative output format, then a B<->I<width> option with the | |
30 | max terminal width, followed by one or two arguments. The first | |
1fef88e7 | 31 | should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from |
69e00e79 | 32 | STDIN. A second argument, if provided, should be a filehandle glob where |
33 | output should be sent. | |
34 | ||
35 | =head1 AUTHOR | |
36 | ||
1fef88e7 | 37 | Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt> |
69e00e79 | 38 | |
39 | =head1 TODO | |
40 | ||
41 | Cleanup work. The input and output locations need to be more flexible, | |
42 | termcap shouldn't be a global variable, and the terminal speed needs to | |
43 | be properly calculated. | |
44 | ||
45 | =cut | |
46 | ||
47 | use Term::Cap; | |
48 | require Exporter; | |
49 | @ISA = Exporter; | |
50 | @EXPORT = qw(pod2text); | |
51 | ||
f02a87df | 52 | use vars qw($VERSION); |
f2506fb2 | 53 | $VERSION = "1.0203"; |
f02a87df | 54 | |
69e00e79 | 55 | $termcap=0; |
56 | ||
f2506fb2 HF |
57 | $opt_alt_format = 0; |
58 | ||
69e00e79 | 59 | #$use_format=1; |
60 | ||
61 | $UNDL = "\x1b[4m"; | |
62 | $INV = "\x1b[7m"; | |
63 | $BOLD = "\x1b[1m"; | |
64 | $NORM = "\x1b[0m"; | |
65 | ||
66 | sub pod2text { | |
f2506fb2 | 67 | shift if $opt_alt_format = ($_[0] eq '-a'); |
69e00e79 | 68 | |
69 | if($termcap and !$setuptermcap) { | |
70 | $setuptermcap=1; | |
71 | ||
72 | my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 }; | |
73 | $UNDL = $term->{'_us'}; | |
74 | $INV = $term->{'_mr'}; | |
75 | $BOLD = $term->{'_md'}; | |
76 | $NORM = $term->{'_me'}; | |
77 | } | |
78 | ||
79 | $SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1)) | |
69e00e79 | 80 | || $ENV{COLUMNS} |
36477c24 | 81 | || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] |
39e571d4 | 82 | || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]) |
69e00e79 | 83 | || 72; |
84 | ||
f2506fb2 HF |
85 | @_ = ("<&STDIN") unless @_; |
86 | local($file,*OUTPUT) = @_; | |
87 | *OUTPUT = *STDOUT if @_<2; | |
88 | ||
89 | local $: = $:; | |
90 | $: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''. | |
91 | ||
69e00e79 | 92 | $/ = ""; |
93 | ||
94 | $FANCY = 0; | |
95 | ||
96 | $cutting = 1; | |
97 | $DEF_INDENT = 4; | |
98 | $indent = $DEF_INDENT; | |
99 | $needspace = 0; | |
8c634b6e | 100 | $begun = ""; |
69e00e79 | 101 | |
2e917f57 | 102 | open(IN, $file) || die "Couldn't open $file: $!"; |
69e00e79 | 103 | |
104 | POD_DIRECTIVE: while (<IN>) { | |
105 | if ($cutting) { | |
106 | next unless /^=/; | |
107 | $cutting = 0; | |
108 | } | |
8c634b6e KA |
109 | if ($begun) { |
110 | if (/^=end\s+$begun/) { | |
111 | $begun = ""; | |
112 | } | |
113 | elsif ($begun eq "text") { | |
78ff9ed7 | 114 | print OUTPUT $_; |
8c634b6e KA |
115 | } |
116 | next; | |
117 | } | |
69e00e79 | 118 | 1 while s{^(.*?)(\t+)(.*)$}{ |
119 | $1 | |
120 | . (' ' x (length($2) * 8 - length($1) % 8)) | |
121 | . $3 | |
122 | }me; | |
123 | # Translate verbatim paragraph | |
124 | if (/^\s/) { | |
69e00e79 | 125 | output($_); |
126 | next; | |
127 | } | |
128 | ||
f02a87df | 129 | if (/^=for\s+(\S+)\s*(.*)/s) { |
8c634b6e | 130 | if ($1 eq "text") { |
78ff9ed7 | 131 | print OUTPUT $2,""; |
8c634b6e KA |
132 | } else { |
133 | # ignore unknown for | |
134 | } | |
135 | next; | |
136 | } | |
f02a87df | 137 | elsif (/^=begin\s+(\S+)\s*(.*)/s) { |
8c634b6e KA |
138 | $begun = $1; |
139 | if ($1 eq "text") { | |
78ff9ed7 | 140 | print OUTPUT $2.""; |
8c634b6e KA |
141 | } |
142 | next; | |
143 | } | |
144 | ||
69e00e79 | 145 | sub prepare_for_output { |
146 | ||
147 | s/\s*$/\n/; | |
148 | &init_noremap; | |
149 | ||
150 | # need to hide E<> first; they're processed in clear_noremap | |
151 | s/(E<[^<>]+>)/noremap($1)/ge; | |
152 | $maxnest = 10; | |
153 | while ($maxnest-- && /[A-Z]</) { | |
154 | unless ($FANCY) { | |
f2506fb2 HF |
155 | if ($opt_alt_format) { |
156 | s/[BC]<(.*?)>/``$1''/sg; | |
157 | s/F<(.*?)>/"$1"/sg; | |
158 | } else { | |
159 | s/C<(.*?)>/`$1'/sg; | |
160 | } | |
69e00e79 | 161 | } else { |
55497cff | 162 | s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge; |
69e00e79 | 163 | } |
164 | # s/[IF]<(.*?)>/italic($1)/ge; | |
55497cff | 165 | s/I<(.*?)>/*$1*/sg; |
69e00e79 | 166 | # s/[CB]<(.*?)>/bold($1)/ge; |
55497cff | 167 | s/X<.*?>//sg; |
b74bceb9 AB |
168 | |
169 | # LREF: a la HREF L<show this text|man/section> | |
170 | s:L<([^|>]+)\|[^>]+>:$1:g; | |
171 | ||
69e00e79 | 172 | # LREF: a manpage(3f) |
173 | s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; | |
174 | # LREF: an =item on another manpage | |
175 | s{ | |
176 | L< | |
177 | ([^/]+) | |
178 | / | |
179 | ( | |
180 | [:\w]+ | |
181 | (\(\))? | |
182 | ) | |
183 | > | |
184 | } {the "$2" entry in the $1 manpage}gx; | |
185 | ||
186 | # LREF: an =item on this manpage | |
187 | s{ | |
188 | ((?: | |
189 | L< | |
190 | / | |
191 | ( | |
192 | [:\w]+ | |
193 | (\(\))? | |
194 | ) | |
195 | > | |
196 | (,?\s+(and\s+)?)? | |
197 | )+) | |
198 | } { internal_lrefs($1) }gex; | |
199 | ||
200 | # LREF: a =head2 (head1?), maybe on a manpage, maybe right here | |
201 | # the "func" can disambiguate | |
202 | s{ | |
203 | L< | |
204 | (?: | |
205 | ([a-zA-Z]\S+?) / | |
206 | )? | |
207 | "?(.*?)"? | |
208 | > | |
209 | }{ | |
210 | do { | |
211 | $1 # if no $1, assume it means on this page. | |
212 | ? "the section on \"$2\" in the $1 manpage" | |
213 | : "the section on \"$2\"" | |
214 | } | |
55497cff | 215 | }sgex; |
69e00e79 | 216 | |
55497cff | 217 | s/[A-Z]<(.*?)>/$1/sg; |
69e00e79 | 218 | } |
219 | clear_noremap(1); | |
220 | } | |
221 | ||
222 | &prepare_for_output; | |
223 | ||
224 | if (s/^=//) { | |
225 | # $needspace = 0; # Assume this. | |
226 | # s/\n/ /g; | |
227 | ($Cmd, $_) = split(' ', $_, 2); | |
228 | # clear_noremap(1); | |
229 | if ($Cmd eq 'cut') { | |
230 | $cutting = 1; | |
231 | } | |
78ff9ed7 RS |
232 | elsif ($Cmd eq 'pod') { |
233 | $cutting = 0; | |
234 | } | |
69e00e79 | 235 | elsif ($Cmd eq 'head1') { |
236 | makespace(); | |
f2506fb2 HF |
237 | if ($opt_alt_format) { |
238 | print OUTPUT "\n"; | |
239 | s/^(.+?)[ \t]*$/==== $1 ====/; | |
240 | } | |
69e00e79 | 241 | print OUTPUT; |
242 | # print OUTPUT uc($_); | |
f2506fb2 | 243 | $needspace = $opt_alt_format; |
69e00e79 | 244 | } |
245 | elsif ($Cmd eq 'head2') { | |
246 | makespace(); | |
247 | # s/(\w+)/\u\L$1/g; | |
248 | #print ' ' x $DEF_INDENT, $_; | |
249 | # print "\xA7"; | |
250 | s/(\w)/\xA7 $1/ if $FANCY; | |
f2506fb2 HF |
251 | if ($opt_alt_format) { |
252 | s/^(.+?)[ \t]*$/== $1 ==/; | |
253 | print OUTPUT "\n", $_; | |
254 | } else { | |
255 | print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n"; | |
256 | } | |
257 | $needspace = $opt_alt_format; | |
69e00e79 | 258 | } |
259 | elsif ($Cmd eq 'over') { | |
260 | push(@indent,$indent); | |
261 | $indent += ($_ + 0) || $DEF_INDENT; | |
262 | } | |
263 | elsif ($Cmd eq 'back') { | |
264 | $indent = pop(@indent); | |
265 | warn "Unmatched =back\n" unless defined $indent; | |
69e00e79 | 266 | } |
267 | elsif ($Cmd eq 'item') { | |
268 | makespace(); | |
269 | # s/\A(\s*)\*/$1\xb7/ if $FANCY; | |
270 | # s/^(\s*\*\s+)/$1 /; | |
271 | { | |
272 | if (length() + 3 < $indent) { | |
273 | my $paratag = $_; | |
2e917f57 | 274 | $_ = <IN>; |
69e00e79 | 275 | if (/^=/) { # tricked! |
276 | local($indent) = $indent[$#index - 1] || $DEF_INDENT; | |
277 | output($paratag); | |
278 | redo POD_DIRECTIVE; | |
279 | } | |
280 | &prepare_for_output; | |
281 | IP_output($paratag, $_); | |
282 | } else { | |
283 | local($indent) = $indent[$#index - 1] || $DEF_INDENT; | |
f2506fb2 | 284 | output($_, 0); |
69e00e79 | 285 | } |
286 | } | |
287 | } | |
288 | else { | |
289 | warn "Unrecognized directive: $Cmd\n"; | |
290 | } | |
291 | } | |
292 | else { | |
293 | # clear_noremap(1); | |
294 | makespace(); | |
295 | output($_, 1); | |
296 | } | |
297 | } | |
298 | ||
299 | close(IN); | |
300 | ||
301 | } | |
302 | ||
303 | ######################################################################### | |
304 | ||
305 | sub makespace { | |
306 | if ($needspace) { | |
307 | print OUTPUT "\n"; | |
308 | $needspace = 0; | |
309 | } | |
310 | } | |
311 | ||
312 | sub bold { | |
313 | my $line = shift; | |
314 | return $line if $use_format; | |
315 | if($termcap) { | |
316 | $line = "$BOLD$line$NORM"; | |
317 | } else { | |
318 | $line =~ s/(.)/$1\b$1/g; | |
319 | } | |
320 | # $line = "$BOLD$line$NORM" if $ansify; | |
321 | return $line; | |
322 | } | |
323 | ||
324 | sub italic { | |
325 | my $line = shift; | |
326 | return $line if $use_format; | |
327 | if($termcap) { | |
328 | $line = "$UNDL$line$NORM"; | |
329 | } else { | |
330 | $line =~ s/(.)/$1\b_/g; | |
331 | } | |
332 | # $line = "$UNDL$line$NORM" if $ansify; | |
333 | return $line; | |
334 | } | |
335 | ||
336 | # Fill a paragraph including underlined and overstricken chars. | |
337 | # It's not perfect for words longer than the margin, and it's probably | |
338 | # slow, but it works. | |
339 | sub fill { | |
340 | local $_ = shift; | |
341 | my $par = ""; | |
342 | my $indent_space = " " x $indent; | |
343 | my $marg = $SCREEN-$indent; | |
344 | my $line = $indent_space; | |
345 | my $line_length; | |
346 | foreach (split) { | |
347 | my $word_length = length; | |
348 | $word_length -= 2 while /\010/g; # Subtract backspaces | |
349 | ||
350 | if ($line_length + $word_length > $marg) { | |
351 | $par .= $line . "\n"; | |
352 | $line= $indent_space . $_; | |
353 | $line_length = $word_length; | |
354 | } | |
355 | else { | |
356 | if ($line_length) { | |
357 | $line_length++; | |
358 | $line .= " "; | |
359 | } | |
360 | $line_length += $word_length; | |
361 | $line .= $_; | |
362 | } | |
363 | } | |
364 | $par .= "$line\n" if $line; | |
365 | $par .= "\n"; | |
366 | return $par; | |
367 | } | |
368 | ||
369 | sub IP_output { | |
370 | local($tag, $_) = @_; | |
371 | local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT; | |
372 | $tag_cols = $SCREEN - $tag_indent; | |
373 | $cols = $SCREEN - $indent; | |
374 | $tag =~ s/\s*$//; | |
375 | s/\s+/ /g; | |
376 | s/^ //; | |
377 | $str = "format OUTPUT = \n" | |
f2506fb2 HF |
378 | . (($opt_alt_format && $tag_indent > 1) |
379 | ? ":" . " " x ($tag_indent - 1) | |
380 | : " " x ($tag_indent)) | |
69e00e79 | 381 | . '@' . ('<' x ($indent - $tag_indent - 1)) |
382 | . "^" . ("<" x ($cols - 1)) . "\n" | |
383 | . '$tag, $_' | |
384 | . "\n~~" | |
385 | . (" " x ($indent-2)) | |
386 | . "^" . ("<" x ($cols - 5)) . "\n" | |
387 | . '$_' . "\n\n.\n1"; | |
388 | #warn $str; warn "tag is $tag, _ is $_"; | |
389 | eval $str || die; | |
390 | write OUTPUT; | |
391 | } | |
392 | ||
393 | sub output { | |
394 | local($_, $reformat) = @_; | |
395 | if ($reformat) { | |
396 | $cols = $SCREEN - $indent; | |
397 | s/\s+/ /g; | |
398 | s/^ //; | |
399 | $str = "format OUTPUT = \n~~" | |
400 | . (" " x ($indent-2)) | |
401 | . "^" . ("<" x ($cols - 5)) . "\n" | |
402 | . '$_' . "\n\n.\n1"; | |
403 | eval $str || die; | |
404 | write OUTPUT; | |
405 | } else { | |
406 | s/^/' ' x $indent/gem; | |
407 | s/^\s+\n$/\n/gm; | |
f2506fb2 | 408 | s/^ /: /s if defined($reformat) && $opt_alt_format; |
69e00e79 | 409 | print OUTPUT; |
410 | } | |
411 | } | |
412 | ||
413 | sub noremap { | |
414 | local($thing_to_hide) = shift; | |
415 | $thing_to_hide =~ tr/\000-\177/\200-\377/; | |
416 | return $thing_to_hide; | |
417 | } | |
418 | ||
419 | sub init_noremap { | |
420 | die "unmatched init" if $mapready++; | |
26fb054b | 421 | #mask off high bit characters in input stream |
422 | s/([\200-\377])/"E<".ord($1).">"/ge; | |
69e00e79 | 423 | } |
424 | ||
425 | sub clear_noremap { | |
426 | my $ready_to_print = $_[0]; | |
427 | die "unmatched clear" unless $mapready--; | |
428 | tr/\200-\377/\000-\177/; | |
429 | # now for the E<>s, which have been hidden until now | |
430 | # otherwise the interative \w<> processing would have | |
431 | # been hosed by the E<gt> | |
432 | s { | |
26fb054b | 433 | E< |
434 | ( | |
435 | ( \d+ ) | |
436 | | ( [A-Za-z]+ ) | |
437 | ) | |
69e00e79 | 438 | > |
439 | } { | |
440 | do { | |
26fb054b | 441 | defined $2 |
442 | ? chr($2) | |
443 | : | |
444 | defined $HTML_Escapes{$3} | |
445 | ? do { $HTML_Escapes{$3} } | |
69e00e79 | 446 | : do { |
f02a87df | 447 | warn "Unknown escape: E<$1> in $_"; |
69e00e79 | 448 | "E<$1>"; |
449 | } | |
450 | } | |
451 | }egx if $ready_to_print; | |
452 | } | |
453 | ||
454 | sub internal_lrefs { | |
455 | local($_) = shift; | |
456 | s{L</([^>]+)>}{$1}g; | |
457 | my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); | |
458 | my $retstr = "the "; | |
459 | my $i; | |
460 | for ($i = 0; $i <= $#items; $i++) { | |
461 | $retstr .= "C<$items[$i]>"; | |
462 | $retstr .= ", " if @items > 2 && $i != $#items; | |
463 | $retstr .= " and " if $i+2 == @items; | |
464 | } | |
465 | ||
466 | $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) | |
467 | . " elsewhere in this document "; | |
468 | ||
469 | return $retstr; | |
470 | ||
471 | } | |
472 | ||
473 | BEGIN { | |
474 | ||
475 | %HTML_Escapes = ( | |
476 | 'amp' => '&', # ampersand | |
477 | 'lt' => '<', # left chevron, less-than | |
478 | 'gt' => '>', # right chevron, greater-than | |
479 | 'quot' => '"', # double quote | |
480 | ||
481 | "Aacute" => "\xC1", # capital A, acute accent | |
482 | "aacute" => "\xE1", # small a, acute accent | |
483 | "Acirc" => "\xC2", # capital A, circumflex accent | |
484 | "acirc" => "\xE2", # small a, circumflex accent | |
485 | "AElig" => "\xC6", # capital AE diphthong (ligature) | |
486 | "aelig" => "\xE6", # small ae diphthong (ligature) | |
487 | "Agrave" => "\xC0", # capital A, grave accent | |
488 | "agrave" => "\xE0", # small a, grave accent | |
489 | "Aring" => "\xC5", # capital A, ring | |
490 | "aring" => "\xE5", # small a, ring | |
491 | "Atilde" => "\xC3", # capital A, tilde | |
492 | "atilde" => "\xE3", # small a, tilde | |
493 | "Auml" => "\xC4", # capital A, dieresis or umlaut mark | |
494 | "auml" => "\xE4", # small a, dieresis or umlaut mark | |
495 | "Ccedil" => "\xC7", # capital C, cedilla | |
496 | "ccedil" => "\xE7", # small c, cedilla | |
497 | "Eacute" => "\xC9", # capital E, acute accent | |
498 | "eacute" => "\xE9", # small e, acute accent | |
499 | "Ecirc" => "\xCA", # capital E, circumflex accent | |
500 | "ecirc" => "\xEA", # small e, circumflex accent | |
501 | "Egrave" => "\xC8", # capital E, grave accent | |
502 | "egrave" => "\xE8", # small e, grave accent | |
503 | "ETH" => "\xD0", # capital Eth, Icelandic | |
504 | "eth" => "\xF0", # small eth, Icelandic | |
505 | "Euml" => "\xCB", # capital E, dieresis or umlaut mark | |
506 | "euml" => "\xEB", # small e, dieresis or umlaut mark | |
507 | "Iacute" => "\xCD", # capital I, acute accent | |
508 | "iacute" => "\xED", # small i, acute accent | |
509 | "Icirc" => "\xCE", # capital I, circumflex accent | |
510 | "icirc" => "\xEE", # small i, circumflex accent | |
511 | "Igrave" => "\xCD", # capital I, grave accent | |
512 | "igrave" => "\xED", # small i, grave accent | |
513 | "Iuml" => "\xCF", # capital I, dieresis or umlaut mark | |
514 | "iuml" => "\xEF", # small i, dieresis or umlaut mark | |
515 | "Ntilde" => "\xD1", # capital N, tilde | |
516 | "ntilde" => "\xF1", # small n, tilde | |
517 | "Oacute" => "\xD3", # capital O, acute accent | |
518 | "oacute" => "\xF3", # small o, acute accent | |
519 | "Ocirc" => "\xD4", # capital O, circumflex accent | |
520 | "ocirc" => "\xF4", # small o, circumflex accent | |
521 | "Ograve" => "\xD2", # capital O, grave accent | |
522 | "ograve" => "\xF2", # small o, grave accent | |
523 | "Oslash" => "\xD8", # capital O, slash | |
524 | "oslash" => "\xF8", # small o, slash | |
525 | "Otilde" => "\xD5", # capital O, tilde | |
526 | "otilde" => "\xF5", # small o, tilde | |
527 | "Ouml" => "\xD6", # capital O, dieresis or umlaut mark | |
528 | "ouml" => "\xF6", # small o, dieresis or umlaut mark | |
529 | "szlig" => "\xDF", # small sharp s, German (sz ligature) | |
530 | "THORN" => "\xDE", # capital THORN, Icelandic | |
531 | "thorn" => "\xFE", # small thorn, Icelandic | |
532 | "Uacute" => "\xDA", # capital U, acute accent | |
533 | "uacute" => "\xFA", # small u, acute accent | |
534 | "Ucirc" => "\xDB", # capital U, circumflex accent | |
535 | "ucirc" => "\xFB", # small u, circumflex accent | |
536 | "Ugrave" => "\xD9", # capital U, grave accent | |
537 | "ugrave" => "\xF9", # small u, grave accent | |
538 | "Uuml" => "\xDC", # capital U, dieresis or umlaut mark | |
539 | "uuml" => "\xFC", # small u, dieresis or umlaut mark | |
540 | "Yacute" => "\xDD", # capital Y, acute accent | |
541 | "yacute" => "\xFD", # small y, acute accent | |
542 | "yuml" => "\xFF", # small y, dieresis or umlaut mark | |
543 | ||
544 | "lchevron" => "\xAB", # left chevron (double less than) | |
545 | "rchevron" => "\xBB", # right chevron (double greater than) | |
546 | ); | |
547 | } | |
548 | ||
549 | 1; |