This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Handle characters with high bit set
[perl5.git] / pod / pod2man.PL
CommitLineData
4633a7c4
LW
1#!/usr/local/bin/perl
2
3use Config;
4use File::Basename qw(&basename &dirname);
5
6# List explicitly here the variables you want Configure to
7# generate. Metaconfig only looks for shell variables, so you
8# have to mention them as if they were shell variables, not
9# %Config entries. Thus you write
10# $startperl
11# to ensure Configure will look for $Config{startperl}.
12
13# This forces PL files to create target in same directory as PL file.
14# This is so that make depend always knows where to find PL derivatives.
15chdir(dirname($0));
16($file = basename($0)) =~ s/\.PL$//;
17$file =~ s/\.pl$//
f360dba1 18 if ($^O eq 'VMS' or $^O eq 'os2'); # "case-forgiving"
4633a7c4
LW
19
20open OUT,">$file" or die "Can't create $file: $!";
21
22print "Extracting $file (with variable substitutions)\n";
23
24# In this section, perl variables will be expanded during extraction.
25# You can use $Config{...} to use Configure variables.
26
27print OUT <<"!GROK!THIS!";
28$Config{'startperl'}
5d94fbed
AD
29!GROK!THIS!
30
4633a7c4
LW
31# In the following, perl variables are not expanded during extraction.
32
33print OUT <<'!NO!SUBS!';
cb1a09d0
AD
34eval 'exec perl -S $0 "$@"'
35 if 0;
36
37=head1 NAME
38
39pod2man - translate embedded Perl pod directives into man pages
40
41=head1 SYNOPSIS
42
43B<pod2man>
44[ B<--section=>I<manext> ]
45[ B<--release=>I<relpatch> ]
46[ B<--center=>I<string> ]
47[ B<--date=>I<string> ]
48[ B<--fixed=>I<font> ]
49[ B<--official> ]
50I<inputfile>
51
52=head1 DESCRIPTION
53
54B<pod2man> converts its input file containing embedded pod directives (see
55L<perlpod>) into nroff source suitable for viewing with nroff(1) or
56troff(1) using the man(7) macro set.
57
58Besides the obvious pod conversions, B<pod2man> also takes care of
59func(), func(n), and simple variable references like $foo or @bar so
60you don't have to use code escapes for them; complex expressions like
61C<$fred{'stuff'}> will still need to be escaped, though. Other nagging
62little roffish things that it catches include translating the minus in
63something like foo-bar, making a long dash--like this--into a real em
64dash, fixing up "paired quotes", putting a little space after the
65parens in something like func(), making C++ and PI look right, making
66double underbars have a little tiny space between them, making ALLCAPS
67a teeny bit smaller in troff(1), and escaping backslashes so you don't
68have to.
69
70=head1 OPTIONS
71
72=over 8
73
74=item center
75
76Set the centered header to a specific string. The default is
77"User Contributed Perl Documentation", unless the C<--official> flag is
78given, in which case the default is "Perl Programmers Reference Guide".
79
80=item date
81
82Set the left-hand footer string to this value. By default,
83the modification date of the input file will be used.
84
85=item fixed
86
87The fixed font to use for code refs. Defaults to CW.
88
89=item official
90
91Set the default header to indicate that this page is of
92the standard release in case C<--center> is not given.
93
94=item release
95
96Set the centered footer. By default, this is the current
97perl release.
98
99=item section
100
101Set the section for the C<.TH> macro. The standard conventions on
102sections are to use 1 for user commands, 2 for system calls, 3 for
103functions, 4 for devices, 5 for file formats, 6 for games, 7 for
104miscellaneous information, and 8 for administrator commands. This works
105best if you put your Perl man pages in a separate tree, like
106F</usr/local/perl/man/>. By default, section 1 will be used
107unless the file ends in F<.pm> in which case section 3 will be selected.
108
109=back
110
111=head1 Anatomy of a Proper Man Page
112
113For those not sure of the proper layout of a man page, here's
114an example of the skeleton of a proper man page. Head of the
115major headers should be setout as a C<=head1> directive, and
116are historically written in the rather startling ALL UPPER CASE
117format, although this is not mandatory.
118Minor headers may be included using C<=head2>, and are
119typically in mixed case.
120
121=over 10
122
123=item NAME
124
125Mandatory section; should be a comma-separated list of programs or
126functions documented by this podpage, such as:
127
128 foo, bar - programs to do something
129
130=item SYNOPSIS
131
132A short usage summary for programs and functions, which
133may someday be deemed mandatory.
134
135=item DESCRIPTION
136
137Long drawn out discussion of the program. It's a good idea to break this
138up into subsections using the C<=head2> directives, like
139
140 =head2 A Sample Subection
141
142 =head2 Yet Another Sample Subection
143
144=item OPTIONS
145
146Some people make this separate from the description.
147
148=item RETURN VALUE
149
150What the program or function returns if successful.
151
152=item ERRORS
153
154Exceptions, return codes, exit stati, and errno settings.
155
156=item EXAMPLES
157
158Give some example uses of the program.
159
160=item ENVIRONMENT
161
162Envariables this program might care about.
163
164=item FILES
165
166All files used by the program. You should probably use the FE<lt>E<gt>
167for these.
168
169=item SEE ALSO
170
171Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
172
173=item NOTES
174
175Miscellaneous commentary.
176
177=item CAVEATS
178
179Things to take special care with; sometimes called WARNINGS.
180
181=item DIAGNOSTICS
182
183All possible messages the program can print out--and
184what they mean.
185
186=item BUGS
187
188Things that are broken or just don't work quite right.
189
190=item RESTRICTIONS
191
192Bugs you don't plan to fix :-)
193
194=item AUTHOR
195
196Who wrote it (or AUTHORS if multiple).
197
198=item HISTORY
199
200Programs derived from other sources sometimes have this, or
201you might keep a modification long here.
202
203=back
204
205=head1 EXAMPLES
206
207 pod2man program > program.1
208 pod2man some_module.pm > /usr/perl/man/man3/some_module.3
209 pod2man --section=7 note.pod > note.7
210
211=head1 DIAGNOSTICS
212
213The following diagnostics are generated by B<pod2man>. Items
214marked "(W)" are non-fatal, whereas the "(F)" errors will cause
215B<pod2man> to immediately exit with a non-zero status.
216
217=over 4
218
219=item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
220
221(W) If you start include an option, you should set it off
222as bold, italic, or code.
223
224=item can't open %s: %s
225
226(F) The input file wasn't available for the given reason.
227
228=item high bit char in input stream
229
230(W) You can't use high-bit characters in the input stream,
231because the translator uses them for its own nefarious purposes.
232Use an HTML entity in angle brackets instead.
233
234=item Improper man page - no dash in NAME header in paragraph %d of %s
235
236(W) The NAME header did not have an isolated dash in it. This is
237considered important.
238
239=item Invalid man page - no NAME line in %s
240
241(F) You did not include a NAME header, which is essential.
242
243=item roff font should be 1 or 2 chars, not `%s' (F)
244
245(F) The font specified with the C<--fixed> option was not
246a one- or two-digit roff font.
247
248=item %s is missing required section: %s
249
250(W) Required sections include NAME, DESCRIPTION, and if you're
251using a section starting with a 3, also a SYNOPSIS. Actually,
252not having a NAME is a fatal.
253
254=item Unknown escape: %s in %s
255
256(W) An unknown HTML entity (probably for an 8-bit character) was given via
257a C<E<lt>E<gt>> directive. Besides amp, lt, gt, and quot, recognized
258entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
259Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
260Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
261icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
262ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
263THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
264Yacute, yacute, and yuml.
265
266=item Unmatched =back
267
268(W) You have a C<=back> without a corresponding C<=over>.
269
270=item Unrecognized pod directive: %s
271
272(W) You specified a pod directive that isn't in the known list of
273C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
274
275
276=back
277
278=head1 NOTES
279
280If you would like to print out a lot of man page continuously, you
281probably want to set the C and D registers to set contiguous page
282numbering and even/odd paging, at least one some versions of man(7).
283Settting the F register will get you some additional experimental
284indexing:
285
286 troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
287
288The indexing merely outputs messages via C<.tm> for each
289major page, section, subsection, item, and any C<XE<lt>E<gt>>
290directives.
291
292
293=head1 RESTRICTIONS
294
295You shouldn't use 8-bit characters in the input stream, as these
296will be used by the translator.
297
298=head1 BUGS
299
300The =over and =back directives don't really work right. They
301take absolute positions instead of offsets, don't nest well, and
302making people count is suboptimal in any event.
303
304=head1 AUTHORS
305
306Original prototype by Larry Wall, but so massively hacked over by
307Tom Christiansen such that Larry probably doesn't recognize it anymore.
308
309=cut
a0d0e21e
LW
310
311$/ = "";
312$cutting = 1;
313
cb1a09d0
AD
314($version,$patch) = `\PATH=.:..:\$PATH; perl -v` =~ /version (\d\.\d{3}(?: +)(?:\S+)?)(?:.*patchlevel (\d\S*))?/s;
315$DEF_RELEASE = "perl $version";
316$DEF_RELEASE .= ", patch $patch" if $patch;
317
318
319sub makedate {
320 my $secs = shift;
321 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
322 my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
323 return "$mday/$mname/$year";
a0d0e21e
LW
324}
325
cb1a09d0
AD
326use Getopt::Long;
327
328$DEF_SECTION = 1;
329$DEF_CENTER = "User Contributed Perl Documentation";
330$STD_CENTER = "Perl Programmers Reference Guide";
331$DEF_FIXED = 'CW';
332
333sub usage {
334 warn "$0: @_\n" if @_;
335 die <<EOF;
336usage: $0 [options] podpage
337Options are:
338 --section=manext (default "$DEF_SECTION")
339 --release=relpatch (default "$DEF_RELEASE")
340 --center=string (default "$DEF_CENTER")
341 --date=string (default "$DEF_DATE")
342 --fixed=font (default "$DEF_FIXED")
343 --official (default NOT)
344EOF
345}
346
347$uok = GetOptions( qw(
348 section=s
349 release=s
350 center=s
351 date=s
352 fixed=s
353 official
354 help));
355
356$DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
357
358usage("Usage error!") unless $uok;
359usage() if $opt_help;
360usage("Need one and only one podpage argument") unless @ARGV == 1;
361
362$section = $opt_section || ($ARGV[0] =~ /\.pm$/ ? 3 : $DEF_SECTION);
363$RP = $opt_release || $DEF_RELEASE;
364$center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
365
366$CFont = $opt_fixed || $DEF_FIXED;
367
a0d0e21e
LW
368if (length($CFont) == 2) {
369 $CFont_embed = "\\f($CFont";
cb1a09d0 370}
a0d0e21e
LW
371elsif (length($CFont) == 1) {
372 $CFont_embed = "\\f$CFont";
cb1a09d0 373}
a0d0e21e 374else {
cb1a09d0
AD
375 die "roff font should be 1 or 2 chars, not `$CFont_embed'";
376}
377
378$section = $opt_section || $DEF_SECTION;
379$date = $opt_date || $DEF_DATE;
380
381for (qw{NAME DESCRIPTION}) {
382# for (qw{NAME DESCRIPTION AUTHOR}) {
383 $wanna_see{$_}++;
384}
385$wanna_see{SYNOPSIS}++ if $section =~ /^3/;
386
a0d0e21e 387
cb1a09d0
AD
388$name = @ARGV ? $ARGV[0] : "<STDIN>";
389$Filename = $name;
390$name = uc($name) if $section =~ /^1/;
9430eed9 391$name =~ s/\.[^.]*$//;
a0d0e21e 392
cb1a09d0
AD
393if ($name ne 'something') {
394 FCHECK: {
395 open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
396 while (<F>) {
f360dba1 397 next unless /^=\b/;
cb1a09d0
AD
398 if (/^=head1\s+NAME\s*$/) { # an /m would forgive mistakes
399 $_ = <F>;
400 unless (/\s*-+\s+/) {
401 $oops++;
f360dba1 402 warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
cb1a09d0
AD
403 }
404 %namedesc = split /\s+-\s+/;
405 last FCHECK;
406 }
f360dba1
PP
407 next if /^=cut\b/; # DB_File and Net::Ping have =cut before NAME
408 die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n";
cb1a09d0 409 }
f360dba1 410 die "$0: Invalid man page - no documentation in $ARGV[0]\n";
cb1a09d0
AD
411 }
412 close F;
413}
414
a0d0e21e
LW
415print <<"END";
416.rn '' }`
417''' \$RCSfile\$\$Revision\$\$Date\$
cb1a09d0 418'''
a0d0e21e 419''' \$Log\$
cb1a09d0 420'''
a0d0e21e
LW
421.de Sh
422.br
423.if t .Sp
424.ne 5
425.PP
426\\fB\\\\\$1\\fR
427.PP
428..
429.de Sp
430.if t .sp .5v
431.if n .sp
432..
433.de Ip
434.br
435.ie \\\\n(.\$>=3 .ne \\\\\$3
436.el .ne 3
437.IP "\\\\\$1" \\\\\$2
438..
439.de Vb
440.ft $CFont
441.nf
442.ne \\\\\$1
443..
444.de Ve
445.ft R
446
447.fi
448..
449'''
450'''
451''' Set up \\*(-- to give an unbreakable dash;
452''' string Tr holds user defined translation string.
453''' Bell System Logo is used as a dummy character.
454'''
455.tr \\(*W-|\\(bv\\*(Tr
456.ie n \\{\\
457.ds -- \\(*W-
cb1a09d0 458.ds PI pi
a0d0e21e
LW
459.if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
460.if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
461.ds L" ""
462.ds R" ""
463.ds L' '
464.ds R' '
465'br\\}
466.el\\{\\
467.ds -- \\(em\\|
468.tr \\*(Tr
469.ds L" ``
470.ds R" ''
471.ds L' `
472.ds R' '
cb1a09d0 473.ds PI \\(*p
a0d0e21e 474'br\\}
cb1a09d0
AD
475END
476
477print <<'END';
478.\" If the F register is turned on, we'll generate
479.\" index entries out stderr for the following things:
480.\" TH Title
481.\" SH Header
482.\" Sh Subsection
483.\" Ip Item
484.\" X<> Xref (embedded
485.\" Of course, you have to process the output yourself
486.\" in some meaninful fashion.
487.if \nF \{
488.de IX
489.tm Index:\\$1\t\\n%\t"\\$2"
490..
491.nr % 0
492.rr F
493.\}
494END
495
496print <<"END";
497.TH $name $section "$RP" "$date" "$center"
498.IX Title "$name $section"
a0d0e21e
LW
499.UC
500END
501
cb1a09d0
AD
502while (($name, $desc) = each %namedesc) {
503 for ($name, $desc) { s/^\s+//; s/\s+$//; }
504 print qq(.IX Name "$name - $desc"\n);
505}
506
a0d0e21e 507print <<'END';
cb1a09d0 508.if n .hy 0
a0d0e21e
LW
509.if n .na
510.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
511.de CQ \" put $1 in typewriter font
512END
513print ".ft $CFont\n";
514print <<'END';
515'if n "\c
748a9306
LW
516'if t \\&\\$1\c
517'if n \\&\\$1\c
a0d0e21e 518'if n \&"
748a9306 519\\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
a0d0e21e
LW
520'.ft R
521..
522.\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
523. \" AM - accent mark definitions
9430eed9 524.bd B 3
a0d0e21e
LW
525. \" fudge factors for nroff and troff
526.if n \{\
527. ds #H 0
528. ds #V .8m
529. ds #F .3m
530. ds #[ \f1
531. ds #] \fP
532.\}
533.if t \{\
748a9306 534. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
a0d0e21e
LW
535. ds #V .6m
536. ds #F 0
537. ds #[ \&
538. ds #] \&
539.\}
540. \" simple accents for nroff and troff
541.if n \{\
542. ds ' \&
543. ds ` \&
544. ds ^ \&
545. ds , \&
546. ds ~ ~
547. ds ? ?
548. ds ! !
cb1a09d0
AD
549. ds /
550. ds q
a0d0e21e
LW
551.\}
552.if t \{\
748a9306
LW
553. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
554. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
555. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
556. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
557. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
a0d0e21e
LW
558. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
559. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
748a9306 560. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
a0d0e21e
LW
561. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
562.\}
563. \" troff and (daisy-wheel) nroff accents
748a9306 564.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
a0d0e21e 565.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
748a9306
LW
566.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
567.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
568.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
a0d0e21e 569.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
748a9306 570.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
a0d0e21e 571.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
748a9306 572.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
a0d0e21e
LW
573.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
574.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
575.ds ae a\h'-(\w'a'u*4/10)'e
576.ds Ae A\h'-(\w'A'u*4/10)'E
577.ds oe o\h'-(\w'o'u*4/10)'e
578.ds Oe O\h'-(\w'O'u*4/10)'E
579. \" corrections for vroff
748a9306
LW
580.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
581.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
a0d0e21e
LW
582. \" for low resolution devices (crt and lpr)
583.if \n(.H>23 .if \n(.V>19 \
584\{\
585. ds : e
586. ds 8 ss
587. ds v \h'-1'\o'\(aa\(ga'
588. ds _ \h'-1'^
589. ds . \h'-1'.
590. ds 3 3
591. ds o a
592. ds d- d\h'-1'\(ga
593. ds D- D\h'-1'\(hy
594. ds th \o'bp'
595. ds Th \o'LP'
596. ds ae ae
597. ds Ae AE
598. ds oe oe
599. ds Oe OE
600.\}
601.rm #[ #] #H #V #F C
602END
603
604$indent = 0;
605
606while (<>) {
607 if ($cutting) {
608 next unless /^=/;
609 $cutting = 0;
610 }
611 chomp;
612
613 # Translate verbatim paragraph
614
615 if (/^\s/) {
616 @lines = split(/\n/);
617 for (@lines) {
618 1 while s
619 {^( [^\t]* ) \t ( \t* ) }
620 { $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
621 s/\\/\\e/g;
622 s/\A/\\&/s;
623 }
624 $lines = @lines;
625 makespace() unless $verbatim++;
626 print ".Vb $lines\n";
627 print join("\n", @lines), "\n";
628 print ".Ve\n";
629 $needspace = 0;
630 next;
631 }
632
633 $verbatim = 0;
634
635 # check for things that'll hosed our noremap scheme; affects $_
636 init_noremap();
637
638 if (!/^=item/) {
639
640 # trofficate backslashes; must do it before what happens below
641 s/\\/noremap('\\e')/ge;
642
cb1a09d0 643 # first hide the escapes in case we need to
a0d0e21e
LW
644 # intuit something and get it wrong due to fmting
645
646 s/([A-Z]<[^<>]*>)/noremap($1)/ge;
647
648 # func() is a reference to a perl function
649 s{
650 \b
651 (
652 [:\w]+ \(\)
653 )
654 } {I<$1>}gx;
655
656 # func(n) is a reference to a man page
657 s{
658 (\w+)
659 (
660 \(
661 [^\s,\051]+
662 \)
663 )
664 } {I<$1>\\|$2}gx;
665
666 # convert simple variable references
748a9306 667 s/(\s+)([\$\@%][\w:]+)/${1}C<$2>/g;
a0d0e21e
LW
668
669 if (m{ (
670 [\-\w]+
671 \(
672 [^\051]*?
673 [\@\$,]
674 [^\051]*?
675 \)
676 )
cb1a09d0 677 }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
a0d0e21e 678 {
cb1a09d0
AD
679 warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
680 $oops++;
681 }
a0d0e21e
LW
682
683 while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
cb1a09d0
AD
684 warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
685 $oops++;
686 }
a0d0e21e
LW
687
688 # put it back so we get the <> processed again;
689 clear_noremap(0); # 0 means leave the E's
690
691 } else {
692 # trofficate backslashes
693 s/\\/noremap('\\e')/ge;
694
cb1a09d0 695 }
a0d0e21e
LW
696
697 # need to hide E<> first; they're processed in clear_noremap
698 s/(E<[^<>]+>)/noremap($1)/ge;
699
700
701 $maxnest = 10;
702 while ($maxnest-- && /[A-Z]</) {
703
704 # can't do C font here
705 s/([BI])<([^<>]*)>/font($1) . $2 . font('R')/eg;
706
707 # files and filelike refs in italics
708 s/F<([^<>]*)>/I<$1>/g;
709
710 # no break -- usually we want C<> for this
711 s/S<([^<>]*)>/nobreak($1)/eg;
712
cb1a09d0 713 # LREF: a manpage(3f)
a0d0e21e
LW
714 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
715
716 # LREF: an =item on another manpage
717 s{
718 L<
719 ([^/]+)
720 /
721 (
722 [:\w]+
723 (\(\))?
724 )
725 >
726 } {the C<$2> entry in the I<$1> manpage}gx;
727
728 # LREF: an =item on this manpage
729 s{
730 ((?:
731 L<
732 /
733 (
734 [:\w]+
735 (\(\))?
736 )
737 >
738 (,?\s+(and\s+)?)?
739 )+)
740 } { internal_lrefs($1) }gex;
741
742 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
743 # the "func" can disambiguate
744 s{
745 L<
746 (?:
cb1a09d0 747 ([a-zA-Z]\S+?) /
a0d0e21e
LW
748 )?
749 "?(.*?)"?
750 >
751 }{
752 do {
753 $1 # if no $1, assume it means on this page.
754 ? "the section on I<$2> in the I<$1> manpage"
755 : "the section on I<$2>"
cb1a09d0 756 }
a0d0e21e
LW
757 }gex;
758
759 s/Z<>/\\&/g;
760
761 # comes last because not subject to reprocessing
762 s/C<([^<>]*)>/noremap("${CFont_embed}${1}\\fR")/eg;
763 }
764
765 if (s/^=//) {
766 $needspace = 0; # Assume this.
767
768 s/\n/ /g;
769
770 ($Cmd, $_) = split(' ', $_, 2);
771
772 if (defined $_) {
773 &escapes;
774 s/"/""/g;
775 }
776
777 clear_noremap(1);
778
779 if ($Cmd eq 'cut') {
780 $cutting = 1;
781 }
782 elsif ($Cmd eq 'head1') {
cb1a09d0
AD
783 s/\s+$//;
784 delete $wanna_see{$_} if exists $wanna_see{$_};
785 print qq{.SH "$_"\n};
786 print qq{.IX Header "$_"\n};
a0d0e21e
LW
787 }
788 elsif ($Cmd eq 'head2') {
cb1a09d0
AD
789 print qq{.Sh "$_"\n};
790 print qq{.IX Subsection "$_"\n};
a0d0e21e
LW
791 }
792 elsif ($Cmd eq 'over') {
793 push(@indent,$indent);
cb1a09d0 794 $indent += ($_ + 0) || 5;
a0d0e21e
LW
795 }
796 elsif ($Cmd eq 'back') {
797 $indent = pop(@indent);
f360dba1 798 warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
a0d0e21e
LW
799 $needspace = 1;
800 }
801 elsif ($Cmd eq 'item') {
802 s/^\*( |$)/\\(bu$1/g;
803 print STDOUT qq{.Ip "$_" $indent\n};
cb1a09d0 804 print qq{.IX Item "$_"\n};
a0d0e21e 805 }
cb1a09d0
AD
806 elsif ($Cmd eq 'pod') {
807 # this is just a comment
808 }
a0d0e21e 809 else {
f360dba1 810 warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
a0d0e21e
LW
811 }
812 }
813 else {
814 if ($needspace) {
815 &makespace;
816 }
817 &escapes;
818 clear_noremap(1);
819 print $_, "\n";
820 $needspace = 1;
821 }
822}
823
824print <<"END";
825
826.rn }` ''
827END
828
cb1a09d0
AD
829if (%wanna_see) {
830 @missing = keys %wanna_see;
831 warn "$0: $Filename is missing required section"
832 . (@missing > 1 && "s")
833 . ": @missing\n";
834 $oops++;
835}
836
837exit;
838#exit ($oops != 0);
839
a0d0e21e
LW
840#########################################################################
841
842sub nobreak {
843 my $string = shift;
844 $string =~ s/ /\\ /g;
845 $string;
846}
847
848sub escapes {
849
cb1a09d0
AD
850 s/X<(.*?)>/mkindex($1)/ge;
851
a0d0e21e
LW
852 # translate the minus in foo-bar into foo\-bar for roff
853 s/([^0-9a-z-])-([^-])/$1\\-$2/g;
854
855 # make -- into the string version \*(-- (defined above)
856 s/\b--\b/\\*(--/g;
857 s/"--([^"])/"\\*(--$1/g; # should be a better way
858 s/([^"])--"/$1\\*(--"/g;
859
860 # fix up quotes; this is somewhat tricky
861 if (!/""/) {
862 s/(^|\s)(['"])/noremap("$1\\*(L$2")/ge;
863 s/(['"])($|[\-\s,;\\!?.])/noremap("\\*(R$1$2")/ge;
864 }
865
866 #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
867 #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
cb1a09d0 868
a0d0e21e
LW
869
870 # make sure that func() keeps a bit a space tween the parens
871 ### s/\b\(\)/\\|()/g;
872 ### s/\b\(\)/(\\|)/g;
873
874 # make C++ into \*C+, which is a squinched version (defined above)
875 s/\bC\+\+/\\*(C+/g;
876
877 # make double underbars have a little tiny space between them
878 s/__/_\\|_/g;
879
cb1a09d0 880 # PI goes to \*(PI (defined above)
a0d0e21e
LW
881 s/\bPI\b/noremap('\\*(PI')/ge;
882
883 # make all caps a teeny bit smaller, but don't muck with embedded code literals
884 my $hidCFont = font('C');
885 if ($Cmd !~ /^head1/) { # SH already makes smaller
886 # /g isn't enough; 1 while or we'll be off
887
888# 1 while s{
889# (?!$hidCFont)(..|^.|^)
890# \b
891# (
892# [A-Z][\/A-Z+:\-\d_$.]+
893# )
894# (s?)
895# \b
896# } {$1\\s-1$2\\s0}gmox;
897
898 1 while s{
899 (?!$hidCFont)(..|^.|^)
900 (
901 \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
902 )
cb1a09d0 903 } {
a0d0e21e
LW
904 $1 . noremap( '\\s-1' . $2 . '\\s0' )
905 }egmox;
906
907 }
908}
909
910# make troff just be normal, but make small nroff get quoted
911# decided to just put the quotes in the text; sigh;
912sub ccvt {
913 local($_,$prev) = @_;
914 if ( /^\W+$/ && !/^\$./ ) {
915 ($prev && "\n") . noremap(qq{.CQ $_ \n\\&});
916 # what about $" ?
917 } else {
918 noremap(qq{${CFont_embed}$_\\fR});
cb1a09d0 919 }
a0d0e21e 920 noremap(qq{.CQ "$_" \n\\&});
cb1a09d0 921}
a0d0e21e
LW
922
923sub makespace {
924 if ($indent) {
925 print ".Sp\n";
926 }
927 else {
928 print ".PP\n";
929 }
930}
931
cb1a09d0
AD
932sub mkindex {
933 my ($entry) = @_;
934 my @entries = split m:\s*/\s*:, $entry;
935 print ".IX Xref ";
936 for $entry (@entries) {
937 print qq("$entry" );
938 }
939 print "\n";
940 return '';
941}
942
a0d0e21e
LW
943sub font {
944 local($font) = shift;
945 return '\\f' . noremap($font);
cb1a09d0 946}
a0d0e21e
LW
947
948sub noremap {
949 local($thing_to_hide) = shift;
950 $thing_to_hide =~ tr/\000-\177/\200-\377/;
951 return $thing_to_hide;
cb1a09d0 952}
a0d0e21e
LW
953
954sub init_noremap {
955 if ( /[\200-\377]/ ) {
f360dba1 956 warn "$0: high bit char in input stream in paragraph $. of $ARGV\n";
cb1a09d0
AD
957 }
958}
a0d0e21e
LW
959
960sub clear_noremap {
961 my $ready_to_print = $_[0];
962
963 tr/\200-\377/\000-\177/;
964
965 # trofficate backslashes
966 # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
967
968 # now for the E<>s, which have been hidden until now
969 # otherwise the interative \w<> processing would have
970 # been hosed by the E<gt>
971 s {
972 E<
973 ( [A-Za-z]+ )
974 >
cb1a09d0 975 } {
a0d0e21e
LW
976 do {
977 exists $HTML_Escapes{$1}
978 ? do { $HTML_Escapes{$1} }
979 : do {
f360dba1 980 warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
a0d0e21e 981 "E<$1>";
cb1a09d0
AD
982 }
983 }
a0d0e21e 984 }egx if $ready_to_print;
cb1a09d0 985}
a0d0e21e
LW
986
987sub internal_lrefs {
988 local($_) = shift;
989
990 s{L</([^>]+)>}{$1}g;
991 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
992 my $retstr = "the ";
993 my $i;
994 for ($i = 0; $i <= $#items; $i++) {
995 $retstr .= "C<$items[$i]>";
996 $retstr .= ", " if @items > 2 && $i != $#items;
997 $retstr .= " and " if $i+2 == @items;
cb1a09d0 998 }
a0d0e21e
LW
999
1000 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
1001 . " elsewhere in this document";
1002
1003 return $retstr;
1004
cb1a09d0 1005}
a0d0e21e
LW
1006
1007BEGIN {
1008%HTML_Escapes = (
1009 'amp' => '&', # ampersand
1010 'lt' => '<', # left chevron, less-than
1011 'gt' => '>', # right chevron, greater-than
1012 'quot' => '"', # double quote
1013
1014 "Aacute" => "A\\*'", # capital A, acute accent
1015 "aacute" => "a\\*'", # small a, acute accent
1016 "Acirc" => "A\\*^", # capital A, circumflex accent
1017 "acirc" => "a\\*^", # small a, circumflex accent
1018 "AElig" => '\*(AE', # capital AE diphthong (ligature)
1019 "aelig" => '\*(ae', # small ae diphthong (ligature)
1020 "Agrave" => "A\\*`", # capital A, grave accent
1021 "agrave" => "A\\*`", # small a, grave accent
1022 "Aring" => 'A\\*o', # capital A, ring
1023 "aring" => 'a\\*o', # small a, ring
1024 "Atilde" => 'A\\*~', # capital A, tilde
1025 "atilde" => 'a\\*~', # small a, tilde
1026 "Auml" => 'A\\*:', # capital A, dieresis or umlaut mark
1027 "auml" => 'a\\*:', # small a, dieresis or umlaut mark
1028 "Ccedil" => 'C\\*,', # capital C, cedilla
1029 "ccedil" => 'c\\*,', # small c, cedilla
1030 "Eacute" => "E\\*'", # capital E, acute accent
1031 "eacute" => "e\\*'", # small e, acute accent
1032 "Ecirc" => "E\\*^", # capital E, circumflex accent
1033 "ecirc" => "e\\*^", # small e, circumflex accent
1034 "Egrave" => "E\\*`", # capital E, grave accent
1035 "egrave" => "e\\*`", # small e, grave accent
1036 "ETH" => '\\*(D-', # capital Eth, Icelandic
1037 "eth" => '\\*(d-', # small eth, Icelandic
1038 "Euml" => "E\\*:", # capital E, dieresis or umlaut mark
1039 "euml" => "e\\*:", # small e, dieresis or umlaut mark
1040 "Iacute" => "I\\*'", # capital I, acute accent
1041 "iacute" => "i\\*'", # small i, acute accent
1042 "Icirc" => "I\\*^", # capital I, circumflex accent
1043 "icirc" => "i\\*^", # small i, circumflex accent
1044 "Igrave" => "I\\*`", # capital I, grave accent
1045 "igrave" => "i\\*`", # small i, grave accent
1046 "Iuml" => "I\\*:", # capital I, dieresis or umlaut mark
1047 "iuml" => "i\\*:", # small i, dieresis or umlaut mark
1048 "Ntilde" => 'N\*~', # capital N, tilde
1049 "ntilde" => 'n\*~', # small n, tilde
1050 "Oacute" => "O\\*'", # capital O, acute accent
1051 "oacute" => "o\\*'", # small o, acute accent
1052 "Ocirc" => "O\\*^", # capital O, circumflex accent
1053 "ocirc" => "o\\*^", # small o, circumflex accent
1054 "Ograve" => "O\\*`", # capital O, grave accent
1055 "ograve" => "o\\*`", # small o, grave accent
1056 "Oslash" => "O\\*/", # capital O, slash
1057 "oslash" => "o\\*/", # small o, slash
1058 "Otilde" => "O\\*~", # capital O, tilde
1059 "otilde" => "o\\*~", # small o, tilde
1060 "Ouml" => "O\\*:", # capital O, dieresis or umlaut mark
1061 "ouml" => "o\\*:", # small o, dieresis or umlaut mark
1062 "szlig" => '\*8', # small sharp s, German (sz ligature)
1063 "THORN" => '\\*(Th', # capital THORN, Icelandic
1064 "thorn" => '\\*(th',, # small thorn, Icelandic
1065 "Uacute" => "U\\*'", # capital U, acute accent
1066 "uacute" => "u\\*'", # small u, acute accent
1067 "Ucirc" => "U\\*^", # capital U, circumflex accent
1068 "ucirc" => "u\\*^", # small u, circumflex accent
1069 "Ugrave" => "U\\*`", # capital U, grave accent
1070 "ugrave" => "u\\*`", # small u, grave accent
1071 "Uuml" => "U\\*:", # capital U, dieresis or umlaut mark
1072 "uuml" => "u\\*:", # small u, dieresis or umlaut mark
1073 "Yacute" => "Y\\*'", # capital Y, acute accent
1074 "yacute" => "y\\*'", # small y, acute accent
1075 "yuml" => "y\\*:", # small y, dieresis or umlaut mark
1076);
1077}
cb1a09d0 1078
5d94fbed 1079!NO!SUBS!
4633a7c4
LW
1080
1081close OUT or die "Can't close $file: $!";
1082chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
1083exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';