This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate change #18420 from maint-5.8:
[perl5.git] / lib / Pod / Checker.pm
CommitLineData
360aca43
GS
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43
GS
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
9c6ed6d7 13$VERSION = 1.40; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef
GS
16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43
GS
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43
GS
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43
GS
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef
GS
39=head2 podchecker()
40
41This function can take a hash of options:
e3237417
GS
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a
JH
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417
GS
49
50=back
360aca43
GS
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
e2c3adef 56I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43
GS
57
58It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef
GS
59additional features they wish to see in B<Pod::Checker> and B<podchecker>
60and verify that the checks are consistent with L<perlpod>.
360aca43 61
48f30392 62The following checks are currently preformed:
e3237417
GS
63
64=over 4
65
66=item *
67
48f30392 68Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef
GS
69and unterminated interior sequences.
70
71=item *
72
73Check for proper balancing of C<=begin> and C<=end>. The contents of such
74a block are generally ignored, i.e. no syntax checks are performed.
e3237417
GS
75
76=item *
77
78Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
79
80=item *
81
e2c3adef
GS
82Check for same nested interior-sequences (e.g.
83C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417
GS
84
85=item *
86
e2c3adef 87Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417
GS
88
89=item *
90
e2c3adef
GS
91Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
92for details.
e3237417
GS
93
94=item *
95
e2c3adef
GS
96Check for unresolved document-internal links. This check may also reveal
97misspelled links that seem to be internal links but should be links
98to something else.
e3237417
GS
99
100=back
101
e2c3adef 102=head1 DIAGNOSTICS
e3237417 103
e2c3adef 104=head2 Errors
e3237417
GS
105
106=over 4
107
66aff6dd
GS
108=item * empty =headn
109
110A heading (C<=head1> or C<=head2>) without any text? That ain't no
111heading!
112
e2c3adef 113=item * =over on line I<N> without closing =back
e3237417 114
e2c3adef
GS
115The C<=over> command does not have a corresponding C<=back> before the
116next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 117
e2c3adef 118=item * =item without previous =over
e3237417 119
e2c3adef 120=item * =back without previous =over
e3237417 121
e2c3adef
GS
122An C<=item> or C<=back> command has been found outside a
123C<=over>/C<=back> block.
e3237417 124
e2c3adef 125=item * No argument for =begin
e3237417 126
e2c3adef
GS
127A C<=begin> command was found that is not followed by the formatter
128specification.
e3237417 129
e2c3adef 130=item * =end without =begin
e3237417 131
e2c3adef
GS
132A standalone C<=end> command was found.
133
134=item * Nested =begin's
135
66aff6dd 136There were at least two consecutive C<=begin> commands without
e2c3adef
GS
137the corresponding C<=end>. Only one C<=begin> may be active at
138a time.
139
140=item * =for without formatter specification
e3237417 141
e2c3adef
GS
142There is no specification of the formatter after the C<=for> command.
143
144=item * unresolved internal link I<NAME>
145
146The given link to I<NAME> does not have a matching node in the current
147POD. This also happend when a single word node name is not enclosed in
148C<"">.
149
150=item * Unknown command "I<CMD>"
151
152An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d
CT
153C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
154C<=for>, C<=pod>, C<=cut>
e2c3adef
GS
155
156=item * Unknown interior-sequence "I<SEQ>"
157
158An invalid markup command has been encountered. Valid are:
159C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
160C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
161C<ZE<lt>E<gt>>
162
163=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
164
165Two nested identical markup commands have been found. Generally this
166does not make sense.
167
168=item * garbled entity I<STRING>
169
66aff6dd
GS
170The I<STRING> found cannot be interpreted as a character entity.
171
172=item * Entity number out of range
173
174An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef
GS
175
176=item * malformed link LE<lt>E<gt>
177
178The link found cannot be parsed because it does not conform to the
179syntax described in L<perlpod>.
e3237417 180
66aff6dd
GS
181=item * nonempty ZE<lt>E<gt>
182
183The C<ZE<lt>E<gt>> sequence is supposed to be empty.
184
48f30392
GS
185=item * empty XE<lt>E<gt>
186
187The index entry specified contains nothing but whitespace.
188
66aff6dd
GS
189=item * Spurious text after =pod / =cut
190
191The commands C<=pod> and C<=cut> do not take any arguments.
192
193=item * Spurious character(s) after =back
194
195The C<=back> command does not take any arguments.
196
e3237417
GS
197=back
198
e2c3adef 199=head2 Warnings
e3237417 200
e2c3adef
GS
201These may not necessarily cause trouble, but indicate mediocre style.
202
203=over 4
204
fb8eeed8 205=item * multiple occurrence of link target I<name>
66aff6dd
GS
206
207The POD file has some C<=item> and/or C<=head> commands that have
208the same text. Potential hyperlinks to such a text cannot be unique then.
209
210=item * line containing nothing but whitespace in paragraph
211
212There is some whitespace on a seemingly empty line. POD is very sensitive
213to such things, so this is flagged. B<vi> users switch on the B<list>
214option to avoid this problem.
215
92e3d63a
JH
216=begin _disabled_
217
66aff6dd
GS
218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
92e3d63a
JH
223=end _disabled_
224
66aff6dd 225=item * previous =item has no contents
e2c3adef 226
66aff6dd
GS
227There is a list C<=item> right above the flagged line that has no
228text contents. You probably want to delete empty items.
229
230=item * preceding non-item paragraph(s)
231
232A list introduced by C<=over> starts with a text or verbatim paragraph,
233but continues with C<=item>s. Move the non-item paragraph out of the
234C<=over>/C<=back> block.
235
236=item * =item type mismatch (I<one> vs. I<two>)
237
238A list started with e.g. a bulletted C<=item> and continued with a
239numbered one. This is obviously inconsistent. For most translators the
240type of the I<first> C<=item> determines the type of the list.
e2c3adef
GS
241
242=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
244Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245can potentially cause errors as they could be misinterpreted as
92e3d63a
JH
246markup commands. This is only printed when the -warnings level is
247greater than 1.
e2c3adef 248
66aff6dd 249=item * Unknown entity
e2c3adef
GS
250
251A character entity was found that does not belong to the standard
66aff6dd 252ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef
GS
253
254=item * No items in =over
255
66aff6dd 256The list opened with C<=over> does not contain any items.
e2c3adef
GS
257
258=item * No argument for =item
259
260C<=item> without any parameters is deprecated. It should either be followed
261by C<*> to indicate an unordered list, by a number (optionally followed
262by a dot) to indicate an ordered (numbered) list or simple text for a
263definition list.
264
66aff6dd
GS
265=item * empty section in previous paragraph
266
267The previous section (introduced by a C<=head> command) does not contain
268any text. This usually indicates that something is missing. Note: A
269C<=head1> followed immediately by C<=head2> does not trigger this warning.
270
e2c3adef
GS
271=item * Verbatim paragraph in NAME section
272
273The NAME section (C<=head1 NAME>) should consist of a single paragraph
274with the script/module name, followed by a dash `-' and a very short
275description of what the thing is good for.
276
9c6ed6d7
HS
277=item * =headI<n> without preceding higher level
278
279For example if there is a C<=head2> in the POD file prior to a
280C<=head1>.
281
92e3d63a
JH
282=back
283
284=head2 Hyperlinks
285
286There are some warnings wrt. malformed hyperlinks.
287
288=over 4
289
92e3d63a
JH
290=item * ignoring leading/trailing whitespace in link
291
292There is whitespace at the beginning or the end of the contents of
293LE<lt>...E<gt>.
294
295=item * (section) in '$page' deprecated
296
297There is a section detected in the page name of LE<lt>...E<gt>, e.g.
ad838556 298C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
92e3d63a
JH
299Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
300to expand this to appropriate code. For links to (builtin) functions,
301please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
302
303=item * alternative text/node '%s' contains non-escaped | or /
304
305The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
306Although the hyperlink parser does its best to determine which "/" is
307text and which is a delimiter in case of doubt, one ought to escape
308these literal characters like this:
309
310 / E<sol>
311 | E<verbar>
e2c3adef
GS
312
313=back
e3237417
GS
314
315=head1 RETURN VALUE
316
317B<podchecker> returns the number of POD syntax errors found or -1 if
318there were no POD commands at all found in the file.
319
360aca43
GS
320=head1 EXAMPLES
321
322I<[T.B.D.]>
323
48f30392 324=head1 INTERFACE
360aca43 325
48f30392
GS
326While checking, this module collects document properties, e.g. the nodes
327for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
328POD translators can use this feature to syntax-check and get the nodes in
329a first pass before actually starting to convert. This is expensive in terms
330of execution time, but allows for very robust conversions.
360aca43
GS
331
332=cut
333
334#############################################################################
335
336use strict;
337#use diagnostics;
338use Carp;
339use Exporter;
340use Pod::Parser;
341
342use vars qw(@ISA @EXPORT);
343@ISA = qw(Pod::Parser);
344@EXPORT = qw(&podchecker);
345
346use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
347
348my %VALID_COMMANDS = (
349 'pod' => 1,
350 'cut' => 1,
351 'head1' => 1,
352 'head2' => 1,
c6b85e5d
CT
353 'head3' => 1,
354 'head4' => 1,
360aca43
GS
355 'over' => 1,
356 'back' => 1,
357 'item' => 1,
358 'for' => 1,
359 'begin' => 1,
360 'end' => 1,
361);
362
363my %VALID_SEQUENCES = (
364 'I' => 1,
365 'B' => 1,
366 'S' => 1,
367 'C' => 1,
368 'L' => 1,
369 'F' => 1,
370 'X' => 1,
371 'Z' => 1,
372 'E' => 1,
373);
374
e2c3adef
GS
375# stolen from HTML::Entities
376my %ENTITIES = (
377 # Some normal chars that have special meaning in SGML context
378 amp => '&', # ampersand
379'gt' => '>', # greater than
380'lt' => '<', # less than
381 quot => '"', # double quote
382
383 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
384 AElig => 'Æ', # capital AE diphthong (ligature)
385 Aacute => 'Á', # capital A, acute accent
386 Acirc => 'Â', # capital A, circumflex accent
387 Agrave => 'À', # capital A, grave accent
388 Aring => 'Å', # capital A, ring
389 Atilde => 'Ã', # capital A, tilde
390 Auml => 'Ä', # capital A, dieresis or umlaut mark
391 Ccedil => 'Ç', # capital C, cedilla
392 ETH => 'Ð', # capital Eth, Icelandic
393 Eacute => 'É', # capital E, acute accent
394 Ecirc => 'Ê', # capital E, circumflex accent
395 Egrave => 'È', # capital E, grave accent
396 Euml => 'Ë', # capital E, dieresis or umlaut mark
397 Iacute => 'Í', # capital I, acute accent
398 Icirc => 'Î', # capital I, circumflex accent
399 Igrave => 'Ì', # capital I, grave accent
400 Iuml => 'Ï', # capital I, dieresis or umlaut mark
401 Ntilde => 'Ñ', # capital N, tilde
402 Oacute => 'Ó', # capital O, acute accent
403 Ocirc => 'Ô', # capital O, circumflex accent
404 Ograve => 'Ò', # capital O, grave accent
405 Oslash => 'Ø', # capital O, slash
406 Otilde => 'Õ', # capital O, tilde
407 Ouml => 'Ö', # capital O, dieresis or umlaut mark
408 THORN => 'Þ', # capital THORN, Icelandic
409 Uacute => 'Ú', # capital U, acute accent
410 Ucirc => 'Û', # capital U, circumflex accent
411 Ugrave => 'Ù', # capital U, grave accent
412 Uuml => 'Ü', # capital U, dieresis or umlaut mark
413 Yacute => 'Ý', # capital Y, acute accent
414 aacute => 'á', # small a, acute accent
415 acirc => 'â', # small a, circumflex accent
416 aelig => 'æ', # small ae diphthong (ligature)
417 agrave => 'à', # small a, grave accent
418 aring => 'å', # small a, ring
419 atilde => 'ã', # small a, tilde
420 auml => 'ä', # small a, dieresis or umlaut mark
421 ccedil => 'ç', # small c, cedilla
422 eacute => 'é', # small e, acute accent
423 ecirc => 'ê', # small e, circumflex accent
424 egrave => 'è', # small e, grave accent
425 eth => 'ð', # small eth, Icelandic
426 euml => 'ë', # small e, dieresis or umlaut mark
427 iacute => 'í', # small i, acute accent
428 icirc => 'î', # small i, circumflex accent
429 igrave => 'ì', # small i, grave accent
430 iuml => 'ï', # small i, dieresis or umlaut mark
431 ntilde => 'ñ', # small n, tilde
432 oacute => 'ó', # small o, acute accent
433 ocirc => 'ô', # small o, circumflex accent
434 ograve => 'ò', # small o, grave accent
435 oslash => 'ø', # small o, slash
436 otilde => 'õ', # small o, tilde
437 ouml => 'ö', # small o, dieresis or umlaut mark
438 szlig => 'ß', # small sharp s, German (sz ligature)
439 thorn => 'þ', # small thorn, Icelandic
440 uacute => 'ú', # small u, acute accent
441 ucirc => 'û', # small u, circumflex accent
442 ugrave => 'ù', # small u, grave accent
443 uuml => 'ü', # small u, dieresis or umlaut mark
444 yacute => 'ý', # small y, acute accent
445 yuml => 'ÿ', # small y, dieresis or umlaut mark
446
447 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
448 copy => '©', # copyright sign
449 reg => '®', # registered sign
450 nbsp => "\240", # non breaking space
451
452 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
453 iexcl => '¡',
454 cent => '¢',
455 pound => '£',
456 curren => '¤',
457 yen => '¥',
458 brvbar => '¦',
459 sect => '§',
460 uml => '¨',
461 ordf => 'ª',
462 laquo => '«',
463'not' => '¬', # not is a keyword in perl
464 shy => '­',
465 macr => '¯',
466 deg => '°',
467 plusmn => '±',
468 sup1 => '¹',
469 sup2 => '²',
470 sup3 => '³',
471 acute => '´',
472 micro => 'µ',
473 para => '¶',
474 middot => '·',
475 cedil => '¸',
476 ordm => 'º',
477 raquo => '»',
478 frac14 => '¼',
479 frac12 => '½',
480 frac34 => '¾',
481 iquest => '¿',
482'times' => '×', # times is a keyword in perl
483 divide => '÷',
66aff6dd
GS
484
485# some POD special entities
486 verbar => '|',
487 sol => '/'
e2c3adef
GS
488);
489
360aca43
GS
490##---------------------------------------------------------------------------
491
492##---------------------------------
493## Function definitions begin here
494##---------------------------------
495
e3237417
GS
496sub podchecker( $ ; $ % ) {
497 my ($infile, $outfile, %options) = @_;
360aca43
GS
498 local $_;
499
500 ## Set defaults
501 $infile ||= \*STDIN;
502 $outfile ||= \*STDERR;
503
504 ## Now create a pod checker
e3237417 505 my $checker = new Pod::Checker(%options);
360aca43
GS
506
507 ## Now check the pod document for errors
508 $checker->parse_from_file($infile, $outfile);
48f30392 509
360aca43
GS
510 ## Return the number of errors found
511 return $checker->num_errors();
512}
513
514##---------------------------------------------------------------------------
515
516##-------------------------------
517## Method definitions begin here
518##-------------------------------
519
92e3d63a
JH
520##################################
521
522=over 4
523
524=item C<Pod::Checker-E<gt>new( %options )>
525
526Return a reference to a new Pod::Checker object that inherits from
527Pod::Parser and is used for calling the required methods later. The
528following options are recognized:
529
530C<-warnings =E<gt> num>
531 Print warnings if C<num> is true. The higher the value of C<num>,
532the more warnings are printed. Currently there are only levels 1 and 2.
533
534C<-quiet =E<gt> num>
535 If C<num> is true, do not print any errors/warnings. This is useful
536when Pod::Checker is used to munge POD code into plain text from within
537POD formatters.
538
539=cut
540
66aff6dd
GS
541## sub new {
542## my $this = shift;
543## my $class = ref($this) || $this;
544## my %params = @_;
545## my $self = {%params};
546## bless $self, $class;
547## $self->initialize();
548## return $self;
549## }
360aca43
GS
550
551sub initialize {
552 my $self = shift;
664bb207
GS
553 ## Initialize number of errors, and setup an error function to
554 ## increment this number and then print to the designated output.
555 $self->{_NUM_ERRORS} = 0;
9c6ed6d7 556 $self->{_NUM_WARNINGS} = 0;
92e3d63a
JH
557 $self->{-quiet} ||= 0;
558 # set the error handling subroutine
559 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417
GS
560 $self->{_commands} = 0; # total number of POD commands encountered
561 $self->{_list_stack} = []; # stack for nested lists
562 $self->{_have_begin} = ''; # stores =begin
563 $self->{_links} = []; # stack for internal hyperlinks
564 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 565 $self->{_index} = []; # text in X<>
e2c3adef 566 # print warnings?
e3237417 567 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 568 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 569 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207
GS
570}
571
48f30392
GS
572##################################
573
48f30392
GS
574=item C<$checker-E<gt>poderror( @args )>
575
576=item C<$checker-E<gt>poderror( {%opts}, @args )>
577
578Internal method for printing errors and warnings. If no options are
579given, simply prints "@_". The following options are recognized and used
580to form the output:
581
582 -msg
583
584A message to print prior to C<@args>.
585
586 -line
587
588The line number the error occurred in.
589
590 -file
591
592The file (name) the error occurred in.
593
594 -severity
595
596The error level, should be 'WARNING' or 'ERROR'.
597
598=cut
599
e2c3adef 600# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207
GS
601sub poderror {
602 my $self = shift;
603 my %opts = (ref $_[0]) ? %{shift()} : ();
604
605 ## Retrieve options
606 chomp( my $msg = ($opts{-msg} || "")."@_" );
607 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
608 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd
GS
609 unless (exists $opts{-severity}) {
610 ## See if can find severity in message prefix
611 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
612 }
664bb207
GS
613 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
614
e3237417
GS
615 ## Increment error count and print message "
616 ++($self->{_NUM_ERRORS})
617 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
9c6ed6d7
HS
618 ++($self->{_NUM_WARNINGS})
619 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
92e3d63a 620 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef
GS
621 print $out_fh ($severity, $msg, $line, $file, "\n")
622 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43
GS
623}
624
48f30392
GS
625##################################
626
627=item C<$checker-E<gt>num_errors()>
628
629Set (if argument specified) and retrieve the number of errors found.
630
631=cut
632
360aca43
GS
633sub num_errors {
634 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
635}
636
48f30392
GS
637##################################
638
9c6ed6d7
HS
639=item C<$checker-E<gt>num_warnings()>
640
641Set (if argument specified) and retrieve the number of warnings found.
642
643=cut
644
645sub num_warnings {
646 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
647}
648
649##################################
650
48f30392
GS
651=item C<$checker-E<gt>name()>
652
653Set (if argument specified) and retrieve the canonical name of POD as
654found in the C<=head1 NAME> section.
655
656=cut
657
e2c3adef
GS
658sub name {
659 return (@_ > 1 && $_[1]) ?
660 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
661}
662
48f30392
GS
663##################################
664
665=item C<$checker-E<gt>node()>
666
667Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
668and C<=item>) of the current POD. The nodes are returned in the order of
fb8eeed8 669their occurrence. They consist of plain text, each piece of whitespace is
48f30392
GS
670collapsed to a single blank.
671
672=cut
673
e2c3adef
GS
674sub node {
675 my ($self,$text) = @_;
676 if(defined $text) {
66aff6dd
GS
677 $text =~ s/\s+$//s; # strip trailing whitespace
678 $text =~ s/\s+/ /gs; # collapse whitespace
679 # add node, order important!
e2c3adef 680 push(@{$self->{_nodes}}, $text);
66aff6dd 681 # keep also a uniqueness counter
48f30392 682 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef
GS
683 return $text;
684 }
685 @{$self->{_nodes}};
686}
687
48f30392
GS
688##################################
689
690=item C<$checker-E<gt>idx()>
691
692Add (if argument specified) and retrieve the index entries (as defined by
693C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
694of whitespace is collapsed to a single blank.
695
696=cut
697
698# set/return index entries of current POD
699sub idx {
700 my ($self,$text) = @_;
701 if(defined $text) {
702 $text =~ s/\s+$//s; # strip trailing whitespace
703 $text =~ s/\s+/ /gs; # collapse whitespace
704 # add node, order important!
705 push(@{$self->{_index}}, $text);
706 # keep also a uniqueness counter
707 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
708 return $text;
709 }
710 @{$self->{_index}};
711}
712
713##################################
714
715=item C<$checker-E<gt>hyperlink()>
716
717Add (if argument specified) and retrieve the hyperlinks (as defined by
d1be9408 718C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
48f30392
GS
719number and C<Pod::Hyperlink> object.
720
721=back
722
723=cut
724
e2c3adef
GS
725# set/return hyperlinks of the current POD
726sub hyperlink {
727 my $self = shift;
728 if($_[0]) {
729 push(@{$self->{_links}}, $_[0]);
730 return $_[0];
731 }
732 @{$self->{_links}};
733}
734
e3237417
GS
735## overrides for Pod::Parser
736
360aca43 737sub end_pod {
66aff6dd
GS
738 ## Do some final checks and
739 ## print the number of errors found
740 my $self = shift;
741 my $infile = $self->input_file();
742 my $out_fh = $self->output_handle();
743
744 if(@{$self->{_list_stack}}) {
745 # _TODO_ display, but don't count them for now
746 my $list;
747 while(($list = $self->_close_list('EOF',$infile)) &&
748 $list->indent() ne 'auto') {
749 $self->poderror({ -line => 'EOF', -file => $infile,
750 -severity => 'ERROR', -msg => "=over on line " .
751 $list->start() . " without closing =back" }); #"
752 }
753 }
754
755 # check validity of document internal hyperlinks
756 # first build the node names from the paragraph text
757 my %nodes;
758 foreach($self->node()) {
759 $nodes{$_} = 1;
92e3d63a 760 if(/^(\S+)\s+\S/) {
66aff6dd
GS
761 # we have more than one word. Use the first as a node, too.
762 # This is used heavily in perlfunc.pod
763 $nodes{$1} ||= 2; # derived node
764 }
765 }
92e3d63a
JH
766 foreach($self->idx()) {
767 $nodes{$_} = 3; # index node
768 }
66aff6dd 769 foreach($self->hyperlink()) {
48f30392
GS
770 my ($line,$link) = @$_;
771 # _TODO_ what if there is a link to the page itself by the name,
772 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
773 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
774 my $node = $self->_check_ptree($self->parse_text($link->node(),
775 $line), $line, $infile, 'L');
776 if($node && !$nodes{$node}) {
777 $self->poderror({ -line => $line || '', -file => $infile,
778 -severity => 'ERROR',
779 -msg => "unresolved internal link '$node'"});
780 }
66aff6dd
GS
781 }
782 }
48f30392
GS
783
784 # check the internal nodes for uniqueness. This pertains to
785 # =headX, =item and X<...>
66aff6dd
GS
786 foreach(grep($self->{_unique_nodes}->{$_} > 1,
787 keys %{$self->{_unique_nodes}})) {
788 $self->poderror({ -line => '-', -file => $infile,
789 -severity => 'WARNING',
fb8eeed8 790 -msg => "multiple occurrence of link target '$_'"});
66aff6dd
GS
791 }
792
793 ## Print the number of errors found
794 my $num_errors = $self->num_errors();
795 if ($num_errors > 0) {
796 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 797 ($num_errors == 1) ? "error" : "errors");
66aff6dd
GS
798 }
799 elsif($self->{_commands} == 0) {
800 print $out_fh "$infile does not contain any pod commands.\n";
801 $self->num_errors(-1);
802 }
803 else {
804 print $out_fh "$infile pod syntax OK.\n";
805 }
360aca43
GS
806}
807
e2c3adef 808# check a POD command directive
360aca43 809sub command {
664bb207 810 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 811 my ($file, $line) = $pod_para->file_line;
360aca43 812 ## Check the command syntax
e3237417 813 my $arg; # this will hold the command argument
664bb207
GS
814 if (! $VALID_COMMANDS{$cmd}) {
815 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 816 -msg => "Unknown command '$cmd'" });
360aca43 817 }
92e3d63a
JH
818 else { # found a valid command
819 $self->{_commands}++; # delete this line if below is enabled again
820
821 ##### following check disabled due to strong request
822 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
823 # $self->poderror({ -line => $line, -file => $file,
824 # -severity => 'WARNING',
825 # -msg => "file does not start with =head" });
826 #}
827
828 # check syntax of particular command
e3237417 829 if($cmd eq 'over') {
e2c3adef
GS
830 # check for argument
831 $arg = $self->interpolate_and_check($paragraph, $line,$file);
832 my $indent = 4; # default
833 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
834 $indent = $1;
e2c3adef 835 }
e3237417 836 # start a new list
66aff6dd 837 $self->_open_list($indent,$line,$file);
e3237417
GS
838 }
839 elsif($cmd eq 'item') {
e2c3adef 840 # are we in a list?
e3237417
GS
841 unless(@{$self->{_list_stack}}) {
842 $self->poderror({ -line => $line, -file => $file,
843 -severity => 'ERROR',
844 -msg => "=item without previous =over" });
e2c3adef 845 # auto-open in case we encounter many more
66aff6dd
GS
846 $self->_open_list('auto',$line,$file);
847 }
848 my $list = $self->{_list_stack}->[0];
849 # check whether the previous item had some contents
850 if(defined $self->{_list_item_contents} &&
851 $self->{_list_item_contents} == 0) {
852 $self->poderror({ -line => $line, -file => $file,
853 -severity => 'WARNING',
854 -msg => "previous =item has no contents" });
855 }
856 if($list->{_has_par}) {
857 $self->poderror({ -line => $line, -file => $file,
858 -severity => 'WARNING',
859 -msg => "preceding non-item paragraph(s)" });
860 delete $list->{_has_par};
e3237417 861 }
e2c3adef
GS
862 # check for argument
863 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd
GS
864 if($arg && $arg =~ /(\S+)/) {
865 $arg =~ s/[\s\n]+$//;
866 my $type;
867 if($arg =~ /^[*]\s*(\S*.*)/) {
868 $type = 'bullet';
869 $self->{_list_item_contents} = $1 ? 1 : 0;
870 $arg = $1;
871 }
872 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
873 $type = 'number';
874 $self->{_list_item_contents} = $1 ? 1 : 0;
875 $arg = $1;
876 }
877 else {
878 $type = 'definition';
879 $self->{_list_item_contents} = 1;
880 }
881 my $first = $list->type();
882 if($first && $first ne $type) {
883 $self->poderror({ -line => $line, -file => $file,
884 -severity => 'WARNING',
885 -msg => "=item type mismatch ('$first' vs. '$type')"});
886 }
887 else { # first item
888 $list->type($type);
889 }
890 }
891 else {
e2c3adef
GS
892 $self->poderror({ -line => $line, -file => $file,
893 -severity => 'WARNING',
894 -msg => "No argument for =item" });
895 $arg = ' '; # empty
66aff6dd 896 $self->{_list_item_contents} = 0;
e3237417 897 }
e2c3adef 898 # add this item
66aff6dd 899 $list->item($arg);
e2c3adef
GS
900 # remember this node
901 $self->node($arg);
e3237417
GS
902 }
903 elsif($cmd eq 'back') {
904 # check if we have an open list
905 unless(@{$self->{_list_stack}}) {
906 $self->poderror({ -line => $line, -file => $file,
907 -severity => 'ERROR',
908 -msg => "=back without previous =over" });
909 }
910 else {
911 # check for spurious characters
e2c3adef 912 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
913 if($arg && $arg =~ /\S/) {
914 $self->poderror({ -line => $line, -file => $file,
66aff6dd 915 -severity => 'ERROR',
e3237417
GS
916 -msg => "Spurious character(s) after =back" });
917 }
918 # close list
66aff6dd 919 my $list = $self->_close_list($line,$file);
e3237417
GS
920 # check for empty lists
921 if(!$list->item() && $self->{-warnings}) {
922 $self->poderror({ -line => $line, -file => $file,
923 -severity => 'WARNING',
924 -msg => "No items in =over (at line " .
925 $list->start() . ") / =back list"}); #"
926 }
927 }
928 }
66aff6dd 929 elsif($cmd =~ /^head(\d+)/) {
9c6ed6d7
HS
930 my $hnum = $1;
931 $self->{"_have_head_$hnum"}++; # count head types
932 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
933 $self->poderror({ -line => $line, -file => $file,
934 -severity => 'WARNING',
935 -msg => "=head$hnum without preceding higher level"});
936 }
48f30392 937 # check whether the previous =head section had some contents
66aff6dd
GS
938 if(defined $self->{_commands_in_head} &&
939 $self->{_commands_in_head} == 0 &&
940 defined $self->{_last_head} &&
9c6ed6d7 941 $self->{_last_head} >= $hnum) {
66aff6dd
GS
942 $self->poderror({ -line => $line, -file => $file,
943 -severity => 'WARNING',
944 -msg => "empty section in previous paragraph"});
945 }
946 $self->{_commands_in_head} = -1;
9c6ed6d7 947 $self->{_last_head} = $hnum;
e3237417
GS
948 # check if there is an open list
949 if(@{$self->{_list_stack}}) {
950 my $list;
66aff6dd
GS
951 while(($list = $self->_close_list($line,$file)) &&
952 $list->indent() ne 'auto') {
e3237417
GS
953 $self->poderror({ -line => $line, -file => $file,
954 -severity => 'ERROR',
e2c3adef
GS
955 -msg => "=over on line ". $list->start() .
956 " without closing =back (at $cmd)" });
e3237417
GS
957 }
958 }
959 # remember this node
e2c3adef 960 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd
GS
961 $arg =~ s/[\s\n]+$//s;
962 $self->node($arg);
963 unless(length($arg)) {
964 $self->poderror({ -line => $line, -file => $file,
965 -severity => 'ERROR',
966 -msg => "empty =$cmd"});
967 }
e2c3adef 968 if($cmd eq 'head1') {
e2c3adef
GS
969 $self->{_current_head1} = $arg;
970 } else {
971 $self->{_current_head1} = '';
972 }
e3237417
GS
973 }
974 elsif($cmd eq 'begin') {
975 if($self->{_have_begin}) {
976 # already have a begin
977 $self->poderror({ -line => $line, -file => $file,
978 -severity => 'ERROR',
979 -msg => "Nested =begin's (first at line " .
980 $self->{_have_begin} . ")"});
981 }
982 else {
983 # check for argument
e2c3adef 984 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
985 unless($arg && $arg =~ /(\S+)/) {
986 $self->poderror({ -line => $line, -file => $file,
e2c3adef 987 -severity => 'ERROR',
e3237417
GS
988 -msg => "No argument for =begin"});
989 }
990 # remember the =begin
991 $self->{_have_begin} = "$line:$1";
992 }
993 }
994 elsif($cmd eq 'end') {
995 if($self->{_have_begin}) {
996 # close the existing =begin
997 $self->{_have_begin} = '';
998 # check for spurious characters
e2c3adef
GS
999 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1000 # the closing argument is optional
1001 #if($arg && $arg =~ /\S/) {
1002 # $self->poderror({ -line => $line, -file => $file,
1003 # -severity => 'WARNING',
1004 # -msg => "Spurious character(s) after =end" });
1005 #}
e3237417
GS
1006 }
1007 else {
1008 # don't have a matching =begin
1009 $self->poderror({ -line => $line, -file => $file,
e2c3adef 1010 -severity => 'ERROR',
e3237417
GS
1011 -msg => "=end without =begin" });
1012 }
1013 }
e2c3adef
GS
1014 elsif($cmd eq 'for') {
1015 unless($paragraph =~ /\s*(\S+)\s*/) {
1016 $self->poderror({ -line => $line, -file => $file,
1017 -severity => 'ERROR',
1018 -msg => "=for without formatter specification" });
1019 }
1020 $arg = ''; # do not expand paragraph below
1021 }
66aff6dd
GS
1022 elsif($cmd =~ /^(pod|cut)$/) {
1023 # check for argument
1024 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1025 if($arg && $arg =~ /(\S+)/) {
1026 $self->poderror({ -line => $line, -file => $file,
1027 -severity => 'ERROR',
1028 -msg => "Spurious text after =$cmd"});
1029 }
1030 }
1031 $self->{_commands_in_head}++;
e3237417 1032 ## Check the interior sequences in the command-text
e2c3adef 1033 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1034 unless(defined $arg);
e2c3adef 1035 }
360aca43
GS
1036}
1037
66aff6dd
GS
1038sub _open_list
1039{
1040 my ($self,$indent,$line,$file) = @_;
1041 my $list = Pod::List->new(
1042 -indent => $indent,
1043 -start => $line,
1044 -file => $file);
1045 unshift(@{$self->{_list_stack}}, $list);
1046 undef $self->{_list_item_contents};
1047 $list;
1048}
1049
1050sub _close_list
1051{
1052 my ($self,$line,$file) = @_;
1053 my $list = shift(@{$self->{_list_stack}});
1054 if(defined $self->{_list_item_contents} &&
1055 $self->{_list_item_contents} == 0) {
1056 $self->poderror({ -line => $line, -file => $file,
1057 -severity => 'WARNING',
1058 -msg => "previous =item has no contents" });
1059 }
1060 undef $self->{_list_item_contents};
1061 $list;
1062}
1063
e2c3adef
GS
1064# process a block of some text
1065sub interpolate_and_check {
e3237417
GS
1066 my ($self, $paragraph, $line, $file) = @_;
1067 ## Check the interior sequences in the command-text
1068 # and return the text
1069 $self->_check_ptree(
1070 $self->parse_text($paragraph,$line), $line, $file, '');
1071}
1072
1073sub _check_ptree {
1074 my ($self,$ptree,$line,$file,$nestlist) = @_;
1075 local($_);
1076 my $text = '';
1077 # process each node in the parse tree
1078 foreach(@$ptree) {
1079 # regular text chunk
1080 unless(ref) {
1081 my $count;
1082 # count the unescaped angle brackets
92e3d63a 1083 # complain only when warning level is greater than 1
e3237417 1084 my $i = $_;
e2c3adef 1085 if($count = $i =~ tr/<>/<>/) {
e3237417
GS
1086 $self->poderror({ -line => $line, -file => $file,
1087 -severity => 'WARNING',
e2c3adef 1088 -msg => "$count unescaped <> in paragraph" })
92e3d63a 1089 if($self->{-warnings} && $self->{-warnings}>1);
e3237417
GS
1090 }
1091 $text .= $i;
1092 next;
1093 }
1094 # have an interior sequence
1095 my $cmd = $_->cmd_name();
1096 my $contents = $_->parse_tree();
1097 ($file,$line) = $_->file_line();
1098 # check for valid tag
1099 if (! $VALID_SEQUENCES{$cmd}) {
1100 $self->poderror({ -line => $line, -file => $file,
1101 -severity => 'ERROR',
66aff6dd 1102 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417
GS
1103 # expand it anyway
1104 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1105 next;
1106 }
1107 if($nestlist =~ /$cmd/) {
1108 $self->poderror({ -line => $line, -file => $file,
1109 -severity => 'ERROR',
1110 -msg => "nested commands $cmd<...$cmd<...>...>"});
1111 # _TODO_ should we add the contents anyway?
1112 # expand it anyway, see below
1113 }
1114 if($cmd eq 'E') {
1115 # preserve entities
1116 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1117 $self->poderror({ -line => $line, -file => $file,
1118 -severity => 'ERROR',
1119 -msg => "garbled entity " . $_->raw_text()});
1120 next;
1121 }
e2c3adef 1122 my $ent = $$contents[0];
66aff6dd
GS
1123 my $val;
1124 if($ent =~ /^0x[0-9a-f]+$/i) {
1125 # hexadec entity
1126 $val = hex($ent);
1127 }
1128 elsif($ent =~ /^0\d+$/) {
1129 # octal
1130 $val = oct($ent);
1131 }
1132 elsif($ent =~ /^\d+$/) {
e2c3adef 1133 # numeric entity
66aff6dd
GS
1134 $val = $ent;
1135 }
1136 if(defined $val) {
1137 if($val>0 && $val<256) {
1138 $text .= chr($val);
1139 }
1140 else {
1141 $self->poderror({ -line => $line, -file => $file,
1142 -severity => 'ERROR',
1143 -msg => "Entity number out of range " . $_->raw_text()});
1144 }
e2c3adef
GS
1145 }
1146 elsif($ENTITIES{$ent}) {
1147 # known ISO entity
1148 $text .= $ENTITIES{$ent};
1149 }
1150 else {
1151 $self->poderror({ -line => $line, -file => $file,
1152 -severity => 'WARNING',
66aff6dd 1153 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef
GS
1154 $text .= "E<$ent>";
1155 }
e3237417
GS
1156 }
1157 elsif($cmd eq 'L') {
1158 # try to parse the hyperlink
1159 my $link = Pod::Hyperlink->new($contents->raw_text());
1160 unless(defined $link) {
1161 $self->poderror({ -line => $line, -file => $file,
1162 -severity => 'ERROR',
e2c3adef 1163 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417
GS
1164 next;
1165 }
1166 $link->line($line); # remember line
1167 if($self->{-warnings}) {
1168 foreach my $w ($link->warning()) {
1169 $self->poderror({ -line => $line, -file => $file,
1170 -severity => 'WARNING',
1171 -msg => $w });
1172 }
1173 }
1174 # check the link text
1175 $text .= $self->_check_ptree($self->parse_text($link->text(),
1176 $line), $line, $file, "$nestlist$cmd");
48f30392
GS
1177 # remember link
1178 $self->hyperlink([$line,$link]);
e3237417
GS
1179 }
1180 elsif($cmd =~ /[BCFIS]/) {
1181 # add the guts
1182 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1183 }
66aff6dd
GS
1184 elsif($cmd eq 'Z') {
1185 if(length($contents->raw_text())) {
1186 $self->poderror({ -line => $line, -file => $file,
1187 -severity => 'ERROR',
1188 -msg => "Nonempty Z<>"});
1189 }
1190 }
48f30392
GS
1191 elsif($cmd eq 'X') {
1192 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1193 if($idx =~ /^\s*$/s) {
1194 $self->poderror({ -line => $line, -file => $file,
1195 -severity => 'ERROR',
1196 -msg => "Empty X<>"});
1197 }
1198 else {
1199 # remember this node
1200 $self->idx($idx);
1201 }
1202 }
1203 else {
1204 # not reached
1205 die "internal error";
e3237417
GS
1206 }
1207 }
1208 $text;
1209}
1210
e2c3adef 1211# process a block of verbatim text
360aca43 1212sub verbatim {
66aff6dd 1213 ## Nothing particular to check
e2c3adef 1214 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd
GS
1215
1216 $self->_preproc_par($paragraph);
1217
e2c3adef
GS
1218 if($self->{_current_head1} eq 'NAME') {
1219 my ($file, $line) = $pod_para->file_line;
1220 $self->poderror({ -line => $line, -file => $file,
1221 -severity => 'WARNING',
1222 -msg => 'Verbatim paragraph in NAME section' });
1223 }
360aca43
GS
1224}
1225
e2c3adef 1226# process a block of regular text
360aca43
GS
1227sub textblock {
1228 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1229 my ($file, $line) = $pod_para->file_line;
e3237417 1230
66aff6dd
GS
1231 $self->_preproc_par($paragraph);
1232
e2c3adef
GS
1233 # skip this paragraph if in a =begin block
1234 unless($self->{_have_begin}) {
1235 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1236 if($self->{_current_head1} eq 'NAME') {
1237 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1238 # this is the canonical name
1239 $self->{-name} = $1 unless(defined $self->{-name});
1240 }
e3237417
GS
1241 }
1242 }
e3237417
GS
1243}
1244
66aff6dd
GS
1245sub _preproc_par
1246{
1247 my $self = shift;
1248 $_[0] =~ s/[\s\n]+$//;
1249 if($_[0]) {
1250 $self->{_commands_in_head}++;
1251 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1252 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1253 $self->{_list_stack}->[0]->{_has_par} = 1;
1254 }
1255 }
1256}
1257
e3237417 12581;
66aff6dd 1259
48f30392
GS
1260__END__
1261
1262=head1 AUTHOR
1263
1264Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1265Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1266
1267Based on code for B<Pod::Text::pod2text()> written by
1268Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1269
1270=cut
1271