This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update to PodParser 1.18, from Brad Appleton.
[perl5.git] / lib / Pod / Checker.pm
CommitLineData
360aca43
GS
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43
GS
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
92e3d63a 13$VERSION = 1.2; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef
GS
16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43
GS
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43
GS
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43
GS
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef
GS
39=head2 podchecker()
40
41This function can take a hash of options:
e3237417
GS
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a
JH
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417
GS
49
50=back
360aca43
GS
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
e2c3adef 56I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43
GS
57
58It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef
GS
59additional features they wish to see in B<Pod::Checker> and B<podchecker>
60and verify that the checks are consistent with L<perlpod>.
360aca43 61
48f30392 62The following checks are currently preformed:
e3237417
GS
63
64=over 4
65
66=item *
67
48f30392 68Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef
GS
69and unterminated interior sequences.
70
71=item *
72
73Check for proper balancing of C<=begin> and C<=end>. The contents of such
74a block are generally ignored, i.e. no syntax checks are performed.
e3237417
GS
75
76=item *
77
78Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
79
80=item *
81
e2c3adef
GS
82Check for same nested interior-sequences (e.g.
83C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417
GS
84
85=item *
86
e2c3adef 87Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417
GS
88
89=item *
90
e2c3adef
GS
91Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
92for details.
e3237417
GS
93
94=item *
95
e2c3adef
GS
96Check for unresolved document-internal links. This check may also reveal
97misspelled links that seem to be internal links but should be links
98to something else.
e3237417
GS
99
100=back
101
e2c3adef 102=head1 DIAGNOSTICS
e3237417 103
e2c3adef 104=head2 Errors
e3237417
GS
105
106=over 4
107
66aff6dd
GS
108=item * empty =headn
109
110A heading (C<=head1> or C<=head2>) without any text? That ain't no
111heading!
112
e2c3adef 113=item * =over on line I<N> without closing =back
e3237417 114
e2c3adef
GS
115The C<=over> command does not have a corresponding C<=back> before the
116next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 117
e2c3adef 118=item * =item without previous =over
e3237417 119
e2c3adef 120=item * =back without previous =over
e3237417 121
e2c3adef
GS
122An C<=item> or C<=back> command has been found outside a
123C<=over>/C<=back> block.
e3237417 124
e2c3adef 125=item * No argument for =begin
e3237417 126
e2c3adef
GS
127A C<=begin> command was found that is not followed by the formatter
128specification.
e3237417 129
e2c3adef 130=item * =end without =begin
e3237417 131
e2c3adef
GS
132A standalone C<=end> command was found.
133
134=item * Nested =begin's
135
66aff6dd 136There were at least two consecutive C<=begin> commands without
e2c3adef
GS
137the corresponding C<=end>. Only one C<=begin> may be active at
138a time.
139
140=item * =for without formatter specification
e3237417 141
e2c3adef
GS
142There is no specification of the formatter after the C<=for> command.
143
144=item * unresolved internal link I<NAME>
145
146The given link to I<NAME> does not have a matching node in the current
147POD. This also happend when a single word node name is not enclosed in
148C<"">.
149
150=item * Unknown command "I<CMD>"
151
152An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
153C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>,
154C<=cut>
155
156=item * Unknown interior-sequence "I<SEQ>"
157
158An invalid markup command has been encountered. Valid are:
159C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
160C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
161C<ZE<lt>E<gt>>
162
163=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
164
165Two nested identical markup commands have been found. Generally this
166does not make sense.
167
168=item * garbled entity I<STRING>
169
66aff6dd
GS
170The I<STRING> found cannot be interpreted as a character entity.
171
172=item * Entity number out of range
173
174An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef
GS
175
176=item * malformed link LE<lt>E<gt>
177
178The link found cannot be parsed because it does not conform to the
179syntax described in L<perlpod>.
e3237417 180
66aff6dd
GS
181=item * nonempty ZE<lt>E<gt>
182
183The C<ZE<lt>E<gt>> sequence is supposed to be empty.
184
48f30392
GS
185=item * empty XE<lt>E<gt>
186
187The index entry specified contains nothing but whitespace.
188
66aff6dd
GS
189=item * Spurious text after =pod / =cut
190
191The commands C<=pod> and C<=cut> do not take any arguments.
192
193=item * Spurious character(s) after =back
194
195The C<=back> command does not take any arguments.
196
e3237417
GS
197=back
198
e2c3adef 199=head2 Warnings
e3237417 200
e2c3adef
GS
201These may not necessarily cause trouble, but indicate mediocre style.
202
203=over 4
204
051b35bf 205=item * multiple occurence of link target I<name>
66aff6dd
GS
206
207The POD file has some C<=item> and/or C<=head> commands that have
208the same text. Potential hyperlinks to such a text cannot be unique then.
209
210=item * line containing nothing but whitespace in paragraph
211
212There is some whitespace on a seemingly empty line. POD is very sensitive
213to such things, so this is flagged. B<vi> users switch on the B<list>
214option to avoid this problem.
215
92e3d63a
JH
216=begin _disabled_
217
66aff6dd
GS
218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
92e3d63a
JH
223=end _disabled_
224
e2c3adef
GS
225=item * No numeric argument for =over
226
227The C<=over> command is supposed to have a numeric argument (the
228indentation).
229
66aff6dd 230=item * previous =item has no contents
e2c3adef 231
66aff6dd
GS
232There is a list C<=item> right above the flagged line that has no
233text contents. You probably want to delete empty items.
234
235=item * preceding non-item paragraph(s)
236
237A list introduced by C<=over> starts with a text or verbatim paragraph,
238but continues with C<=item>s. Move the non-item paragraph out of the
239C<=over>/C<=back> block.
240
241=item * =item type mismatch (I<one> vs. I<two>)
242
243A list started with e.g. a bulletted C<=item> and continued with a
244numbered one. This is obviously inconsistent. For most translators the
245type of the I<first> C<=item> determines the type of the list.
e2c3adef
GS
246
247=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
248
249Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
250can potentially cause errors as they could be misinterpreted as
92e3d63a
JH
251markup commands. This is only printed when the -warnings level is
252greater than 1.
e2c3adef 253
66aff6dd 254=item * Unknown entity
e2c3adef
GS
255
256A character entity was found that does not belong to the standard
66aff6dd 257ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef
GS
258
259=item * No items in =over
260
66aff6dd 261The list opened with C<=over> does not contain any items.
e2c3adef
GS
262
263=item * No argument for =item
264
265C<=item> without any parameters is deprecated. It should either be followed
266by C<*> to indicate an unordered list, by a number (optionally followed
267by a dot) to indicate an ordered (numbered) list or simple text for a
268definition list.
269
66aff6dd
GS
270=item * empty section in previous paragraph
271
272The previous section (introduced by a C<=head> command) does not contain
273any text. This usually indicates that something is missing. Note: A
274C<=head1> followed immediately by C<=head2> does not trigger this warning.
275
e2c3adef
GS
276=item * Verbatim paragraph in NAME section
277
278The NAME section (C<=head1 NAME>) should consist of a single paragraph
279with the script/module name, followed by a dash `-' and a very short
280description of what the thing is good for.
281
92e3d63a
JH
282=back
283
284=head2 Hyperlinks
285
286There are some warnings wrt. malformed hyperlinks.
287
288=over 4
289
290=item * collapsing newlines to blanks
291
292A hyperlink LE<lt>...E<gt> spans more than one line. This may indicate
293and error.
e2c3adef 294
92e3d63a
JH
295=item * ignoring leading/trailing whitespace in link
296
297There is whitespace at the beginning or the end of the contents of
298LE<lt>...E<gt>.
299
300=item * (section) in '$page' deprecated
301
302There is a section detected in the page name of LE<lt>...E<gt>, e.g.
303C<LE<gt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
304Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
305to expand this to appropriate code. For links to (builtin) functions,
306please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
307
308=item * alternative text/node '%s' contains non-escaped | or /
309
310The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
311Although the hyperlink parser does its best to determine which "/" is
312text and which is a delimiter in case of doubt, one ought to escape
313these literal characters like this:
314
315 / E<sol>
316 | E<verbar>
e2c3adef
GS
317
318=back
e3237417
GS
319
320=head1 RETURN VALUE
321
322B<podchecker> returns the number of POD syntax errors found or -1 if
323there were no POD commands at all found in the file.
324
360aca43
GS
325=head1 EXAMPLES
326
327I<[T.B.D.]>
328
48f30392 329=head1 INTERFACE
360aca43 330
48f30392
GS
331While checking, this module collects document properties, e.g. the nodes
332for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
333POD translators can use this feature to syntax-check and get the nodes in
334a first pass before actually starting to convert. This is expensive in terms
335of execution time, but allows for very robust conversions.
360aca43
GS
336
337=cut
338
339#############################################################################
340
341use strict;
342#use diagnostics;
343use Carp;
344use Exporter;
345use Pod::Parser;
346
347use vars qw(@ISA @EXPORT);
348@ISA = qw(Pod::Parser);
349@EXPORT = qw(&podchecker);
350
351use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
352
353my %VALID_COMMANDS = (
354 'pod' => 1,
355 'cut' => 1,
356 'head1' => 1,
357 'head2' => 1,
358 'over' => 1,
359 'back' => 1,
360 'item' => 1,
361 'for' => 1,
362 'begin' => 1,
363 'end' => 1,
364);
365
366my %VALID_SEQUENCES = (
367 'I' => 1,
368 'B' => 1,
369 'S' => 1,
370 'C' => 1,
371 'L' => 1,
372 'F' => 1,
373 'X' => 1,
374 'Z' => 1,
375 'E' => 1,
376);
377
e2c3adef
GS
378# stolen from HTML::Entities
379my %ENTITIES = (
380 # Some normal chars that have special meaning in SGML context
381 amp => '&', # ampersand
382'gt' => '>', # greater than
383'lt' => '<', # less than
384 quot => '"', # double quote
385
386 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
387 AElig => 'Æ', # capital AE diphthong (ligature)
388 Aacute => 'Á', # capital A, acute accent
389 Acirc => 'Â', # capital A, circumflex accent
390 Agrave => 'À', # capital A, grave accent
391 Aring => 'Å', # capital A, ring
392 Atilde => 'Ã', # capital A, tilde
393 Auml => 'Ä', # capital A, dieresis or umlaut mark
394 Ccedil => 'Ç', # capital C, cedilla
395 ETH => 'Ð', # capital Eth, Icelandic
396 Eacute => 'É', # capital E, acute accent
397 Ecirc => 'Ê', # capital E, circumflex accent
398 Egrave => 'È', # capital E, grave accent
399 Euml => 'Ë', # capital E, dieresis or umlaut mark
400 Iacute => 'Í', # capital I, acute accent
401 Icirc => 'Î', # capital I, circumflex accent
402 Igrave => 'Ì', # capital I, grave accent
403 Iuml => 'Ï', # capital I, dieresis or umlaut mark
404 Ntilde => 'Ñ', # capital N, tilde
405 Oacute => 'Ó', # capital O, acute accent
406 Ocirc => 'Ô', # capital O, circumflex accent
407 Ograve => 'Ò', # capital O, grave accent
408 Oslash => 'Ø', # capital O, slash
409 Otilde => 'Õ', # capital O, tilde
410 Ouml => 'Ö', # capital O, dieresis or umlaut mark
411 THORN => 'Þ', # capital THORN, Icelandic
412 Uacute => 'Ú', # capital U, acute accent
413 Ucirc => 'Û', # capital U, circumflex accent
414 Ugrave => 'Ù', # capital U, grave accent
415 Uuml => 'Ü', # capital U, dieresis or umlaut mark
416 Yacute => 'Ý', # capital Y, acute accent
417 aacute => 'á', # small a, acute accent
418 acirc => 'â', # small a, circumflex accent
419 aelig => 'æ', # small ae diphthong (ligature)
420 agrave => 'à', # small a, grave accent
421 aring => 'å', # small a, ring
422 atilde => 'ã', # small a, tilde
423 auml => 'ä', # small a, dieresis or umlaut mark
424 ccedil => 'ç', # small c, cedilla
425 eacute => 'é', # small e, acute accent
426 ecirc => 'ê', # small e, circumflex accent
427 egrave => 'è', # small e, grave accent
428 eth => 'ð', # small eth, Icelandic
429 euml => 'ë', # small e, dieresis or umlaut mark
430 iacute => 'í', # small i, acute accent
431 icirc => 'î', # small i, circumflex accent
432 igrave => 'ì', # small i, grave accent
433 iuml => 'ï', # small i, dieresis or umlaut mark
434 ntilde => 'ñ', # small n, tilde
435 oacute => 'ó', # small o, acute accent
436 ocirc => 'ô', # small o, circumflex accent
437 ograve => 'ò', # small o, grave accent
438 oslash => 'ø', # small o, slash
439 otilde => 'õ', # small o, tilde
440 ouml => 'ö', # small o, dieresis or umlaut mark
441 szlig => 'ß', # small sharp s, German (sz ligature)
442 thorn => 'þ', # small thorn, Icelandic
443 uacute => 'ú', # small u, acute accent
444 ucirc => 'û', # small u, circumflex accent
445 ugrave => 'ù', # small u, grave accent
446 uuml => 'ü', # small u, dieresis or umlaut mark
447 yacute => 'ý', # small y, acute accent
448 yuml => 'ÿ', # small y, dieresis or umlaut mark
449
450 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
451 copy => '©', # copyright sign
452 reg => '®', # registered sign
453 nbsp => "\240", # non breaking space
454
455 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
456 iexcl => '¡',
457 cent => '¢',
458 pound => '£',
459 curren => '¤',
460 yen => '¥',
461 brvbar => '¦',
462 sect => '§',
463 uml => '¨',
464 ordf => 'ª',
465 laquo => '«',
466'not' => '¬', # not is a keyword in perl
467 shy => '­',
468 macr => '¯',
469 deg => '°',
470 plusmn => '±',
471 sup1 => '¹',
472 sup2 => '²',
473 sup3 => '³',
474 acute => '´',
475 micro => 'µ',
476 para => '¶',
477 middot => '·',
478 cedil => '¸',
479 ordm => 'º',
480 raquo => '»',
481 frac14 => '¼',
482 frac12 => '½',
483 frac34 => '¾',
484 iquest => '¿',
485'times' => '×', # times is a keyword in perl
486 divide => '÷',
66aff6dd
GS
487
488# some POD special entities
489 verbar => '|',
490 sol => '/'
e2c3adef
GS
491);
492
360aca43
GS
493##---------------------------------------------------------------------------
494
495##---------------------------------
496## Function definitions begin here
497##---------------------------------
498
e3237417
GS
499sub podchecker( $ ; $ % ) {
500 my ($infile, $outfile, %options) = @_;
360aca43
GS
501 local $_;
502
503 ## Set defaults
504 $infile ||= \*STDIN;
505 $outfile ||= \*STDERR;
506
507 ## Now create a pod checker
e3237417 508 my $checker = new Pod::Checker(%options);
360aca43
GS
509
510 ## Now check the pod document for errors
511 $checker->parse_from_file($infile, $outfile);
48f30392 512
360aca43
GS
513 ## Return the number of errors found
514 return $checker->num_errors();
515}
516
517##---------------------------------------------------------------------------
518
519##-------------------------------
520## Method definitions begin here
521##-------------------------------
522
92e3d63a
JH
523##################################
524
525=over 4
526
527=item C<Pod::Checker-E<gt>new( %options )>
528
529Return a reference to a new Pod::Checker object that inherits from
530Pod::Parser and is used for calling the required methods later. The
531following options are recognized:
532
533C<-warnings =E<gt> num>
534 Print warnings if C<num> is true. The higher the value of C<num>,
535the more warnings are printed. Currently there are only levels 1 and 2.
536
537C<-quiet =E<gt> num>
538 If C<num> is true, do not print any errors/warnings. This is useful
539when Pod::Checker is used to munge POD code into plain text from within
540POD formatters.
541
542=cut
543
66aff6dd
GS
544## sub new {
545## my $this = shift;
546## my $class = ref($this) || $this;
547## my %params = @_;
548## my $self = {%params};
549## bless $self, $class;
550## $self->initialize();
551## return $self;
552## }
360aca43
GS
553
554sub initialize {
555 my $self = shift;
664bb207
GS
556 ## Initialize number of errors, and setup an error function to
557 ## increment this number and then print to the designated output.
558 $self->{_NUM_ERRORS} = 0;
92e3d63a
JH
559 $self->{-quiet} ||= 0;
560 # set the error handling subroutine
561 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417
GS
562 $self->{_commands} = 0; # total number of POD commands encountered
563 $self->{_list_stack} = []; # stack for nested lists
564 $self->{_have_begin} = ''; # stores =begin
565 $self->{_links} = []; # stack for internal hyperlinks
566 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 567 $self->{_index} = []; # text in X<>
e2c3adef 568 # print warnings?
e3237417 569 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 570 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 571 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207
GS
572}
573
48f30392
GS
574##################################
575
48f30392
GS
576=item C<$checker-E<gt>poderror( @args )>
577
578=item C<$checker-E<gt>poderror( {%opts}, @args )>
579
580Internal method for printing errors and warnings. If no options are
581given, simply prints "@_". The following options are recognized and used
582to form the output:
583
584 -msg
585
586A message to print prior to C<@args>.
587
588 -line
589
590The line number the error occurred in.
591
592 -file
593
594The file (name) the error occurred in.
595
596 -severity
597
598The error level, should be 'WARNING' or 'ERROR'.
599
600=cut
601
e2c3adef 602# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207
GS
603sub poderror {
604 my $self = shift;
605 my %opts = (ref $_[0]) ? %{shift()} : ();
606
607 ## Retrieve options
608 chomp( my $msg = ($opts{-msg} || "")."@_" );
609 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
610 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd
GS
611 unless (exists $opts{-severity}) {
612 ## See if can find severity in message prefix
613 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
614 }
664bb207
GS
615 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
616
e3237417
GS
617 ## Increment error count and print message "
618 ++($self->{_NUM_ERRORS})
619 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
92e3d63a 620 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef
GS
621 print $out_fh ($severity, $msg, $line, $file, "\n")
622 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43
GS
623}
624
48f30392
GS
625##################################
626
627=item C<$checker-E<gt>num_errors()>
628
629Set (if argument specified) and retrieve the number of errors found.
630
631=cut
632
360aca43
GS
633sub num_errors {
634 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
635}
636
48f30392
GS
637##################################
638
639=item C<$checker-E<gt>name()>
640
641Set (if argument specified) and retrieve the canonical name of POD as
642found in the C<=head1 NAME> section.
643
644=cut
645
e2c3adef
GS
646sub name {
647 return (@_ > 1 && $_[1]) ?
648 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
649}
650
48f30392
GS
651##################################
652
653=item C<$checker-E<gt>node()>
654
655Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
656and C<=item>) of the current POD. The nodes are returned in the order of
657their occurence. They consist of plain text, each piece of whitespace is
658collapsed to a single blank.
659
660=cut
661
e2c3adef
GS
662sub node {
663 my ($self,$text) = @_;
664 if(defined $text) {
66aff6dd
GS
665 $text =~ s/\s+$//s; # strip trailing whitespace
666 $text =~ s/\s+/ /gs; # collapse whitespace
667 # add node, order important!
e2c3adef 668 push(@{$self->{_nodes}}, $text);
66aff6dd 669 # keep also a uniqueness counter
48f30392 670 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef
GS
671 return $text;
672 }
673 @{$self->{_nodes}};
674}
675
48f30392
GS
676##################################
677
678=item C<$checker-E<gt>idx()>
679
680Add (if argument specified) and retrieve the index entries (as defined by
681C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
682of whitespace is collapsed to a single blank.
683
684=cut
685
686# set/return index entries of current POD
687sub idx {
688 my ($self,$text) = @_;
689 if(defined $text) {
690 $text =~ s/\s+$//s; # strip trailing whitespace
691 $text =~ s/\s+/ /gs; # collapse whitespace
692 # add node, order important!
693 push(@{$self->{_index}}, $text);
694 # keep also a uniqueness counter
695 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
696 return $text;
697 }
698 @{$self->{_index}};
699}
700
701##################################
702
703=item C<$checker-E<gt>hyperlink()>
704
705Add (if argument specified) and retrieve the hyperlinks (as defined by
706C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line
707number and C<Pod::Hyperlink> object.
708
709=back
710
711=cut
712
e2c3adef
GS
713# set/return hyperlinks of the current POD
714sub hyperlink {
715 my $self = shift;
716 if($_[0]) {
717 push(@{$self->{_links}}, $_[0]);
718 return $_[0];
719 }
720 @{$self->{_links}};
721}
722
e3237417
GS
723## overrides for Pod::Parser
724
360aca43 725sub end_pod {
66aff6dd
GS
726 ## Do some final checks and
727 ## print the number of errors found
728 my $self = shift;
729 my $infile = $self->input_file();
730 my $out_fh = $self->output_handle();
731
732 if(@{$self->{_list_stack}}) {
733 # _TODO_ display, but don't count them for now
734 my $list;
735 while(($list = $self->_close_list('EOF',$infile)) &&
736 $list->indent() ne 'auto') {
737 $self->poderror({ -line => 'EOF', -file => $infile,
738 -severity => 'ERROR', -msg => "=over on line " .
739 $list->start() . " without closing =back" }); #"
740 }
741 }
742
743 # check validity of document internal hyperlinks
744 # first build the node names from the paragraph text
745 my %nodes;
746 foreach($self->node()) {
747 $nodes{$_} = 1;
92e3d63a 748 if(/^(\S+)\s+\S/) {
66aff6dd
GS
749 # we have more than one word. Use the first as a node, too.
750 # This is used heavily in perlfunc.pod
751 $nodes{$1} ||= 2; # derived node
752 }
753 }
92e3d63a
JH
754 foreach($self->idx()) {
755 $nodes{$_} = 3; # index node
756 }
66aff6dd 757 foreach($self->hyperlink()) {
48f30392
GS
758 my ($line,$link) = @$_;
759 # _TODO_ what if there is a link to the page itself by the name,
760 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
761 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
762 my $node = $self->_check_ptree($self->parse_text($link->node(),
763 $line), $line, $infile, 'L');
764 if($node && !$nodes{$node}) {
765 $self->poderror({ -line => $line || '', -file => $infile,
766 -severity => 'ERROR',
767 -msg => "unresolved internal link '$node'"});
768 }
66aff6dd
GS
769 }
770 }
48f30392
GS
771
772 # check the internal nodes for uniqueness. This pertains to
773 # =headX, =item and X<...>
66aff6dd
GS
774 foreach(grep($self->{_unique_nodes}->{$_} > 1,
775 keys %{$self->{_unique_nodes}})) {
776 $self->poderror({ -line => '-', -file => $infile,
777 -severity => 'WARNING',
778 -msg => "multiple occurence of link target '$_'"});
779 }
780
781 ## Print the number of errors found
782 my $num_errors = $self->num_errors();
783 if ($num_errors > 0) {
784 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 785 ($num_errors == 1) ? "error" : "errors");
66aff6dd
GS
786 }
787 elsif($self->{_commands} == 0) {
788 print $out_fh "$infile does not contain any pod commands.\n";
789 $self->num_errors(-1);
790 }
791 else {
792 print $out_fh "$infile pod syntax OK.\n";
793 }
360aca43
GS
794}
795
e2c3adef 796# check a POD command directive
360aca43 797sub command {
664bb207 798 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 799 my ($file, $line) = $pod_para->file_line;
360aca43 800 ## Check the command syntax
e3237417 801 my $arg; # this will hold the command argument
664bb207
GS
802 if (! $VALID_COMMANDS{$cmd}) {
803 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 804 -msg => "Unknown command '$cmd'" });
360aca43 805 }
92e3d63a
JH
806 else { # found a valid command
807 $self->{_commands}++; # delete this line if below is enabled again
808
809 ##### following check disabled due to strong request
810 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
811 # $self->poderror({ -line => $line, -file => $file,
812 # -severity => 'WARNING',
813 # -msg => "file does not start with =head" });
814 #}
815
816 # check syntax of particular command
e3237417 817 if($cmd eq 'over') {
e2c3adef
GS
818 # check for argument
819 $arg = $self->interpolate_and_check($paragraph, $line,$file);
820 my $indent = 4; # default
821 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
822 $indent = $1;
823 } else {
824 $self->poderror({ -line => $line, -file => $file,
825 -severity => 'WARNING',
826 -msg => "No numeric argument for =over"});
827 }
e3237417 828 # start a new list
66aff6dd 829 $self->_open_list($indent,$line,$file);
e3237417
GS
830 }
831 elsif($cmd eq 'item') {
e2c3adef 832 # are we in a list?
e3237417
GS
833 unless(@{$self->{_list_stack}}) {
834 $self->poderror({ -line => $line, -file => $file,
835 -severity => 'ERROR',
836 -msg => "=item without previous =over" });
e2c3adef 837 # auto-open in case we encounter many more
66aff6dd
GS
838 $self->_open_list('auto',$line,$file);
839 }
840 my $list = $self->{_list_stack}->[0];
841 # check whether the previous item had some contents
842 if(defined $self->{_list_item_contents} &&
843 $self->{_list_item_contents} == 0) {
844 $self->poderror({ -line => $line, -file => $file,
845 -severity => 'WARNING',
846 -msg => "previous =item has no contents" });
847 }
848 if($list->{_has_par}) {
849 $self->poderror({ -line => $line, -file => $file,
850 -severity => 'WARNING',
851 -msg => "preceding non-item paragraph(s)" });
852 delete $list->{_has_par};
e3237417 853 }
e2c3adef
GS
854 # check for argument
855 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd
GS
856 if($arg && $arg =~ /(\S+)/) {
857 $arg =~ s/[\s\n]+$//;
858 my $type;
859 if($arg =~ /^[*]\s*(\S*.*)/) {
860 $type = 'bullet';
861 $self->{_list_item_contents} = $1 ? 1 : 0;
862 $arg = $1;
863 }
864 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
865 $type = 'number';
866 $self->{_list_item_contents} = $1 ? 1 : 0;
867 $arg = $1;
868 }
869 else {
870 $type = 'definition';
871 $self->{_list_item_contents} = 1;
872 }
873 my $first = $list->type();
874 if($first && $first ne $type) {
875 $self->poderror({ -line => $line, -file => $file,
876 -severity => 'WARNING',
877 -msg => "=item type mismatch ('$first' vs. '$type')"});
878 }
879 else { # first item
880 $list->type($type);
881 }
882 }
883 else {
e2c3adef
GS
884 $self->poderror({ -line => $line, -file => $file,
885 -severity => 'WARNING',
886 -msg => "No argument for =item" });
887 $arg = ' '; # empty
66aff6dd 888 $self->{_list_item_contents} = 0;
e3237417 889 }
e2c3adef 890 # add this item
66aff6dd 891 $list->item($arg);
e2c3adef
GS
892 # remember this node
893 $self->node($arg);
e3237417
GS
894 }
895 elsif($cmd eq 'back') {
896 # check if we have an open list
897 unless(@{$self->{_list_stack}}) {
898 $self->poderror({ -line => $line, -file => $file,
899 -severity => 'ERROR',
900 -msg => "=back without previous =over" });
901 }
902 else {
903 # check for spurious characters
e2c3adef 904 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
905 if($arg && $arg =~ /\S/) {
906 $self->poderror({ -line => $line, -file => $file,
66aff6dd 907 -severity => 'ERROR',
e3237417
GS
908 -msg => "Spurious character(s) after =back" });
909 }
910 # close list
66aff6dd 911 my $list = $self->_close_list($line,$file);
e3237417
GS
912 # check for empty lists
913 if(!$list->item() && $self->{-warnings}) {
914 $self->poderror({ -line => $line, -file => $file,
915 -severity => 'WARNING',
916 -msg => "No items in =over (at line " .
917 $list->start() . ") / =back list"}); #"
918 }
919 }
920 }
66aff6dd 921 elsif($cmd =~ /^head(\d+)/) {
48f30392 922 # check whether the previous =head section had some contents
66aff6dd
GS
923 if(defined $self->{_commands_in_head} &&
924 $self->{_commands_in_head} == 0 &&
925 defined $self->{_last_head} &&
926 $self->{_last_head} >= $1) {
927 $self->poderror({ -line => $line, -file => $file,
928 -severity => 'WARNING',
929 -msg => "empty section in previous paragraph"});
930 }
931 $self->{_commands_in_head} = -1;
932 $self->{_last_head} = $1;
e3237417
GS
933 # check if there is an open list
934 if(@{$self->{_list_stack}}) {
935 my $list;
66aff6dd
GS
936 while(($list = $self->_close_list($line,$file)) &&
937 $list->indent() ne 'auto') {
e3237417
GS
938 $self->poderror({ -line => $line, -file => $file,
939 -severity => 'ERROR',
e2c3adef
GS
940 -msg => "=over on line ". $list->start() .
941 " without closing =back (at $cmd)" });
e3237417
GS
942 }
943 }
944 # remember this node
e2c3adef 945 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd
GS
946 $arg =~ s/[\s\n]+$//s;
947 $self->node($arg);
948 unless(length($arg)) {
949 $self->poderror({ -line => $line, -file => $file,
950 -severity => 'ERROR',
951 -msg => "empty =$cmd"});
952 }
e2c3adef 953 if($cmd eq 'head1') {
e2c3adef
GS
954 $self->{_current_head1} = $arg;
955 } else {
956 $self->{_current_head1} = '';
957 }
e3237417
GS
958 }
959 elsif($cmd eq 'begin') {
960 if($self->{_have_begin}) {
961 # already have a begin
962 $self->poderror({ -line => $line, -file => $file,
963 -severity => 'ERROR',
964 -msg => "Nested =begin's (first at line " .
965 $self->{_have_begin} . ")"});
966 }
967 else {
968 # check for argument
e2c3adef 969 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
970 unless($arg && $arg =~ /(\S+)/) {
971 $self->poderror({ -line => $line, -file => $file,
e2c3adef 972 -severity => 'ERROR',
e3237417
GS
973 -msg => "No argument for =begin"});
974 }
975 # remember the =begin
976 $self->{_have_begin} = "$line:$1";
977 }
978 }
979 elsif($cmd eq 'end') {
980 if($self->{_have_begin}) {
981 # close the existing =begin
982 $self->{_have_begin} = '';
983 # check for spurious characters
e2c3adef
GS
984 $arg = $self->interpolate_and_check($paragraph, $line,$file);
985 # the closing argument is optional
986 #if($arg && $arg =~ /\S/) {
987 # $self->poderror({ -line => $line, -file => $file,
988 # -severity => 'WARNING',
989 # -msg => "Spurious character(s) after =end" });
990 #}
e3237417
GS
991 }
992 else {
993 # don't have a matching =begin
994 $self->poderror({ -line => $line, -file => $file,
e2c3adef 995 -severity => 'ERROR',
e3237417
GS
996 -msg => "=end without =begin" });
997 }
998 }
e2c3adef
GS
999 elsif($cmd eq 'for') {
1000 unless($paragraph =~ /\s*(\S+)\s*/) {
1001 $self->poderror({ -line => $line, -file => $file,
1002 -severity => 'ERROR',
1003 -msg => "=for without formatter specification" });
1004 }
1005 $arg = ''; # do not expand paragraph below
1006 }
66aff6dd
GS
1007 elsif($cmd =~ /^(pod|cut)$/) {
1008 # check for argument
1009 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1010 if($arg && $arg =~ /(\S+)/) {
1011 $self->poderror({ -line => $line, -file => $file,
1012 -severity => 'ERROR',
1013 -msg => "Spurious text after =$cmd"});
1014 }
1015 }
1016 $self->{_commands_in_head}++;
e3237417 1017 ## Check the interior sequences in the command-text
e2c3adef 1018 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1019 unless(defined $arg);
e2c3adef 1020 }
360aca43
GS
1021}
1022
66aff6dd
GS
1023sub _open_list
1024{
1025 my ($self,$indent,$line,$file) = @_;
1026 my $list = Pod::List->new(
1027 -indent => $indent,
1028 -start => $line,
1029 -file => $file);
1030 unshift(@{$self->{_list_stack}}, $list);
1031 undef $self->{_list_item_contents};
1032 $list;
1033}
1034
1035sub _close_list
1036{
1037 my ($self,$line,$file) = @_;
1038 my $list = shift(@{$self->{_list_stack}});
1039 if(defined $self->{_list_item_contents} &&
1040 $self->{_list_item_contents} == 0) {
1041 $self->poderror({ -line => $line, -file => $file,
1042 -severity => 'WARNING',
1043 -msg => "previous =item has no contents" });
1044 }
1045 undef $self->{_list_item_contents};
1046 $list;
1047}
1048
e2c3adef
GS
1049# process a block of some text
1050sub interpolate_and_check {
e3237417
GS
1051 my ($self, $paragraph, $line, $file) = @_;
1052 ## Check the interior sequences in the command-text
1053 # and return the text
1054 $self->_check_ptree(
1055 $self->parse_text($paragraph,$line), $line, $file, '');
1056}
1057
1058sub _check_ptree {
1059 my ($self,$ptree,$line,$file,$nestlist) = @_;
1060 local($_);
1061 my $text = '';
1062 # process each node in the parse tree
1063 foreach(@$ptree) {
1064 # regular text chunk
1065 unless(ref) {
1066 my $count;
1067 # count the unescaped angle brackets
92e3d63a 1068 # complain only when warning level is greater than 1
e3237417 1069 my $i = $_;
e2c3adef 1070 if($count = $i =~ tr/<>/<>/) {
e3237417
GS
1071 $self->poderror({ -line => $line, -file => $file,
1072 -severity => 'WARNING',
e2c3adef 1073 -msg => "$count unescaped <> in paragraph" })
92e3d63a 1074 if($self->{-warnings} && $self->{-warnings}>1);
e3237417
GS
1075 }
1076 $text .= $i;
1077 next;
1078 }
1079 # have an interior sequence
1080 my $cmd = $_->cmd_name();
1081 my $contents = $_->parse_tree();
1082 ($file,$line) = $_->file_line();
1083 # check for valid tag
1084 if (! $VALID_SEQUENCES{$cmd}) {
1085 $self->poderror({ -line => $line, -file => $file,
1086 -severity => 'ERROR',
66aff6dd 1087 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417
GS
1088 # expand it anyway
1089 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1090 next;
1091 }
1092 if($nestlist =~ /$cmd/) {
1093 $self->poderror({ -line => $line, -file => $file,
1094 -severity => 'ERROR',
1095 -msg => "nested commands $cmd<...$cmd<...>...>"});
1096 # _TODO_ should we add the contents anyway?
1097 # expand it anyway, see below
1098 }
1099 if($cmd eq 'E') {
1100 # preserve entities
1101 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1102 $self->poderror({ -line => $line, -file => $file,
1103 -severity => 'ERROR',
1104 -msg => "garbled entity " . $_->raw_text()});
1105 next;
1106 }
e2c3adef 1107 my $ent = $$contents[0];
66aff6dd
GS
1108 my $val;
1109 if($ent =~ /^0x[0-9a-f]+$/i) {
1110 # hexadec entity
1111 $val = hex($ent);
1112 }
1113 elsif($ent =~ /^0\d+$/) {
1114 # octal
1115 $val = oct($ent);
1116 }
1117 elsif($ent =~ /^\d+$/) {
e2c3adef 1118 # numeric entity
66aff6dd
GS
1119 $val = $ent;
1120 }
1121 if(defined $val) {
1122 if($val>0 && $val<256) {
1123 $text .= chr($val);
1124 }
1125 else {
1126 $self->poderror({ -line => $line, -file => $file,
1127 -severity => 'ERROR',
1128 -msg => "Entity number out of range " . $_->raw_text()});
1129 }
e2c3adef
GS
1130 }
1131 elsif($ENTITIES{$ent}) {
1132 # known ISO entity
1133 $text .= $ENTITIES{$ent};
1134 }
1135 else {
1136 $self->poderror({ -line => $line, -file => $file,
1137 -severity => 'WARNING',
66aff6dd 1138 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef
GS
1139 $text .= "E<$ent>";
1140 }
e3237417
GS
1141 }
1142 elsif($cmd eq 'L') {
1143 # try to parse the hyperlink
1144 my $link = Pod::Hyperlink->new($contents->raw_text());
1145 unless(defined $link) {
1146 $self->poderror({ -line => $line, -file => $file,
1147 -severity => 'ERROR',
e2c3adef 1148 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417
GS
1149 next;
1150 }
1151 $link->line($line); # remember line
1152 if($self->{-warnings}) {
1153 foreach my $w ($link->warning()) {
1154 $self->poderror({ -line => $line, -file => $file,
1155 -severity => 'WARNING',
1156 -msg => $w });
1157 }
1158 }
1159 # check the link text
1160 $text .= $self->_check_ptree($self->parse_text($link->text(),
1161 $line), $line, $file, "$nestlist$cmd");
48f30392
GS
1162 # remember link
1163 $self->hyperlink([$line,$link]);
e3237417
GS
1164 }
1165 elsif($cmd =~ /[BCFIS]/) {
1166 # add the guts
1167 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1168 }
66aff6dd
GS
1169 elsif($cmd eq 'Z') {
1170 if(length($contents->raw_text())) {
1171 $self->poderror({ -line => $line, -file => $file,
1172 -severity => 'ERROR',
1173 -msg => "Nonempty Z<>"});
1174 }
1175 }
48f30392
GS
1176 elsif($cmd eq 'X') {
1177 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1178 if($idx =~ /^\s*$/s) {
1179 $self->poderror({ -line => $line, -file => $file,
1180 -severity => 'ERROR',
1181 -msg => "Empty X<>"});
1182 }
1183 else {
1184 # remember this node
1185 $self->idx($idx);
1186 }
1187 }
1188 else {
1189 # not reached
1190 die "internal error";
e3237417
GS
1191 }
1192 }
1193 $text;
1194}
1195
e2c3adef 1196# process a block of verbatim text
360aca43 1197sub verbatim {
66aff6dd 1198 ## Nothing particular to check
e2c3adef 1199 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd
GS
1200
1201 $self->_preproc_par($paragraph);
1202
e2c3adef
GS
1203 if($self->{_current_head1} eq 'NAME') {
1204 my ($file, $line) = $pod_para->file_line;
1205 $self->poderror({ -line => $line, -file => $file,
1206 -severity => 'WARNING',
1207 -msg => 'Verbatim paragraph in NAME section' });
1208 }
360aca43
GS
1209}
1210
e2c3adef 1211# process a block of regular text
360aca43
GS
1212sub textblock {
1213 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1214 my ($file, $line) = $pod_para->file_line;
e3237417 1215
66aff6dd
GS
1216 $self->_preproc_par($paragraph);
1217
e2c3adef
GS
1218 # skip this paragraph if in a =begin block
1219 unless($self->{_have_begin}) {
1220 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1221 if($self->{_current_head1} eq 'NAME') {
1222 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1223 # this is the canonical name
1224 $self->{-name} = $1 unless(defined $self->{-name});
1225 }
e3237417
GS
1226 }
1227 }
e3237417
GS
1228}
1229
66aff6dd
GS
1230sub _preproc_par
1231{
1232 my $self = shift;
1233 $_[0] =~ s/[\s\n]+$//;
1234 if($_[0]) {
1235 $self->{_commands_in_head}++;
1236 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1237 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1238 $self->{_list_stack}->[0]->{_has_par} = 1;
1239 }
1240 }
1241}
1242
e3237417 12431;
66aff6dd 1244
48f30392
GS
1245__END__
1246
1247=head1 AUTHOR
1248
1249Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1250Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1251
1252Based on code for B<Pod::Text::pod2text()> written by
1253Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1254
1255=cut
1256