This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More leniency to the \p and \P: now can have whitespace
[perl5.git] / lib / Pod / Checker.pm
CommitLineData
360aca43
GS
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43
GS
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
88d01e8d 13$VERSION = 1.3; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef
GS
16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43
GS
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43
GS
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43
GS
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef
GS
39=head2 podchecker()
40
41This function can take a hash of options:
e3237417
GS
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a
JH
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417
GS
49
50=back
360aca43
GS
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
e2c3adef 56I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!>
360aca43
GS
57
58It is hoped that curious/ambitious user will help flesh out and add the
e2c3adef
GS
59additional features they wish to see in B<Pod::Checker> and B<podchecker>
60and verify that the checks are consistent with L<perlpod>.
360aca43 61
48f30392 62The following checks are currently preformed:
e3237417
GS
63
64=over 4
65
66=item *
67
48f30392 68Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef
GS
69and unterminated interior sequences.
70
71=item *
72
73Check for proper balancing of C<=begin> and C<=end>. The contents of such
74a block are generally ignored, i.e. no syntax checks are performed.
e3237417
GS
75
76=item *
77
78Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
79
80=item *
81
e2c3adef
GS
82Check for same nested interior-sequences (e.g.
83C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417
GS
84
85=item *
86
e2c3adef 87Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417
GS
88
89=item *
90
e2c3adef
GS
91Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
92for details.
e3237417
GS
93
94=item *
95
e2c3adef
GS
96Check for unresolved document-internal links. This check may also reveal
97misspelled links that seem to be internal links but should be links
98to something else.
e3237417
GS
99
100=back
101
e2c3adef 102=head1 DIAGNOSTICS
e3237417 103
e2c3adef 104=head2 Errors
e3237417
GS
105
106=over 4
107
66aff6dd
GS
108=item * empty =headn
109
110A heading (C<=head1> or C<=head2>) without any text? That ain't no
111heading!
112
e2c3adef 113=item * =over on line I<N> without closing =back
e3237417 114
e2c3adef
GS
115The C<=over> command does not have a corresponding C<=back> before the
116next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 117
e2c3adef 118=item * =item without previous =over
e3237417 119
e2c3adef 120=item * =back without previous =over
e3237417 121
e2c3adef
GS
122An C<=item> or C<=back> command has been found outside a
123C<=over>/C<=back> block.
e3237417 124
e2c3adef 125=item * No argument for =begin
e3237417 126
e2c3adef
GS
127A C<=begin> command was found that is not followed by the formatter
128specification.
e3237417 129
e2c3adef 130=item * =end without =begin
e3237417 131
e2c3adef
GS
132A standalone C<=end> command was found.
133
134=item * Nested =begin's
135
66aff6dd 136There were at least two consecutive C<=begin> commands without
e2c3adef
GS
137the corresponding C<=end>. Only one C<=begin> may be active at
138a time.
139
140=item * =for without formatter specification
e3237417 141
e2c3adef
GS
142There is no specification of the formatter after the C<=for> command.
143
144=item * unresolved internal link I<NAME>
145
146The given link to I<NAME> does not have a matching node in the current
147POD. This also happend when a single word node name is not enclosed in
148C<"">.
149
150=item * Unknown command "I<CMD>"
151
152An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d
CT
153C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
154C<=for>, C<=pod>, C<=cut>
e2c3adef
GS
155
156=item * Unknown interior-sequence "I<SEQ>"
157
158An invalid markup command has been encountered. Valid are:
159C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
160C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
161C<ZE<lt>E<gt>>
162
163=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
164
165Two nested identical markup commands have been found. Generally this
166does not make sense.
167
168=item * garbled entity I<STRING>
169
66aff6dd
GS
170The I<STRING> found cannot be interpreted as a character entity.
171
172=item * Entity number out of range
173
174An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef
GS
175
176=item * malformed link LE<lt>E<gt>
177
178The link found cannot be parsed because it does not conform to the
179syntax described in L<perlpod>.
e3237417 180
66aff6dd
GS
181=item * nonempty ZE<lt>E<gt>
182
183The C<ZE<lt>E<gt>> sequence is supposed to be empty.
184
48f30392
GS
185=item * empty XE<lt>E<gt>
186
187The index entry specified contains nothing but whitespace.
188
66aff6dd
GS
189=item * Spurious text after =pod / =cut
190
191The commands C<=pod> and C<=cut> do not take any arguments.
192
193=item * Spurious character(s) after =back
194
195The C<=back> command does not take any arguments.
196
e3237417
GS
197=back
198
e2c3adef 199=head2 Warnings
e3237417 200
e2c3adef
GS
201These may not necessarily cause trouble, but indicate mediocre style.
202
203=over 4
204
051b35bf 205=item * multiple occurence of link target I<name>
66aff6dd
GS
206
207The POD file has some C<=item> and/or C<=head> commands that have
208the same text. Potential hyperlinks to such a text cannot be unique then.
209
210=item * line containing nothing but whitespace in paragraph
211
212There is some whitespace on a seemingly empty line. POD is very sensitive
213to such things, so this is flagged. B<vi> users switch on the B<list>
214option to avoid this problem.
215
92e3d63a
JH
216=begin _disabled_
217
66aff6dd
GS
218=item * file does not start with =head
219
220The file starts with a different POD directive than head.
221This is most probably something you do not want.
222
92e3d63a
JH
223=end _disabled_
224
66aff6dd 225=item * previous =item has no contents
e2c3adef 226
66aff6dd
GS
227There is a list C<=item> right above the flagged line that has no
228text contents. You probably want to delete empty items.
229
230=item * preceding non-item paragraph(s)
231
232A list introduced by C<=over> starts with a text or verbatim paragraph,
233but continues with C<=item>s. Move the non-item paragraph out of the
234C<=over>/C<=back> block.
235
236=item * =item type mismatch (I<one> vs. I<two>)
237
238A list started with e.g. a bulletted C<=item> and continued with a
239numbered one. This is obviously inconsistent. For most translators the
240type of the I<first> C<=item> determines the type of the list.
e2c3adef
GS
241
242=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
243
244Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
245can potentially cause errors as they could be misinterpreted as
92e3d63a
JH
246markup commands. This is only printed when the -warnings level is
247greater than 1.
e2c3adef 248
66aff6dd 249=item * Unknown entity
e2c3adef
GS
250
251A character entity was found that does not belong to the standard
66aff6dd 252ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef
GS
253
254=item * No items in =over
255
66aff6dd 256The list opened with C<=over> does not contain any items.
e2c3adef
GS
257
258=item * No argument for =item
259
260C<=item> without any parameters is deprecated. It should either be followed
261by C<*> to indicate an unordered list, by a number (optionally followed
262by a dot) to indicate an ordered (numbered) list or simple text for a
263definition list.
264
66aff6dd
GS
265=item * empty section in previous paragraph
266
267The previous section (introduced by a C<=head> command) does not contain
268any text. This usually indicates that something is missing. Note: A
269C<=head1> followed immediately by C<=head2> does not trigger this warning.
270
e2c3adef
GS
271=item * Verbatim paragraph in NAME section
272
273The NAME section (C<=head1 NAME>) should consist of a single paragraph
274with the script/module name, followed by a dash `-' and a very short
275description of what the thing is good for.
276
92e3d63a
JH
277=back
278
279=head2 Hyperlinks
280
281There are some warnings wrt. malformed hyperlinks.
282
283=over 4
284
92e3d63a
JH
285=item * ignoring leading/trailing whitespace in link
286
287There is whitespace at the beginning or the end of the contents of
288LE<lt>...E<gt>.
289
290=item * (section) in '$page' deprecated
291
292There is a section detected in the page name of LE<lt>...E<gt>, e.g.
293C<LE<gt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
294Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
295to expand this to appropriate code. For links to (builtin) functions,
296please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
297
298=item * alternative text/node '%s' contains non-escaped | or /
299
300The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
301Although the hyperlink parser does its best to determine which "/" is
302text and which is a delimiter in case of doubt, one ought to escape
303these literal characters like this:
304
305 / E<sol>
306 | E<verbar>
e2c3adef
GS
307
308=back
e3237417
GS
309
310=head1 RETURN VALUE
311
312B<podchecker> returns the number of POD syntax errors found or -1 if
313there were no POD commands at all found in the file.
314
360aca43
GS
315=head1 EXAMPLES
316
317I<[T.B.D.]>
318
48f30392 319=head1 INTERFACE
360aca43 320
48f30392
GS
321While checking, this module collects document properties, e.g. the nodes
322for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
323POD translators can use this feature to syntax-check and get the nodes in
324a first pass before actually starting to convert. This is expensive in terms
325of execution time, but allows for very robust conversions.
360aca43
GS
326
327=cut
328
329#############################################################################
330
331use strict;
332#use diagnostics;
333use Carp;
334use Exporter;
335use Pod::Parser;
336
337use vars qw(@ISA @EXPORT);
338@ISA = qw(Pod::Parser);
339@EXPORT = qw(&podchecker);
340
341use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
342
343my %VALID_COMMANDS = (
344 'pod' => 1,
345 'cut' => 1,
346 'head1' => 1,
347 'head2' => 1,
c6b85e5d
CT
348 'head3' => 1,
349 'head4' => 1,
360aca43
GS
350 'over' => 1,
351 'back' => 1,
352 'item' => 1,
353 'for' => 1,
354 'begin' => 1,
355 'end' => 1,
356);
357
358my %VALID_SEQUENCES = (
359 'I' => 1,
360 'B' => 1,
361 'S' => 1,
362 'C' => 1,
363 'L' => 1,
364 'F' => 1,
365 'X' => 1,
366 'Z' => 1,
367 'E' => 1,
368);
369
e2c3adef
GS
370# stolen from HTML::Entities
371my %ENTITIES = (
372 # Some normal chars that have special meaning in SGML context
373 amp => '&', # ampersand
374'gt' => '>', # greater than
375'lt' => '<', # less than
376 quot => '"', # double quote
377
378 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
379 AElig => 'Æ', # capital AE diphthong (ligature)
380 Aacute => 'Á', # capital A, acute accent
381 Acirc => 'Â', # capital A, circumflex accent
382 Agrave => 'À', # capital A, grave accent
383 Aring => 'Å', # capital A, ring
384 Atilde => 'Ã', # capital A, tilde
385 Auml => 'Ä', # capital A, dieresis or umlaut mark
386 Ccedil => 'Ç', # capital C, cedilla
387 ETH => 'Ð', # capital Eth, Icelandic
388 Eacute => 'É', # capital E, acute accent
389 Ecirc => 'Ê', # capital E, circumflex accent
390 Egrave => 'È', # capital E, grave accent
391 Euml => 'Ë', # capital E, dieresis or umlaut mark
392 Iacute => 'Í', # capital I, acute accent
393 Icirc => 'Î', # capital I, circumflex accent
394 Igrave => 'Ì', # capital I, grave accent
395 Iuml => 'Ï', # capital I, dieresis or umlaut mark
396 Ntilde => 'Ñ', # capital N, tilde
397 Oacute => 'Ó', # capital O, acute accent
398 Ocirc => 'Ô', # capital O, circumflex accent
399 Ograve => 'Ò', # capital O, grave accent
400 Oslash => 'Ø', # capital O, slash
401 Otilde => 'Õ', # capital O, tilde
402 Ouml => 'Ö', # capital O, dieresis or umlaut mark
403 THORN => 'Þ', # capital THORN, Icelandic
404 Uacute => 'Ú', # capital U, acute accent
405 Ucirc => 'Û', # capital U, circumflex accent
406 Ugrave => 'Ù', # capital U, grave accent
407 Uuml => 'Ü', # capital U, dieresis or umlaut mark
408 Yacute => 'Ý', # capital Y, acute accent
409 aacute => 'á', # small a, acute accent
410 acirc => 'â', # small a, circumflex accent
411 aelig => 'æ', # small ae diphthong (ligature)
412 agrave => 'à', # small a, grave accent
413 aring => 'å', # small a, ring
414 atilde => 'ã', # small a, tilde
415 auml => 'ä', # small a, dieresis or umlaut mark
416 ccedil => 'ç', # small c, cedilla
417 eacute => 'é', # small e, acute accent
418 ecirc => 'ê', # small e, circumflex accent
419 egrave => 'è', # small e, grave accent
420 eth => 'ð', # small eth, Icelandic
421 euml => 'ë', # small e, dieresis or umlaut mark
422 iacute => 'í', # small i, acute accent
423 icirc => 'î', # small i, circumflex accent
424 igrave => 'ì', # small i, grave accent
425 iuml => 'ï', # small i, dieresis or umlaut mark
426 ntilde => 'ñ', # small n, tilde
427 oacute => 'ó', # small o, acute accent
428 ocirc => 'ô', # small o, circumflex accent
429 ograve => 'ò', # small o, grave accent
430 oslash => 'ø', # small o, slash
431 otilde => 'õ', # small o, tilde
432 ouml => 'ö', # small o, dieresis or umlaut mark
433 szlig => 'ß', # small sharp s, German (sz ligature)
434 thorn => 'þ', # small thorn, Icelandic
435 uacute => 'ú', # small u, acute accent
436 ucirc => 'û', # small u, circumflex accent
437 ugrave => 'ù', # small u, grave accent
438 uuml => 'ü', # small u, dieresis or umlaut mark
439 yacute => 'ý', # small y, acute accent
440 yuml => 'ÿ', # small y, dieresis or umlaut mark
441
442 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
443 copy => '©', # copyright sign
444 reg => '®', # registered sign
445 nbsp => "\240", # non breaking space
446
447 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
448 iexcl => '¡',
449 cent => '¢',
450 pound => '£',
451 curren => '¤',
452 yen => '¥',
453 brvbar => '¦',
454 sect => '§',
455 uml => '¨',
456 ordf => 'ª',
457 laquo => '«',
458'not' => '¬', # not is a keyword in perl
459 shy => '­',
460 macr => '¯',
461 deg => '°',
462 plusmn => '±',
463 sup1 => '¹',
464 sup2 => '²',
465 sup3 => '³',
466 acute => '´',
467 micro => 'µ',
468 para => '¶',
469 middot => '·',
470 cedil => '¸',
471 ordm => 'º',
472 raquo => '»',
473 frac14 => '¼',
474 frac12 => '½',
475 frac34 => '¾',
476 iquest => '¿',
477'times' => '×', # times is a keyword in perl
478 divide => '÷',
66aff6dd
GS
479
480# some POD special entities
481 verbar => '|',
482 sol => '/'
e2c3adef
GS
483);
484
360aca43
GS
485##---------------------------------------------------------------------------
486
487##---------------------------------
488## Function definitions begin here
489##---------------------------------
490
e3237417
GS
491sub podchecker( $ ; $ % ) {
492 my ($infile, $outfile, %options) = @_;
360aca43
GS
493 local $_;
494
495 ## Set defaults
496 $infile ||= \*STDIN;
497 $outfile ||= \*STDERR;
498
499 ## Now create a pod checker
e3237417 500 my $checker = new Pod::Checker(%options);
360aca43
GS
501
502 ## Now check the pod document for errors
503 $checker->parse_from_file($infile, $outfile);
48f30392 504
360aca43
GS
505 ## Return the number of errors found
506 return $checker->num_errors();
507}
508
509##---------------------------------------------------------------------------
510
511##-------------------------------
512## Method definitions begin here
513##-------------------------------
514
92e3d63a
JH
515##################################
516
517=over 4
518
519=item C<Pod::Checker-E<gt>new( %options )>
520
521Return a reference to a new Pod::Checker object that inherits from
522Pod::Parser and is used for calling the required methods later. The
523following options are recognized:
524
525C<-warnings =E<gt> num>
526 Print warnings if C<num> is true. The higher the value of C<num>,
527the more warnings are printed. Currently there are only levels 1 and 2.
528
529C<-quiet =E<gt> num>
530 If C<num> is true, do not print any errors/warnings. This is useful
531when Pod::Checker is used to munge POD code into plain text from within
532POD formatters.
533
534=cut
535
66aff6dd
GS
536## sub new {
537## my $this = shift;
538## my $class = ref($this) || $this;
539## my %params = @_;
540## my $self = {%params};
541## bless $self, $class;
542## $self->initialize();
543## return $self;
544## }
360aca43
GS
545
546sub initialize {
547 my $self = shift;
664bb207
GS
548 ## Initialize number of errors, and setup an error function to
549 ## increment this number and then print to the designated output.
550 $self->{_NUM_ERRORS} = 0;
92e3d63a
JH
551 $self->{-quiet} ||= 0;
552 # set the error handling subroutine
553 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417
GS
554 $self->{_commands} = 0; # total number of POD commands encountered
555 $self->{_list_stack} = []; # stack for nested lists
556 $self->{_have_begin} = ''; # stores =begin
557 $self->{_links} = []; # stack for internal hyperlinks
558 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 559 $self->{_index} = []; # text in X<>
e2c3adef 560 # print warnings?
e3237417 561 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 562 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 563 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207
GS
564}
565
48f30392
GS
566##################################
567
48f30392
GS
568=item C<$checker-E<gt>poderror( @args )>
569
570=item C<$checker-E<gt>poderror( {%opts}, @args )>
571
572Internal method for printing errors and warnings. If no options are
573given, simply prints "@_". The following options are recognized and used
574to form the output:
575
576 -msg
577
578A message to print prior to C<@args>.
579
580 -line
581
582The line number the error occurred in.
583
584 -file
585
586The file (name) the error occurred in.
587
588 -severity
589
590The error level, should be 'WARNING' or 'ERROR'.
591
592=cut
593
e2c3adef 594# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207
GS
595sub poderror {
596 my $self = shift;
597 my %opts = (ref $_[0]) ? %{shift()} : ();
598
599 ## Retrieve options
600 chomp( my $msg = ($opts{-msg} || "")."@_" );
601 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
602 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd
GS
603 unless (exists $opts{-severity}) {
604 ## See if can find severity in message prefix
605 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
606 }
664bb207
GS
607 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
608
e3237417
GS
609 ## Increment error count and print message "
610 ++($self->{_NUM_ERRORS})
611 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
92e3d63a 612 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef
GS
613 print $out_fh ($severity, $msg, $line, $file, "\n")
614 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43
GS
615}
616
48f30392
GS
617##################################
618
619=item C<$checker-E<gt>num_errors()>
620
621Set (if argument specified) and retrieve the number of errors found.
622
623=cut
624
360aca43
GS
625sub num_errors {
626 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
627}
628
48f30392
GS
629##################################
630
631=item C<$checker-E<gt>name()>
632
633Set (if argument specified) and retrieve the canonical name of POD as
634found in the C<=head1 NAME> section.
635
636=cut
637
e2c3adef
GS
638sub name {
639 return (@_ > 1 && $_[1]) ?
640 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
641}
642
48f30392
GS
643##################################
644
645=item C<$checker-E<gt>node()>
646
647Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
648and C<=item>) of the current POD. The nodes are returned in the order of
649their occurence. They consist of plain text, each piece of whitespace is
650collapsed to a single blank.
651
652=cut
653
e2c3adef
GS
654sub node {
655 my ($self,$text) = @_;
656 if(defined $text) {
66aff6dd
GS
657 $text =~ s/\s+$//s; # strip trailing whitespace
658 $text =~ s/\s+/ /gs; # collapse whitespace
659 # add node, order important!
e2c3adef 660 push(@{$self->{_nodes}}, $text);
66aff6dd 661 # keep also a uniqueness counter
48f30392 662 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef
GS
663 return $text;
664 }
665 @{$self->{_nodes}};
666}
667
48f30392
GS
668##################################
669
670=item C<$checker-E<gt>idx()>
671
672Add (if argument specified) and retrieve the index entries (as defined by
673C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
674of whitespace is collapsed to a single blank.
675
676=cut
677
678# set/return index entries of current POD
679sub idx {
680 my ($self,$text) = @_;
681 if(defined $text) {
682 $text =~ s/\s+$//s; # strip trailing whitespace
683 $text =~ s/\s+/ /gs; # collapse whitespace
684 # add node, order important!
685 push(@{$self->{_index}}, $text);
686 # keep also a uniqueness counter
687 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
688 return $text;
689 }
690 @{$self->{_index}};
691}
692
693##################################
694
695=item C<$checker-E<gt>hyperlink()>
696
697Add (if argument specified) and retrieve the hyperlinks (as defined by
698C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line
699number and C<Pod::Hyperlink> object.
700
701=back
702
703=cut
704
e2c3adef
GS
705# set/return hyperlinks of the current POD
706sub hyperlink {
707 my $self = shift;
708 if($_[0]) {
709 push(@{$self->{_links}}, $_[0]);
710 return $_[0];
711 }
712 @{$self->{_links}};
713}
714
e3237417
GS
715## overrides for Pod::Parser
716
360aca43 717sub end_pod {
66aff6dd
GS
718 ## Do some final checks and
719 ## print the number of errors found
720 my $self = shift;
721 my $infile = $self->input_file();
722 my $out_fh = $self->output_handle();
723
724 if(@{$self->{_list_stack}}) {
725 # _TODO_ display, but don't count them for now
726 my $list;
727 while(($list = $self->_close_list('EOF',$infile)) &&
728 $list->indent() ne 'auto') {
729 $self->poderror({ -line => 'EOF', -file => $infile,
730 -severity => 'ERROR', -msg => "=over on line " .
731 $list->start() . " without closing =back" }); #"
732 }
733 }
734
735 # check validity of document internal hyperlinks
736 # first build the node names from the paragraph text
737 my %nodes;
738 foreach($self->node()) {
739 $nodes{$_} = 1;
92e3d63a 740 if(/^(\S+)\s+\S/) {
66aff6dd
GS
741 # we have more than one word. Use the first as a node, too.
742 # This is used heavily in perlfunc.pod
743 $nodes{$1} ||= 2; # derived node
744 }
745 }
92e3d63a
JH
746 foreach($self->idx()) {
747 $nodes{$_} = 3; # index node
748 }
66aff6dd 749 foreach($self->hyperlink()) {
48f30392
GS
750 my ($line,$link) = @$_;
751 # _TODO_ what if there is a link to the page itself by the name,
752 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
753 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
754 my $node = $self->_check_ptree($self->parse_text($link->node(),
755 $line), $line, $infile, 'L');
756 if($node && !$nodes{$node}) {
757 $self->poderror({ -line => $line || '', -file => $infile,
758 -severity => 'ERROR',
759 -msg => "unresolved internal link '$node'"});
760 }
66aff6dd
GS
761 }
762 }
48f30392
GS
763
764 # check the internal nodes for uniqueness. This pertains to
765 # =headX, =item and X<...>
66aff6dd
GS
766 foreach(grep($self->{_unique_nodes}->{$_} > 1,
767 keys %{$self->{_unique_nodes}})) {
768 $self->poderror({ -line => '-', -file => $infile,
769 -severity => 'WARNING',
770 -msg => "multiple occurence of link target '$_'"});
771 }
772
773 ## Print the number of errors found
774 my $num_errors = $self->num_errors();
775 if ($num_errors > 0) {
776 printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
360aca43 777 ($num_errors == 1) ? "error" : "errors");
66aff6dd
GS
778 }
779 elsif($self->{_commands} == 0) {
780 print $out_fh "$infile does not contain any pod commands.\n";
781 $self->num_errors(-1);
782 }
783 else {
784 print $out_fh "$infile pod syntax OK.\n";
785 }
360aca43
GS
786}
787
e2c3adef 788# check a POD command directive
360aca43 789sub command {
664bb207 790 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 791 my ($file, $line) = $pod_para->file_line;
360aca43 792 ## Check the command syntax
e3237417 793 my $arg; # this will hold the command argument
664bb207
GS
794 if (! $VALID_COMMANDS{$cmd}) {
795 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 796 -msg => "Unknown command '$cmd'" });
360aca43 797 }
92e3d63a
JH
798 else { # found a valid command
799 $self->{_commands}++; # delete this line if below is enabled again
800
801 ##### following check disabled due to strong request
802 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
803 # $self->poderror({ -line => $line, -file => $file,
804 # -severity => 'WARNING',
805 # -msg => "file does not start with =head" });
806 #}
807
808 # check syntax of particular command
e3237417 809 if($cmd eq 'over') {
e2c3adef
GS
810 # check for argument
811 $arg = $self->interpolate_and_check($paragraph, $line,$file);
812 my $indent = 4; # default
813 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
814 $indent = $1;
e2c3adef 815 }
e3237417 816 # start a new list
66aff6dd 817 $self->_open_list($indent,$line,$file);
e3237417
GS
818 }
819 elsif($cmd eq 'item') {
e2c3adef 820 # are we in a list?
e3237417
GS
821 unless(@{$self->{_list_stack}}) {
822 $self->poderror({ -line => $line, -file => $file,
823 -severity => 'ERROR',
824 -msg => "=item without previous =over" });
e2c3adef 825 # auto-open in case we encounter many more
66aff6dd
GS
826 $self->_open_list('auto',$line,$file);
827 }
828 my $list = $self->{_list_stack}->[0];
829 # check whether the previous item had some contents
830 if(defined $self->{_list_item_contents} &&
831 $self->{_list_item_contents} == 0) {
832 $self->poderror({ -line => $line, -file => $file,
833 -severity => 'WARNING',
834 -msg => "previous =item has no contents" });
835 }
836 if($list->{_has_par}) {
837 $self->poderror({ -line => $line, -file => $file,
838 -severity => 'WARNING',
839 -msg => "preceding non-item paragraph(s)" });
840 delete $list->{_has_par};
e3237417 841 }
e2c3adef
GS
842 # check for argument
843 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd
GS
844 if($arg && $arg =~ /(\S+)/) {
845 $arg =~ s/[\s\n]+$//;
846 my $type;
847 if($arg =~ /^[*]\s*(\S*.*)/) {
848 $type = 'bullet';
849 $self->{_list_item_contents} = $1 ? 1 : 0;
850 $arg = $1;
851 }
852 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
853 $type = 'number';
854 $self->{_list_item_contents} = $1 ? 1 : 0;
855 $arg = $1;
856 }
857 else {
858 $type = 'definition';
859 $self->{_list_item_contents} = 1;
860 }
861 my $first = $list->type();
862 if($first && $first ne $type) {
863 $self->poderror({ -line => $line, -file => $file,
864 -severity => 'WARNING',
865 -msg => "=item type mismatch ('$first' vs. '$type')"});
866 }
867 else { # first item
868 $list->type($type);
869 }
870 }
871 else {
e2c3adef
GS
872 $self->poderror({ -line => $line, -file => $file,
873 -severity => 'WARNING',
874 -msg => "No argument for =item" });
875 $arg = ' '; # empty
66aff6dd 876 $self->{_list_item_contents} = 0;
e3237417 877 }
e2c3adef 878 # add this item
66aff6dd 879 $list->item($arg);
e2c3adef
GS
880 # remember this node
881 $self->node($arg);
e3237417
GS
882 }
883 elsif($cmd eq 'back') {
884 # check if we have an open list
885 unless(@{$self->{_list_stack}}) {
886 $self->poderror({ -line => $line, -file => $file,
887 -severity => 'ERROR',
888 -msg => "=back without previous =over" });
889 }
890 else {
891 # check for spurious characters
e2c3adef 892 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
893 if($arg && $arg =~ /\S/) {
894 $self->poderror({ -line => $line, -file => $file,
66aff6dd 895 -severity => 'ERROR',
e3237417
GS
896 -msg => "Spurious character(s) after =back" });
897 }
898 # close list
66aff6dd 899 my $list = $self->_close_list($line,$file);
e3237417
GS
900 # check for empty lists
901 if(!$list->item() && $self->{-warnings}) {
902 $self->poderror({ -line => $line, -file => $file,
903 -severity => 'WARNING',
904 -msg => "No items in =over (at line " .
905 $list->start() . ") / =back list"}); #"
906 }
907 }
908 }
66aff6dd 909 elsif($cmd =~ /^head(\d+)/) {
48f30392 910 # check whether the previous =head section had some contents
66aff6dd
GS
911 if(defined $self->{_commands_in_head} &&
912 $self->{_commands_in_head} == 0 &&
913 defined $self->{_last_head} &&
914 $self->{_last_head} >= $1) {
915 $self->poderror({ -line => $line, -file => $file,
916 -severity => 'WARNING',
917 -msg => "empty section in previous paragraph"});
918 }
919 $self->{_commands_in_head} = -1;
920 $self->{_last_head} = $1;
e3237417
GS
921 # check if there is an open list
922 if(@{$self->{_list_stack}}) {
923 my $list;
66aff6dd
GS
924 while(($list = $self->_close_list($line,$file)) &&
925 $list->indent() ne 'auto') {
e3237417
GS
926 $self->poderror({ -line => $line, -file => $file,
927 -severity => 'ERROR',
e2c3adef
GS
928 -msg => "=over on line ". $list->start() .
929 " without closing =back (at $cmd)" });
e3237417
GS
930 }
931 }
932 # remember this node
e2c3adef 933 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd
GS
934 $arg =~ s/[\s\n]+$//s;
935 $self->node($arg);
936 unless(length($arg)) {
937 $self->poderror({ -line => $line, -file => $file,
938 -severity => 'ERROR',
939 -msg => "empty =$cmd"});
940 }
e2c3adef 941 if($cmd eq 'head1') {
e2c3adef
GS
942 $self->{_current_head1} = $arg;
943 } else {
944 $self->{_current_head1} = '';
945 }
e3237417
GS
946 }
947 elsif($cmd eq 'begin') {
948 if($self->{_have_begin}) {
949 # already have a begin
950 $self->poderror({ -line => $line, -file => $file,
951 -severity => 'ERROR',
952 -msg => "Nested =begin's (first at line " .
953 $self->{_have_begin} . ")"});
954 }
955 else {
956 # check for argument
e2c3adef 957 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
958 unless($arg && $arg =~ /(\S+)/) {
959 $self->poderror({ -line => $line, -file => $file,
e2c3adef 960 -severity => 'ERROR',
e3237417
GS
961 -msg => "No argument for =begin"});
962 }
963 # remember the =begin
964 $self->{_have_begin} = "$line:$1";
965 }
966 }
967 elsif($cmd eq 'end') {
968 if($self->{_have_begin}) {
969 # close the existing =begin
970 $self->{_have_begin} = '';
971 # check for spurious characters
e2c3adef
GS
972 $arg = $self->interpolate_and_check($paragraph, $line,$file);
973 # the closing argument is optional
974 #if($arg && $arg =~ /\S/) {
975 # $self->poderror({ -line => $line, -file => $file,
976 # -severity => 'WARNING',
977 # -msg => "Spurious character(s) after =end" });
978 #}
e3237417
GS
979 }
980 else {
981 # don't have a matching =begin
982 $self->poderror({ -line => $line, -file => $file,
e2c3adef 983 -severity => 'ERROR',
e3237417
GS
984 -msg => "=end without =begin" });
985 }
986 }
e2c3adef
GS
987 elsif($cmd eq 'for') {
988 unless($paragraph =~ /\s*(\S+)\s*/) {
989 $self->poderror({ -line => $line, -file => $file,
990 -severity => 'ERROR',
991 -msg => "=for without formatter specification" });
992 }
993 $arg = ''; # do not expand paragraph below
994 }
66aff6dd
GS
995 elsif($cmd =~ /^(pod|cut)$/) {
996 # check for argument
997 $arg = $self->interpolate_and_check($paragraph, $line,$file);
998 if($arg && $arg =~ /(\S+)/) {
999 $self->poderror({ -line => $line, -file => $file,
1000 -severity => 'ERROR',
1001 -msg => "Spurious text after =$cmd"});
1002 }
1003 }
1004 $self->{_commands_in_head}++;
e3237417 1005 ## Check the interior sequences in the command-text
e2c3adef 1006 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1007 unless(defined $arg);
e2c3adef 1008 }
360aca43
GS
1009}
1010
66aff6dd
GS
1011sub _open_list
1012{
1013 my ($self,$indent,$line,$file) = @_;
1014 my $list = Pod::List->new(
1015 -indent => $indent,
1016 -start => $line,
1017 -file => $file);
1018 unshift(@{$self->{_list_stack}}, $list);
1019 undef $self->{_list_item_contents};
1020 $list;
1021}
1022
1023sub _close_list
1024{
1025 my ($self,$line,$file) = @_;
1026 my $list = shift(@{$self->{_list_stack}});
1027 if(defined $self->{_list_item_contents} &&
1028 $self->{_list_item_contents} == 0) {
1029 $self->poderror({ -line => $line, -file => $file,
1030 -severity => 'WARNING',
1031 -msg => "previous =item has no contents" });
1032 }
1033 undef $self->{_list_item_contents};
1034 $list;
1035}
1036
e2c3adef
GS
1037# process a block of some text
1038sub interpolate_and_check {
e3237417
GS
1039 my ($self, $paragraph, $line, $file) = @_;
1040 ## Check the interior sequences in the command-text
1041 # and return the text
1042 $self->_check_ptree(
1043 $self->parse_text($paragraph,$line), $line, $file, '');
1044}
1045
1046sub _check_ptree {
1047 my ($self,$ptree,$line,$file,$nestlist) = @_;
1048 local($_);
1049 my $text = '';
1050 # process each node in the parse tree
1051 foreach(@$ptree) {
1052 # regular text chunk
1053 unless(ref) {
1054 my $count;
1055 # count the unescaped angle brackets
92e3d63a 1056 # complain only when warning level is greater than 1
e3237417 1057 my $i = $_;
e2c3adef 1058 if($count = $i =~ tr/<>/<>/) {
e3237417
GS
1059 $self->poderror({ -line => $line, -file => $file,
1060 -severity => 'WARNING',
e2c3adef 1061 -msg => "$count unescaped <> in paragraph" })
92e3d63a 1062 if($self->{-warnings} && $self->{-warnings}>1);
e3237417
GS
1063 }
1064 $text .= $i;
1065 next;
1066 }
1067 # have an interior sequence
1068 my $cmd = $_->cmd_name();
1069 my $contents = $_->parse_tree();
1070 ($file,$line) = $_->file_line();
1071 # check for valid tag
1072 if (! $VALID_SEQUENCES{$cmd}) {
1073 $self->poderror({ -line => $line, -file => $file,
1074 -severity => 'ERROR',
66aff6dd 1075 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417
GS
1076 # expand it anyway
1077 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1078 next;
1079 }
1080 if($nestlist =~ /$cmd/) {
1081 $self->poderror({ -line => $line, -file => $file,
1082 -severity => 'ERROR',
1083 -msg => "nested commands $cmd<...$cmd<...>...>"});
1084 # _TODO_ should we add the contents anyway?
1085 # expand it anyway, see below
1086 }
1087 if($cmd eq 'E') {
1088 # preserve entities
1089 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1090 $self->poderror({ -line => $line, -file => $file,
1091 -severity => 'ERROR',
1092 -msg => "garbled entity " . $_->raw_text()});
1093 next;
1094 }
e2c3adef 1095 my $ent = $$contents[0];
66aff6dd
GS
1096 my $val;
1097 if($ent =~ /^0x[0-9a-f]+$/i) {
1098 # hexadec entity
1099 $val = hex($ent);
1100 }
1101 elsif($ent =~ /^0\d+$/) {
1102 # octal
1103 $val = oct($ent);
1104 }
1105 elsif($ent =~ /^\d+$/) {
e2c3adef 1106 # numeric entity
66aff6dd
GS
1107 $val = $ent;
1108 }
1109 if(defined $val) {
1110 if($val>0 && $val<256) {
1111 $text .= chr($val);
1112 }
1113 else {
1114 $self->poderror({ -line => $line, -file => $file,
1115 -severity => 'ERROR',
1116 -msg => "Entity number out of range " . $_->raw_text()});
1117 }
e2c3adef
GS
1118 }
1119 elsif($ENTITIES{$ent}) {
1120 # known ISO entity
1121 $text .= $ENTITIES{$ent};
1122 }
1123 else {
1124 $self->poderror({ -line => $line, -file => $file,
1125 -severity => 'WARNING',
66aff6dd 1126 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef
GS
1127 $text .= "E<$ent>";
1128 }
e3237417
GS
1129 }
1130 elsif($cmd eq 'L') {
1131 # try to parse the hyperlink
1132 my $link = Pod::Hyperlink->new($contents->raw_text());
1133 unless(defined $link) {
1134 $self->poderror({ -line => $line, -file => $file,
1135 -severity => 'ERROR',
e2c3adef 1136 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417
GS
1137 next;
1138 }
1139 $link->line($line); # remember line
1140 if($self->{-warnings}) {
1141 foreach my $w ($link->warning()) {
1142 $self->poderror({ -line => $line, -file => $file,
1143 -severity => 'WARNING',
1144 -msg => $w });
1145 }
1146 }
1147 # check the link text
1148 $text .= $self->_check_ptree($self->parse_text($link->text(),
1149 $line), $line, $file, "$nestlist$cmd");
48f30392
GS
1150 # remember link
1151 $self->hyperlink([$line,$link]);
e3237417
GS
1152 }
1153 elsif($cmd =~ /[BCFIS]/) {
1154 # add the guts
1155 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1156 }
66aff6dd
GS
1157 elsif($cmd eq 'Z') {
1158 if(length($contents->raw_text())) {
1159 $self->poderror({ -line => $line, -file => $file,
1160 -severity => 'ERROR',
1161 -msg => "Nonempty Z<>"});
1162 }
1163 }
48f30392
GS
1164 elsif($cmd eq 'X') {
1165 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1166 if($idx =~ /^\s*$/s) {
1167 $self->poderror({ -line => $line, -file => $file,
1168 -severity => 'ERROR',
1169 -msg => "Empty X<>"});
1170 }
1171 else {
1172 # remember this node
1173 $self->idx($idx);
1174 }
1175 }
1176 else {
1177 # not reached
1178 die "internal error";
e3237417
GS
1179 }
1180 }
1181 $text;
1182}
1183
e2c3adef 1184# process a block of verbatim text
360aca43 1185sub verbatim {
66aff6dd 1186 ## Nothing particular to check
e2c3adef 1187 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd
GS
1188
1189 $self->_preproc_par($paragraph);
1190
e2c3adef
GS
1191 if($self->{_current_head1} eq 'NAME') {
1192 my ($file, $line) = $pod_para->file_line;
1193 $self->poderror({ -line => $line, -file => $file,
1194 -severity => 'WARNING',
1195 -msg => 'Verbatim paragraph in NAME section' });
1196 }
360aca43
GS
1197}
1198
e2c3adef 1199# process a block of regular text
360aca43
GS
1200sub textblock {
1201 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1202 my ($file, $line) = $pod_para->file_line;
e3237417 1203
66aff6dd
GS
1204 $self->_preproc_par($paragraph);
1205
e2c3adef
GS
1206 # skip this paragraph if in a =begin block
1207 unless($self->{_have_begin}) {
1208 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1209 if($self->{_current_head1} eq 'NAME') {
1210 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1211 # this is the canonical name
1212 $self->{-name} = $1 unless(defined $self->{-name});
1213 }
e3237417
GS
1214 }
1215 }
e3237417
GS
1216}
1217
66aff6dd
GS
1218sub _preproc_par
1219{
1220 my $self = shift;
1221 $_[0] =~ s/[\s\n]+$//;
1222 if($_[0]) {
1223 $self->{_commands_in_head}++;
1224 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1225 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1226 $self->{_list_stack}->[0]->{_has_par} = 1;
1227 }
1228 }
1229}
1230
e3237417 12311;
66aff6dd 1232
48f30392
GS
1233__END__
1234
1235=head1 AUTHOR
1236
1237Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1238Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
1239
1240Based on code for B<Pod::Text::pod2text()> written by
1241Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1242
1243=cut
1244