This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add weights to ExtUtils::Constant to allow sorting by expected
[perl5.git] / lib / Pod / Checker.pm
CommitLineData
360aca43
GS
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
828c4421 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
360aca43
GS
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
7d8277e2 13$VERSION = 1.42; ## Current version of this package
828c4421 14require 5.005; ## requires this Perl version or later
360aca43 15
e2c3adef
GS
16use Pod::ParseUtils; ## for hyperlinks and lists
17
360aca43
GS
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
e3237417 26 $syntax_okay = podchecker($filepath, $outputpath, %options);
360aca43 27
e2c3adef 28 my $checker = new Pod::Checker %options;
48f30392 29 $checker->parse_from_file($filepath, \*STDERR);
e2c3adef 30
360aca43
GS
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
e2c3adef 35indicating a file-path, or else a reference to an open filehandle.
360aca43
GS
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
e2c3adef
GS
39=head2 podchecker()
40
41This function can take a hash of options:
e3237417
GS
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
92e3d63a
JH
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
e3237417
GS
49
50=back
360aca43
GS
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
c23d1eb0
MR
56Curious/ambitious users are welcome to propose additional features they wish
57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
58consistent with L<perlpod>.
360aca43 59
48f30392 60The following checks are currently preformed:
e3237417
GS
61
62=over 4
63
64=item *
65
48f30392 66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
e2c3adef
GS
67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
e3237417
GS
73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
e2c3adef
GS
80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
e3237417
GS
82
83=item *
84
e2c3adef 85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
e3237417
GS
86
87=item *
88
e2c3adef
GS
89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
e3237417
GS
91
92=item *
93
e2c3adef
GS
94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
e3237417
GS
97
98=back
99
e2c3adef 100=head1 DIAGNOSTICS
e3237417 101
e2c3adef 102=head2 Errors
e3237417
GS
103
104=over 4
105
66aff6dd
GS
106=item * empty =headn
107
108A heading (C<=head1> or C<=head2>) without any text? That ain't no
109heading!
110
e2c3adef 111=item * =over on line I<N> without closing =back
e3237417 112
e2c3adef
GS
113The C<=over> command does not have a corresponding C<=back> before the
114next heading (C<=head1> or C<=head2>) or the end of the file.
e3237417 115
e2c3adef 116=item * =item without previous =over
e3237417 117
e2c3adef 118=item * =back without previous =over
e3237417 119
e2c3adef
GS
120An C<=item> or C<=back> command has been found outside a
121C<=over>/C<=back> block.
e3237417 122
e2c3adef 123=item * No argument for =begin
e3237417 124
e2c3adef
GS
125A C<=begin> command was found that is not followed by the formatter
126specification.
e3237417 127
e2c3adef 128=item * =end without =begin
e3237417 129
e2c3adef
GS
130A standalone C<=end> command was found.
131
132=item * Nested =begin's
133
66aff6dd 134There were at least two consecutive C<=begin> commands without
e2c3adef
GS
135the corresponding C<=end>. Only one C<=begin> may be active at
136a time.
137
138=item * =for without formatter specification
e3237417 139
e2c3adef
GS
140There is no specification of the formatter after the C<=for> command.
141
142=item * unresolved internal link I<NAME>
143
144The given link to I<NAME> does not have a matching node in the current
145POD. This also happend when a single word node name is not enclosed in
146C<"">.
147
148=item * Unknown command "I<CMD>"
149
150An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
c6b85e5d
CT
151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
152C<=for>, C<=pod>, C<=cut>
e2c3adef
GS
153
154=item * Unknown interior-sequence "I<SEQ>"
155
156An invalid markup command has been encountered. Valid are:
157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
159C<ZE<lt>E<gt>>
160
161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
162
163Two nested identical markup commands have been found. Generally this
164does not make sense.
165
166=item * garbled entity I<STRING>
167
66aff6dd
GS
168The I<STRING> found cannot be interpreted as a character entity.
169
170=item * Entity number out of range
171
172An entity specified by number (dec, hex, oct) is out of range (1-255).
e2c3adef
GS
173
174=item * malformed link LE<lt>E<gt>
175
176The link found cannot be parsed because it does not conform to the
177syntax described in L<perlpod>.
e3237417 178
66aff6dd
GS
179=item * nonempty ZE<lt>E<gt>
180
181The C<ZE<lt>E<gt>> sequence is supposed to be empty.
182
48f30392
GS
183=item * empty XE<lt>E<gt>
184
185The index entry specified contains nothing but whitespace.
186
66aff6dd
GS
187=item * Spurious text after =pod / =cut
188
189The commands C<=pod> and C<=cut> do not take any arguments.
190
191=item * Spurious character(s) after =back
192
193The C<=back> command does not take any arguments.
194
e3237417
GS
195=back
196
e2c3adef 197=head2 Warnings
e3237417 198
e2c3adef
GS
199These may not necessarily cause trouble, but indicate mediocre style.
200
201=over 4
202
fb8eeed8 203=item * multiple occurrence of link target I<name>
66aff6dd
GS
204
205The POD file has some C<=item> and/or C<=head> commands that have
206the same text. Potential hyperlinks to such a text cannot be unique then.
207
208=item * line containing nothing but whitespace in paragraph
209
210There is some whitespace on a seemingly empty line. POD is very sensitive
211to such things, so this is flagged. B<vi> users switch on the B<list>
212option to avoid this problem.
213
92e3d63a
JH
214=begin _disabled_
215
66aff6dd
GS
216=item * file does not start with =head
217
218The file starts with a different POD directive than head.
219This is most probably something you do not want.
220
92e3d63a
JH
221=end _disabled_
222
66aff6dd 223=item * previous =item has no contents
e2c3adef 224
66aff6dd
GS
225There is a list C<=item> right above the flagged line that has no
226text contents. You probably want to delete empty items.
227
228=item * preceding non-item paragraph(s)
229
230A list introduced by C<=over> starts with a text or verbatim paragraph,
231but continues with C<=item>s. Move the non-item paragraph out of the
232C<=over>/C<=back> block.
233
234=item * =item type mismatch (I<one> vs. I<two>)
235
236A list started with e.g. a bulletted C<=item> and continued with a
237numbered one. This is obviously inconsistent. For most translators the
238type of the I<first> C<=item> determines the type of the list.
e2c3adef
GS
239
240=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
241
242Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
243can potentially cause errors as they could be misinterpreted as
92e3d63a
JH
244markup commands. This is only printed when the -warnings level is
245greater than 1.
e2c3adef 246
66aff6dd 247=item * Unknown entity
e2c3adef
GS
248
249A character entity was found that does not belong to the standard
66aff6dd 250ISO set or the POD specials C<verbar> and C<sol>.
e2c3adef
GS
251
252=item * No items in =over
253
66aff6dd 254The list opened with C<=over> does not contain any items.
e2c3adef
GS
255
256=item * No argument for =item
257
258C<=item> without any parameters is deprecated. It should either be followed
259by C<*> to indicate an unordered list, by a number (optionally followed
260by a dot) to indicate an ordered (numbered) list or simple text for a
261definition list.
262
66aff6dd
GS
263=item * empty section in previous paragraph
264
265The previous section (introduced by a C<=head> command) does not contain
266any text. This usually indicates that something is missing. Note: A
267C<=head1> followed immediately by C<=head2> does not trigger this warning.
268
e2c3adef
GS
269=item * Verbatim paragraph in NAME section
270
271The NAME section (C<=head1 NAME>) should consist of a single paragraph
272with the script/module name, followed by a dash `-' and a very short
273description of what the thing is good for.
274
9c6ed6d7
HS
275=item * =headI<n> without preceding higher level
276
277For example if there is a C<=head2> in the POD file prior to a
278C<=head1>.
279
92e3d63a
JH
280=back
281
282=head2 Hyperlinks
283
284There are some warnings wrt. malformed hyperlinks.
285
286=over 4
287
92e3d63a
JH
288=item * ignoring leading/trailing whitespace in link
289
290There is whitespace at the beginning or the end of the contents of
291LE<lt>...E<gt>.
292
293=item * (section) in '$page' deprecated
294
295There is a section detected in the page name of LE<lt>...E<gt>, e.g.
ad838556 296C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
92e3d63a
JH
297Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
298to expand this to appropriate code. For links to (builtin) functions,
299please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
300
301=item * alternative text/node '%s' contains non-escaped | or /
302
303The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
304Although the hyperlink parser does its best to determine which "/" is
305text and which is a delimiter in case of doubt, one ought to escape
306these literal characters like this:
307
308 / E<sol>
309 | E<verbar>
e2c3adef
GS
310
311=back
e3237417
GS
312
313=head1 RETURN VALUE
314
315B<podchecker> returns the number of POD syntax errors found or -1 if
316there were no POD commands at all found in the file.
317
360aca43
GS
318=head1 EXAMPLES
319
c23d1eb0 320See L</SYNOPSIS>
360aca43 321
48f30392 322=head1 INTERFACE
360aca43 323
48f30392
GS
324While checking, this module collects document properties, e.g. the nodes
325for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
326POD translators can use this feature to syntax-check and get the nodes in
327a first pass before actually starting to convert. This is expensive in terms
328of execution time, but allows for very robust conversions.
360aca43 329
c23d1eb0
MR
330Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
331method to print errors and warnings. The summary output (e.g.
332"Pod syntax OK") has been dropped from the module and has been included in
333B<podchecker> (the script). This allows users of B<Pod::Checker> to
334control completely the output behaviour. Users of B<podchecker> (the script)
335get the well-known behaviour.
336
360aca43
GS
337=cut
338
339#############################################################################
340
341use strict;
342#use diagnostics;
343use Carp;
344use Exporter;
345use Pod::Parser;
346
347use vars qw(@ISA @EXPORT);
348@ISA = qw(Pod::Parser);
349@EXPORT = qw(&podchecker);
350
351use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
352
353my %VALID_COMMANDS = (
354 'pod' => 1,
355 'cut' => 1,
356 'head1' => 1,
357 'head2' => 1,
c6b85e5d
CT
358 'head3' => 1,
359 'head4' => 1,
360aca43
GS
360 'over' => 1,
361 'back' => 1,
362 'item' => 1,
363 'for' => 1,
364 'begin' => 1,
365 'end' => 1,
366);
367
368my %VALID_SEQUENCES = (
369 'I' => 1,
370 'B' => 1,
371 'S' => 1,
372 'C' => 1,
373 'L' => 1,
374 'F' => 1,
375 'X' => 1,
376 'Z' => 1,
377 'E' => 1,
378);
379
e2c3adef
GS
380# stolen from HTML::Entities
381my %ENTITIES = (
382 # Some normal chars that have special meaning in SGML context
383 amp => '&', # ampersand
384'gt' => '>', # greater than
385'lt' => '<', # less than
386 quot => '"', # double quote
387
388 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
389 AElig => 'Æ', # capital AE diphthong (ligature)
390 Aacute => 'Á', # capital A, acute accent
391 Acirc => 'Â', # capital A, circumflex accent
392 Agrave => 'À', # capital A, grave accent
393 Aring => 'Å', # capital A, ring
394 Atilde => 'Ã', # capital A, tilde
395 Auml => 'Ä', # capital A, dieresis or umlaut mark
396 Ccedil => 'Ç', # capital C, cedilla
397 ETH => 'Ð', # capital Eth, Icelandic
398 Eacute => 'É', # capital E, acute accent
399 Ecirc => 'Ê', # capital E, circumflex accent
400 Egrave => 'È', # capital E, grave accent
401 Euml => 'Ë', # capital E, dieresis or umlaut mark
402 Iacute => 'Í', # capital I, acute accent
403 Icirc => 'Î', # capital I, circumflex accent
404 Igrave => 'Ì', # capital I, grave accent
405 Iuml => 'Ï', # capital I, dieresis or umlaut mark
406 Ntilde => 'Ñ', # capital N, tilde
407 Oacute => 'Ó', # capital O, acute accent
408 Ocirc => 'Ô', # capital O, circumflex accent
409 Ograve => 'Ò', # capital O, grave accent
410 Oslash => 'Ø', # capital O, slash
411 Otilde => 'Õ', # capital O, tilde
412 Ouml => 'Ö', # capital O, dieresis or umlaut mark
413 THORN => 'Þ', # capital THORN, Icelandic
414 Uacute => 'Ú', # capital U, acute accent
415 Ucirc => 'Û', # capital U, circumflex accent
416 Ugrave => 'Ù', # capital U, grave accent
417 Uuml => 'Ü', # capital U, dieresis or umlaut mark
418 Yacute => 'Ý', # capital Y, acute accent
419 aacute => 'á', # small a, acute accent
420 acirc => 'â', # small a, circumflex accent
421 aelig => 'æ', # small ae diphthong (ligature)
422 agrave => 'à', # small a, grave accent
423 aring => 'å', # small a, ring
424 atilde => 'ã', # small a, tilde
425 auml => 'ä', # small a, dieresis or umlaut mark
426 ccedil => 'ç', # small c, cedilla
427 eacute => 'é', # small e, acute accent
428 ecirc => 'ê', # small e, circumflex accent
429 egrave => 'è', # small e, grave accent
430 eth => 'ð', # small eth, Icelandic
431 euml => 'ë', # small e, dieresis or umlaut mark
432 iacute => 'í', # small i, acute accent
433 icirc => 'î', # small i, circumflex accent
434 igrave => 'ì', # small i, grave accent
435 iuml => 'ï', # small i, dieresis or umlaut mark
436 ntilde => 'ñ', # small n, tilde
437 oacute => 'ó', # small o, acute accent
438 ocirc => 'ô', # small o, circumflex accent
439 ograve => 'ò', # small o, grave accent
440 oslash => 'ø', # small o, slash
441 otilde => 'õ', # small o, tilde
442 ouml => 'ö', # small o, dieresis or umlaut mark
443 szlig => 'ß', # small sharp s, German (sz ligature)
444 thorn => 'þ', # small thorn, Icelandic
445 uacute => 'ú', # small u, acute accent
446 ucirc => 'û', # small u, circumflex accent
447 ugrave => 'ù', # small u, grave accent
448 uuml => 'ü', # small u, dieresis or umlaut mark
449 yacute => 'ý', # small y, acute accent
450 yuml => 'ÿ', # small y, dieresis or umlaut mark
451
452 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
453 copy => '©', # copyright sign
454 reg => '®', # registered sign
455 nbsp => "\240", # non breaking space
456
457 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
458 iexcl => '¡',
459 cent => '¢',
460 pound => '£',
461 curren => '¤',
462 yen => '¥',
463 brvbar => '¦',
464 sect => '§',
465 uml => '¨',
466 ordf => 'ª',
467 laquo => '«',
468'not' => '¬', # not is a keyword in perl
469 shy => '­',
470 macr => '¯',
471 deg => '°',
472 plusmn => '±',
473 sup1 => '¹',
474 sup2 => '²',
475 sup3 => '³',
476 acute => '´',
477 micro => 'µ',
478 para => '¶',
479 middot => '·',
480 cedil => '¸',
481 ordm => 'º',
482 raquo => '»',
483 frac14 => '¼',
484 frac12 => '½',
485 frac34 => '¾',
486 iquest => '¿',
487'times' => '×', # times is a keyword in perl
488 divide => '÷',
66aff6dd
GS
489
490# some POD special entities
491 verbar => '|',
492 sol => '/'
e2c3adef
GS
493);
494
360aca43
GS
495##---------------------------------------------------------------------------
496
497##---------------------------------
498## Function definitions begin here
499##---------------------------------
500
e3237417
GS
501sub podchecker( $ ; $ % ) {
502 my ($infile, $outfile, %options) = @_;
360aca43
GS
503 local $_;
504
505 ## Set defaults
506 $infile ||= \*STDIN;
507 $outfile ||= \*STDERR;
508
509 ## Now create a pod checker
e3237417 510 my $checker = new Pod::Checker(%options);
360aca43
GS
511
512 ## Now check the pod document for errors
513 $checker->parse_from_file($infile, $outfile);
48f30392 514
360aca43
GS
515 ## Return the number of errors found
516 return $checker->num_errors();
517}
518
519##---------------------------------------------------------------------------
520
521##-------------------------------
522## Method definitions begin here
523##-------------------------------
524
92e3d63a
JH
525##################################
526
527=over 4
528
529=item C<Pod::Checker-E<gt>new( %options )>
530
531Return a reference to a new Pod::Checker object that inherits from
532Pod::Parser and is used for calling the required methods later. The
533following options are recognized:
534
535C<-warnings =E<gt> num>
536 Print warnings if C<num> is true. The higher the value of C<num>,
537the more warnings are printed. Currently there are only levels 1 and 2.
538
539C<-quiet =E<gt> num>
540 If C<num> is true, do not print any errors/warnings. This is useful
541when Pod::Checker is used to munge POD code into plain text from within
542POD formatters.
543
544=cut
545
66aff6dd
GS
546## sub new {
547## my $this = shift;
548## my $class = ref($this) || $this;
549## my %params = @_;
550## my $self = {%params};
551## bless $self, $class;
552## $self->initialize();
553## return $self;
554## }
360aca43
GS
555
556sub initialize {
557 my $self = shift;
664bb207
GS
558 ## Initialize number of errors, and setup an error function to
559 ## increment this number and then print to the designated output.
560 $self->{_NUM_ERRORS} = 0;
9c6ed6d7 561 $self->{_NUM_WARNINGS} = 0;
92e3d63a
JH
562 $self->{-quiet} ||= 0;
563 # set the error handling subroutine
564 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
e3237417
GS
565 $self->{_commands} = 0; # total number of POD commands encountered
566 $self->{_list_stack} = []; # stack for nested lists
567 $self->{_have_begin} = ''; # stores =begin
568 $self->{_links} = []; # stack for internal hyperlinks
569 $self->{_nodes} = []; # stack for =head/=item nodes
48f30392 570 $self->{_index} = []; # text in X<>
e2c3adef 571 # print warnings?
e3237417 572 $self->{-warnings} = 1 unless(defined $self->{-warnings});
e2c3adef 573 $self->{_current_head1} = ''; # the current =head1 block
92e3d63a 574 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
664bb207
GS
575}
576
48f30392
GS
577##################################
578
48f30392
GS
579=item C<$checker-E<gt>poderror( @args )>
580
581=item C<$checker-E<gt>poderror( {%opts}, @args )>
582
583Internal method for printing errors and warnings. If no options are
584given, simply prints "@_". The following options are recognized and used
585to form the output:
586
587 -msg
588
589A message to print prior to C<@args>.
590
591 -line
592
593The line number the error occurred in.
594
595 -file
596
597The file (name) the error occurred in.
598
599 -severity
600
601The error level, should be 'WARNING' or 'ERROR'.
602
603=cut
604
e2c3adef 605# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
664bb207
GS
606sub poderror {
607 my $self = shift;
608 my %opts = (ref $_[0]) ? %{shift()} : ();
609
610 ## Retrieve options
611 chomp( my $msg = ($opts{-msg} || "")."@_" );
612 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
613 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
66aff6dd
GS
614 unless (exists $opts{-severity}) {
615 ## See if can find severity in message prefix
616 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
617 }
664bb207
GS
618 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
619
e3237417
GS
620 ## Increment error count and print message "
621 ++($self->{_NUM_ERRORS})
622 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
9c6ed6d7
HS
623 ++($self->{_NUM_WARNINGS})
624 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
92e3d63a 625 my $out_fh = $self->output_handle() || \*STDERR;
e2c3adef
GS
626 print $out_fh ($severity, $msg, $line, $file, "\n")
627 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
360aca43
GS
628}
629
48f30392
GS
630##################################
631
632=item C<$checker-E<gt>num_errors()>
633
634Set (if argument specified) and retrieve the number of errors found.
635
636=cut
637
360aca43
GS
638sub num_errors {
639 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
640}
641
48f30392
GS
642##################################
643
9c6ed6d7
HS
644=item C<$checker-E<gt>num_warnings()>
645
646Set (if argument specified) and retrieve the number of warnings found.
647
648=cut
649
650sub num_warnings {
651 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
652}
653
654##################################
655
48f30392
GS
656=item C<$checker-E<gt>name()>
657
658Set (if argument specified) and retrieve the canonical name of POD as
659found in the C<=head1 NAME> section.
660
661=cut
662
e2c3adef
GS
663sub name {
664 return (@_ > 1 && $_[1]) ?
665 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
666}
667
48f30392
GS
668##################################
669
670=item C<$checker-E<gt>node()>
671
672Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
673and C<=item>) of the current POD. The nodes are returned in the order of
fb8eeed8 674their occurrence. They consist of plain text, each piece of whitespace is
48f30392
GS
675collapsed to a single blank.
676
677=cut
678
e2c3adef
GS
679sub node {
680 my ($self,$text) = @_;
681 if(defined $text) {
66aff6dd
GS
682 $text =~ s/\s+$//s; # strip trailing whitespace
683 $text =~ s/\s+/ /gs; # collapse whitespace
684 # add node, order important!
e2c3adef 685 push(@{$self->{_nodes}}, $text);
66aff6dd 686 # keep also a uniqueness counter
48f30392 687 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
e2c3adef
GS
688 return $text;
689 }
690 @{$self->{_nodes}};
691}
692
48f30392
GS
693##################################
694
695=item C<$checker-E<gt>idx()>
696
697Add (if argument specified) and retrieve the index entries (as defined by
698C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
699of whitespace is collapsed to a single blank.
700
701=cut
702
703# set/return index entries of current POD
704sub idx {
705 my ($self,$text) = @_;
706 if(defined $text) {
707 $text =~ s/\s+$//s; # strip trailing whitespace
708 $text =~ s/\s+/ /gs; # collapse whitespace
709 # add node, order important!
710 push(@{$self->{_index}}, $text);
711 # keep also a uniqueness counter
712 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
713 return $text;
714 }
715 @{$self->{_index}};
716}
717
718##################################
719
720=item C<$checker-E<gt>hyperlink()>
721
722Add (if argument specified) and retrieve the hyperlinks (as defined by
d1be9408 723C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
48f30392
GS
724number and C<Pod::Hyperlink> object.
725
726=back
727
728=cut
729
e2c3adef
GS
730# set/return hyperlinks of the current POD
731sub hyperlink {
732 my $self = shift;
733 if($_[0]) {
734 push(@{$self->{_links}}, $_[0]);
735 return $_[0];
736 }
737 @{$self->{_links}};
738}
739
e3237417
GS
740## overrides for Pod::Parser
741
360aca43 742sub end_pod {
66aff6dd
GS
743 ## Do some final checks and
744 ## print the number of errors found
745 my $self = shift;
746 my $infile = $self->input_file();
66aff6dd
GS
747
748 if(@{$self->{_list_stack}}) {
66aff6dd
GS
749 my $list;
750 while(($list = $self->_close_list('EOF',$infile)) &&
751 $list->indent() ne 'auto') {
752 $self->poderror({ -line => 'EOF', -file => $infile,
753 -severity => 'ERROR', -msg => "=over on line " .
754 $list->start() . " without closing =back" }); #"
755 }
756 }
757
758 # check validity of document internal hyperlinks
759 # first build the node names from the paragraph text
760 my %nodes;
761 foreach($self->node()) {
762 $nodes{$_} = 1;
92e3d63a 763 if(/^(\S+)\s+\S/) {
66aff6dd
GS
764 # we have more than one word. Use the first as a node, too.
765 # This is used heavily in perlfunc.pod
766 $nodes{$1} ||= 2; # derived node
767 }
768 }
92e3d63a
JH
769 foreach($self->idx()) {
770 $nodes{$_} = 3; # index node
771 }
66aff6dd 772 foreach($self->hyperlink()) {
48f30392
GS
773 my ($line,$link) = @$_;
774 # _TODO_ what if there is a link to the page itself by the name,
775 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
776 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
777 my $node = $self->_check_ptree($self->parse_text($link->node(),
778 $line), $line, $infile, 'L');
779 if($node && !$nodes{$node}) {
780 $self->poderror({ -line => $line || '', -file => $infile,
781 -severity => 'ERROR',
782 -msg => "unresolved internal link '$node'"});
783 }
66aff6dd
GS
784 }
785 }
48f30392
GS
786
787 # check the internal nodes for uniqueness. This pertains to
788 # =headX, =item and X<...>
66aff6dd
GS
789 foreach(grep($self->{_unique_nodes}->{$_} > 1,
790 keys %{$self->{_unique_nodes}})) {
791 $self->poderror({ -line => '-', -file => $infile,
792 -severity => 'WARNING',
fb8eeed8 793 -msg => "multiple occurrence of link target '$_'"});
66aff6dd
GS
794 }
795
c23d1eb0
MR
796 # no POD found here
797 $self->num_errors(-1) if($self->{_commands} == 0);
360aca43
GS
798}
799
e2c3adef 800# check a POD command directive
360aca43 801sub command {
664bb207 802 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
360aca43 803 my ($file, $line) = $pod_para->file_line;
360aca43 804 ## Check the command syntax
e3237417 805 my $arg; # this will hold the command argument
664bb207
GS
806 if (! $VALID_COMMANDS{$cmd}) {
807 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
66aff6dd 808 -msg => "Unknown command '$cmd'" });
360aca43 809 }
92e3d63a
JH
810 else { # found a valid command
811 $self->{_commands}++; # delete this line if below is enabled again
812
813 ##### following check disabled due to strong request
814 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
815 # $self->poderror({ -line => $line, -file => $file,
816 # -severity => 'WARNING',
817 # -msg => "file does not start with =head" });
818 #}
819
820 # check syntax of particular command
e3237417 821 if($cmd eq 'over') {
e2c3adef
GS
822 # check for argument
823 $arg = $self->interpolate_and_check($paragraph, $line,$file);
824 my $indent = 4; # default
825 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
826 $indent = $1;
e2c3adef 827 }
e3237417 828 # start a new list
66aff6dd 829 $self->_open_list($indent,$line,$file);
e3237417
GS
830 }
831 elsif($cmd eq 'item') {
e2c3adef 832 # are we in a list?
e3237417
GS
833 unless(@{$self->{_list_stack}}) {
834 $self->poderror({ -line => $line, -file => $file,
835 -severity => 'ERROR',
836 -msg => "=item without previous =over" });
e2c3adef 837 # auto-open in case we encounter many more
66aff6dd
GS
838 $self->_open_list('auto',$line,$file);
839 }
840 my $list = $self->{_list_stack}->[0];
841 # check whether the previous item had some contents
842 if(defined $self->{_list_item_contents} &&
843 $self->{_list_item_contents} == 0) {
844 $self->poderror({ -line => $line, -file => $file,
845 -severity => 'WARNING',
846 -msg => "previous =item has no contents" });
847 }
848 if($list->{_has_par}) {
849 $self->poderror({ -line => $line, -file => $file,
850 -severity => 'WARNING',
851 -msg => "preceding non-item paragraph(s)" });
852 delete $list->{_has_par};
e3237417 853 }
e2c3adef
GS
854 # check for argument
855 $arg = $self->interpolate_and_check($paragraph, $line, $file);
66aff6dd
GS
856 if($arg && $arg =~ /(\S+)/) {
857 $arg =~ s/[\s\n]+$//;
858 my $type;
859 if($arg =~ /^[*]\s*(\S*.*)/) {
860 $type = 'bullet';
861 $self->{_list_item_contents} = $1 ? 1 : 0;
862 $arg = $1;
863 }
864 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
865 $type = 'number';
866 $self->{_list_item_contents} = $1 ? 1 : 0;
867 $arg = $1;
868 }
869 else {
870 $type = 'definition';
871 $self->{_list_item_contents} = 1;
872 }
873 my $first = $list->type();
874 if($first && $first ne $type) {
875 $self->poderror({ -line => $line, -file => $file,
876 -severity => 'WARNING',
877 -msg => "=item type mismatch ('$first' vs. '$type')"});
878 }
879 else { # first item
880 $list->type($type);
881 }
882 }
883 else {
e2c3adef
GS
884 $self->poderror({ -line => $line, -file => $file,
885 -severity => 'WARNING',
886 -msg => "No argument for =item" });
887 $arg = ' '; # empty
66aff6dd 888 $self->{_list_item_contents} = 0;
e3237417 889 }
e2c3adef 890 # add this item
66aff6dd 891 $list->item($arg);
e2c3adef
GS
892 # remember this node
893 $self->node($arg);
e3237417
GS
894 }
895 elsif($cmd eq 'back') {
896 # check if we have an open list
897 unless(@{$self->{_list_stack}}) {
898 $self->poderror({ -line => $line, -file => $file,
899 -severity => 'ERROR',
900 -msg => "=back without previous =over" });
901 }
902 else {
903 # check for spurious characters
e2c3adef 904 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
905 if($arg && $arg =~ /\S/) {
906 $self->poderror({ -line => $line, -file => $file,
66aff6dd 907 -severity => 'ERROR',
e3237417
GS
908 -msg => "Spurious character(s) after =back" });
909 }
910 # close list
66aff6dd 911 my $list = $self->_close_list($line,$file);
e3237417
GS
912 # check for empty lists
913 if(!$list->item() && $self->{-warnings}) {
914 $self->poderror({ -line => $line, -file => $file,
915 -severity => 'WARNING',
916 -msg => "No items in =over (at line " .
917 $list->start() . ") / =back list"}); #"
918 }
919 }
920 }
66aff6dd 921 elsif($cmd =~ /^head(\d+)/) {
9c6ed6d7
HS
922 my $hnum = $1;
923 $self->{"_have_head_$hnum"}++; # count head types
924 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
925 $self->poderror({ -line => $line, -file => $file,
926 -severity => 'WARNING',
927 -msg => "=head$hnum without preceding higher level"});
928 }
48f30392 929 # check whether the previous =head section had some contents
66aff6dd
GS
930 if(defined $self->{_commands_in_head} &&
931 $self->{_commands_in_head} == 0 &&
932 defined $self->{_last_head} &&
9c6ed6d7 933 $self->{_last_head} >= $hnum) {
66aff6dd
GS
934 $self->poderror({ -line => $line, -file => $file,
935 -severity => 'WARNING',
936 -msg => "empty section in previous paragraph"});
937 }
938 $self->{_commands_in_head} = -1;
9c6ed6d7 939 $self->{_last_head} = $hnum;
e3237417
GS
940 # check if there is an open list
941 if(@{$self->{_list_stack}}) {
942 my $list;
66aff6dd
GS
943 while(($list = $self->_close_list($line,$file)) &&
944 $list->indent() ne 'auto') {
e3237417
GS
945 $self->poderror({ -line => $line, -file => $file,
946 -severity => 'ERROR',
e2c3adef
GS
947 -msg => "=over on line ". $list->start() .
948 " without closing =back (at $cmd)" });
e3237417
GS
949 }
950 }
951 # remember this node
e2c3adef 952 $arg = $self->interpolate_and_check($paragraph, $line,$file);
66aff6dd
GS
953 $arg =~ s/[\s\n]+$//s;
954 $self->node($arg);
955 unless(length($arg)) {
956 $self->poderror({ -line => $line, -file => $file,
957 -severity => 'ERROR',
958 -msg => "empty =$cmd"});
959 }
e2c3adef 960 if($cmd eq 'head1') {
e2c3adef
GS
961 $self->{_current_head1} = $arg;
962 } else {
963 $self->{_current_head1} = '';
964 }
e3237417
GS
965 }
966 elsif($cmd eq 'begin') {
967 if($self->{_have_begin}) {
968 # already have a begin
969 $self->poderror({ -line => $line, -file => $file,
970 -severity => 'ERROR',
971 -msg => "Nested =begin's (first at line " .
972 $self->{_have_begin} . ")"});
973 }
974 else {
975 # check for argument
e2c3adef 976 $arg = $self->interpolate_and_check($paragraph, $line,$file);
e3237417
GS
977 unless($arg && $arg =~ /(\S+)/) {
978 $self->poderror({ -line => $line, -file => $file,
e2c3adef 979 -severity => 'ERROR',
e3237417
GS
980 -msg => "No argument for =begin"});
981 }
982 # remember the =begin
983 $self->{_have_begin} = "$line:$1";
984 }
985 }
986 elsif($cmd eq 'end') {
987 if($self->{_have_begin}) {
988 # close the existing =begin
989 $self->{_have_begin} = '';
990 # check for spurious characters
e2c3adef
GS
991 $arg = $self->interpolate_and_check($paragraph, $line,$file);
992 # the closing argument is optional
993 #if($arg && $arg =~ /\S/) {
994 # $self->poderror({ -line => $line, -file => $file,
995 # -severity => 'WARNING',
996 # -msg => "Spurious character(s) after =end" });
997 #}
e3237417
GS
998 }
999 else {
1000 # don't have a matching =begin
1001 $self->poderror({ -line => $line, -file => $file,
e2c3adef 1002 -severity => 'ERROR',
e3237417
GS
1003 -msg => "=end without =begin" });
1004 }
1005 }
e2c3adef
GS
1006 elsif($cmd eq 'for') {
1007 unless($paragraph =~ /\s*(\S+)\s*/) {
1008 $self->poderror({ -line => $line, -file => $file,
1009 -severity => 'ERROR',
1010 -msg => "=for without formatter specification" });
1011 }
1012 $arg = ''; # do not expand paragraph below
1013 }
66aff6dd
GS
1014 elsif($cmd =~ /^(pod|cut)$/) {
1015 # check for argument
1016 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1017 if($arg && $arg =~ /(\S+)/) {
1018 $self->poderror({ -line => $line, -file => $file,
1019 -severity => 'ERROR',
1020 -msg => "Spurious text after =$cmd"});
1021 }
1022 }
1023 $self->{_commands_in_head}++;
e3237417 1024 ## Check the interior sequences in the command-text
e2c3adef 1025 $self->interpolate_and_check($paragraph, $line,$file)
e3237417 1026 unless(defined $arg);
e2c3adef 1027 }
360aca43
GS
1028}
1029
66aff6dd
GS
1030sub _open_list
1031{
1032 my ($self,$indent,$line,$file) = @_;
1033 my $list = Pod::List->new(
1034 -indent => $indent,
1035 -start => $line,
1036 -file => $file);
1037 unshift(@{$self->{_list_stack}}, $list);
1038 undef $self->{_list_item_contents};
1039 $list;
1040}
1041
1042sub _close_list
1043{
1044 my ($self,$line,$file) = @_;
1045 my $list = shift(@{$self->{_list_stack}});
1046 if(defined $self->{_list_item_contents} &&
1047 $self->{_list_item_contents} == 0) {
1048 $self->poderror({ -line => $line, -file => $file,
1049 -severity => 'WARNING',
1050 -msg => "previous =item has no contents" });
1051 }
1052 undef $self->{_list_item_contents};
1053 $list;
1054}
1055
e2c3adef
GS
1056# process a block of some text
1057sub interpolate_and_check {
e3237417
GS
1058 my ($self, $paragraph, $line, $file) = @_;
1059 ## Check the interior sequences in the command-text
1060 # and return the text
1061 $self->_check_ptree(
1062 $self->parse_text($paragraph,$line), $line, $file, '');
1063}
1064
1065sub _check_ptree {
1066 my ($self,$ptree,$line,$file,$nestlist) = @_;
1067 local($_);
1068 my $text = '';
1069 # process each node in the parse tree
1070 foreach(@$ptree) {
1071 # regular text chunk
1072 unless(ref) {
e3237417 1073 # count the unescaped angle brackets
92e3d63a 1074 # complain only when warning level is greater than 1
c23d1eb0
MR
1075 if($self->{-warnings} && $self->{-warnings}>1) {
1076 my $count;
1077 if($count = tr/<>/<>/) {
e3237417
GS
1078 $self->poderror({ -line => $line, -file => $file,
1079 -severity => 'WARNING',
c23d1eb0
MR
1080 -msg => "$count unescaped <> in paragraph" });
1081 }
e3237417 1082 }
c23d1eb0 1083 $text .= $_;
e3237417
GS
1084 next;
1085 }
1086 # have an interior sequence
1087 my $cmd = $_->cmd_name();
1088 my $contents = $_->parse_tree();
1089 ($file,$line) = $_->file_line();
1090 # check for valid tag
1091 if (! $VALID_SEQUENCES{$cmd}) {
1092 $self->poderror({ -line => $line, -file => $file,
1093 -severity => 'ERROR',
66aff6dd 1094 -msg => qq(Unknown interior-sequence '$cmd')});
e3237417
GS
1095 # expand it anyway
1096 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1097 next;
1098 }
1099 if($nestlist =~ /$cmd/) {
1100 $self->poderror({ -line => $line, -file => $file,
1101 -severity => 'ERROR',
1102 -msg => "nested commands $cmd<...$cmd<...>...>"});
1103 # _TODO_ should we add the contents anyway?
1104 # expand it anyway, see below
1105 }
1106 if($cmd eq 'E') {
1107 # preserve entities
1108 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1109 $self->poderror({ -line => $line, -file => $file,
1110 -severity => 'ERROR',
1111 -msg => "garbled entity " . $_->raw_text()});
1112 next;
1113 }
e2c3adef 1114 my $ent = $$contents[0];
66aff6dd
GS
1115 my $val;
1116 if($ent =~ /^0x[0-9a-f]+$/i) {
1117 # hexadec entity
1118 $val = hex($ent);
1119 }
1120 elsif($ent =~ /^0\d+$/) {
1121 # octal
1122 $val = oct($ent);
1123 }
1124 elsif($ent =~ /^\d+$/) {
e2c3adef 1125 # numeric entity
66aff6dd
GS
1126 $val = $ent;
1127 }
1128 if(defined $val) {
1129 if($val>0 && $val<256) {
1130 $text .= chr($val);
1131 }
1132 else {
1133 $self->poderror({ -line => $line, -file => $file,
1134 -severity => 'ERROR',
1135 -msg => "Entity number out of range " . $_->raw_text()});
1136 }
e2c3adef
GS
1137 }
1138 elsif($ENTITIES{$ent}) {
1139 # known ISO entity
1140 $text .= $ENTITIES{$ent};
1141 }
1142 else {
1143 $self->poderror({ -line => $line, -file => $file,
1144 -severity => 'WARNING',
66aff6dd 1145 -msg => "Unknown entity " . $_->raw_text()});
e2c3adef
GS
1146 $text .= "E<$ent>";
1147 }
e3237417
GS
1148 }
1149 elsif($cmd eq 'L') {
1150 # try to parse the hyperlink
1151 my $link = Pod::Hyperlink->new($contents->raw_text());
1152 unless(defined $link) {
1153 $self->poderror({ -line => $line, -file => $file,
1154 -severity => 'ERROR',
e2c3adef 1155 -msg => "malformed link " . $_->raw_text() ." : $@"});
e3237417
GS
1156 next;
1157 }
1158 $link->line($line); # remember line
1159 if($self->{-warnings}) {
1160 foreach my $w ($link->warning()) {
1161 $self->poderror({ -line => $line, -file => $file,
1162 -severity => 'WARNING',
1163 -msg => $w });
1164 }
1165 }
1166 # check the link text
1167 $text .= $self->_check_ptree($self->parse_text($link->text(),
1168 $line), $line, $file, "$nestlist$cmd");
48f30392
GS
1169 # remember link
1170 $self->hyperlink([$line,$link]);
e3237417
GS
1171 }
1172 elsif($cmd =~ /[BCFIS]/) {
1173 # add the guts
1174 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1175 }
66aff6dd
GS
1176 elsif($cmd eq 'Z') {
1177 if(length($contents->raw_text())) {
1178 $self->poderror({ -line => $line, -file => $file,
1179 -severity => 'ERROR',
1180 -msg => "Nonempty Z<>"});
1181 }
1182 }
48f30392
GS
1183 elsif($cmd eq 'X') {
1184 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1185 if($idx =~ /^\s*$/s) {
1186 $self->poderror({ -line => $line, -file => $file,
1187 -severity => 'ERROR',
1188 -msg => "Empty X<>"});
1189 }
1190 else {
1191 # remember this node
1192 $self->idx($idx);
1193 }
1194 }
1195 else {
1196 # not reached
1197 die "internal error";
e3237417
GS
1198 }
1199 }
1200 $text;
1201}
1202
e2c3adef 1203# process a block of verbatim text
360aca43 1204sub verbatim {
66aff6dd 1205 ## Nothing particular to check
e2c3adef 1206 my ($self, $paragraph, $line_num, $pod_para) = @_;
66aff6dd
GS
1207
1208 $self->_preproc_par($paragraph);
1209
e2c3adef
GS
1210 if($self->{_current_head1} eq 'NAME') {
1211 my ($file, $line) = $pod_para->file_line;
1212 $self->poderror({ -line => $line, -file => $file,
1213 -severity => 'WARNING',
1214 -msg => 'Verbatim paragraph in NAME section' });
1215 }
360aca43
GS
1216}
1217
e2c3adef 1218# process a block of regular text
360aca43
GS
1219sub textblock {
1220 my ($self, $paragraph, $line_num, $pod_para) = @_;
e3237417 1221 my ($file, $line) = $pod_para->file_line;
e3237417 1222
66aff6dd
GS
1223 $self->_preproc_par($paragraph);
1224
e2c3adef
GS
1225 # skip this paragraph if in a =begin block
1226 unless($self->{_have_begin}) {
1227 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1228 if($self->{_current_head1} eq 'NAME') {
1229 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1230 # this is the canonical name
1231 $self->{-name} = $1 unless(defined $self->{-name});
1232 }
e3237417
GS
1233 }
1234 }
e3237417
GS
1235}
1236
66aff6dd
GS
1237sub _preproc_par
1238{
1239 my $self = shift;
1240 $_[0] =~ s/[\s\n]+$//;
1241 if($_[0]) {
1242 $self->{_commands_in_head}++;
1243 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1244 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1245 $self->{_list_stack}->[0]->{_has_par} = 1;
1246 }
1247 }
1248}
1249
e3237417 12501;
66aff6dd 1251
48f30392
GS
1252__END__
1253
1254=head1 AUTHOR
1255
aaa799f9
NC
1256Please report bugs using L<http://rt.cpan.org>.
1257
48f30392 1258Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
aaa799f9 1259Marek Rouchal E<lt>marekr@cpan.orgE<gt>
48f30392
GS
1260
1261Based on code for B<Pod::Text::pod2text()> written by
1262Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1263
1264=cut
1265