This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade Test-Harness from version 3.39 to 3.41
[perl5.git] / cpan / Test-Harness / lib / TAP / Parser / Grammar.pm
CommitLineData
b965d173
NC
1package TAP::Parser::Grammar;
2
3use strict;
befb5359 4use warnings;
b965d173 5
f7c69158 6use TAP::Parser::ResultFactory ();
b965d173
NC
7use TAP::Parser::YAMLish::Reader ();
8
406e3fef 9use base 'TAP::Object';
f7c69158 10
b965d173
NC
11=head1 NAME
12
13TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
14
15=head1 VERSION
16
158ffeeb 17Version 3.42
b965d173
NC
18
19=cut
20
158ffeeb 21our $VERSION = '3.42';
f7c69158
NC
22
23=head1 SYNOPSIS
24
25 use TAP::Parser::Grammar;
26 my $grammar = $self->make_grammar({
6d313664
CBW
27 iterator => $tap_parser_iterator,
28 parser => $tap_parser,
29 version => 12,
f7c69158
NC
30 });
31
32 my $result = $grammar->tokenize;
b965d173
NC
33
34=head1 DESCRIPTION
35
6d313664
CBW
36C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and
37constructs L<TAP::Parser::Result> subclasses to represent the tokens.
b965d173
NC
38
39Do not attempt to use this class directly. It won't make sense. It's mainly
40here to ensure that we will be able to have pluggable grammars when TAP is
41expanded at some future date (plus, this stuff was really cluttering the
42parser).
43
f7c69158 44=head1 METHODS
b965d173
NC
45
46=head2 Class Methods
47
b965d173
NC
48=head3 C<new>
49
f7c69158 50 my $grammar = TAP::Parser::Grammar->new({
6d313664
CBW
51 iterator => $iterator,
52 parser => $parser,
53 version => $version,
f7c69158 54 });
b965d173 55
6d313664
CBW
56Returns L<TAP::Parser> grammar object that will parse the TAP stream from the
57specified iterator. Both C<iterator> and C<parser> are required arguments.
58If C<version> is not set it defaults to C<12> (see L</set_version> for more
59details).
b965d173
NC
60
61=cut
62
f7c69158
NC
63# new() implementation supplied by TAP::Object
64sub _initialize {
65 my ( $self, $args ) = @_;
6d313664
CBW
66 $self->{iterator} = $args->{iterator}; # TODO: accessor
67 $self->{iterator} ||= $args->{stream}; # deprecated
68 $self->{parser} = $args->{parser}; # TODO: accessor
f7c69158 69 $self->set_version( $args->{version} || 12 );
b965d173
NC
70 return $self;
71}
72
73my %language_for;
74
75{
76
77 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
78 my $ok = qr/(?:not )?ok\b/;
79 my $num = qr/\d+/;
80
81 my %v12 = (
82 version => {
83 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84 handler => sub {
85 my ( $self, $line ) = @_;
86 my $version = $1;
87 return $self->_make_version_token( $line, $version, );
88 },
89 },
90 plan => {
91 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
92 handler => sub {
93 my ( $self, $line ) = @_;
94 my ( $tests_planned, $tail ) = ( $1, $2 );
95 my $explanation = undef;
96 my $skip = '';
97
98 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
99 my @todo = split /\s+/, _trim($1);
100 return $self->_make_plan_token(
101 $line, $tests_planned, 'TODO',
102 '', \@todo
103 );
104 }
105 elsif ( 0 == $tests_planned ) {
f7c69158 106 $skip = 'SKIP';
b965d173 107
f7c69158 108 # If we can't match # SKIP the directive should be undef.
a39e16d8 109 ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
b965d173
NC
110 }
111 elsif ( $tail !~ /^\s*$/ ) {
112 return $self->_make_unknown_token($line);
113 }
114
115 $explanation = '' unless defined $explanation;
116
117 return $self->_make_plan_token(
118 $line, $tests_planned, $skip,
119 $explanation, []
120 );
121
122 },
123 },
124
125 # An optimization to handle the most common test lines without
126 # directives.
127 simple_test => {
128 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129 handler => sub {
130 my ( $self, $line ) = @_;
131 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132
133 return $self->_make_test_token(
134 $line, $ok, $num,
135 $desc
136 );
137 },
138 },
139 test => {
140 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141 handler => sub {
142 my ( $self, $line ) = @_;
143 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
144 my ( $dir, $explanation ) = ( '', '' );
145 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
146 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
147 )
148 {
149 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150 }
151 return $self->_make_test_token(
27fc0087
NC
152 $line, $ok, $num, $desc,
153 $dir, $explanation
b965d173
NC
154 );
155 },
156 },
157 comment => {
158 syntax => qr/^#(.*)/,
159 handler => sub {
160 my ( $self, $line ) = @_;
161 my $comment = $1;
162 return $self->_make_comment_token( $line, $comment );
163 },
164 },
165 bailout => {
686add28 166 syntax => qr/^\s*Bail out!\s*(.*)/,
b965d173
NC
167 handler => sub {
168 my ( $self, $line ) = @_;
169 my $explanation = $1;
170 return $self->_make_bailout_token(
171 $line,
172 $explanation
173 );
174 },
175 },
176 );
177
178 my %v13 = (
179 %v12,
180 plan => {
8fe9e9ec 181 syntax => qr/^1\.\.(\d+)\s*(?:\s*#\s*SKIP\b(.*))?\z/i,
b965d173
NC
182 handler => sub {
183 my ( $self, $line ) = @_;
184 my ( $tests_planned, $explanation ) = ( $1, $2 );
185 my $skip
186 = ( 0 == $tests_planned || defined $explanation )
187 ? 'SKIP'
188 : '';
189 $explanation = '' unless defined $explanation;
190 return $self->_make_plan_token(
191 $line, $tests_planned, $skip,
192 $explanation, []
193 );
194 },
195 },
196 yaml => {
197 syntax => qr/^ (\s+) (---.*) $/x,
198 handler => sub {
199 my ( $self, $line ) = @_;
200 my ( $pad, $marker ) = ( $1, $2 );
201 return $self->_make_yaml_token( $pad, $marker );
202 },
203 },
2a7f4b9b
SP
204 pragma => {
205 syntax =>
206 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207 handler => sub {
208 my ( $self, $line ) = @_;
209 my $pragmas = $1;
210 return $self->_make_pragma_token( $line, $pragmas );
211 },
212 },
b965d173
NC
213 );
214
215 %language_for = (
216 '12' => {
217 tokens => \%v12,
218 },
219 '13' => {
220 tokens => \%v13,
221 setup => sub {
6d313664 222 shift->{iterator}->handle_unicode;
b965d173
NC
223 },
224 },
225 );
226}
227
228##############################################################################
229
230=head2 Instance Methods
231
232=head3 C<set_version>
233
234 $grammar->set_version(13);
235
236Tell the grammar which TAP syntax version to support. The lowest
237supported version is 12. Although 'TAP version' isn't valid version 12
238syntax it is accepted so that higher version numbers may be parsed.
239
240=cut
241
242sub set_version {
243 my $self = shift;
244 my $version = shift;
245
246 if ( my $language = $language_for{$version} ) {
f7c69158
NC
247 $self->{version} = $version;
248 $self->{tokens} = $language->{tokens};
b965d173
NC
249
250 if ( my $setup = $language->{setup} ) {
251 $self->$setup();
252 }
253
254 $self->_order_tokens;
255 }
256 else {
257 require Carp;
258 Carp::croak("Unsupported syntax version: $version");
259 }
260}
261
262# Optimization to put the most frequent tokens first.
263sub _order_tokens {
264 my $self = shift;
265
266 my %copy = %{ $self->{tokens} };
267 my @ordered_tokens = grep {defined}
268 map { delete $copy{$_} } qw( simple_test test comment plan );
269 push @ordered_tokens, values %copy;
270
271 $self->{ordered_tokens} = \@ordered_tokens;
272}
273
274##############################################################################
275
276=head3 C<tokenize>
277
278 my $token = $grammar->tokenize;
279
280This method will return a L<TAP::Parser::Result> object representing the
281current line of TAP.
282
283=cut
284
285sub tokenize {
286 my $self = shift;
287
6d313664 288 my $line = $self->{iterator}->next;
f7c69158
NC
289 unless ( defined $line ) {
290 delete $self->{parser}; # break circular ref
291 return;
292 }
b965d173
NC
293
294 my $token;
295
6d313664 296 for my $token_data ( @{ $self->{ordered_tokens} } ) {
b965d173
NC
297 if ( $line =~ $token_data->{syntax} ) {
298 my $handler = $token_data->{handler};
299 $token = $self->$handler($line);
300 last;
301 }
302 }
303
304 $token = $self->_make_unknown_token($line) unless $token;
305
f7c69158 306 return $self->{parser}->make_result($token);
b965d173
NC
307}
308
309##############################################################################
310
311=head3 C<token_types>
312
313 my @types = $grammar->token_types;
314
315Returns the different types of tokens which this grammar can parse.
316
317=cut
318
319sub token_types {
320 my $self = shift;
321 return keys %{ $self->{tokens} };
322}
323
324##############################################################################
325
326=head3 C<syntax_for>
327
328 my $syntax = $grammar->syntax_for($token_type);
329
330Returns a pre-compiled regular expression which will match a chunk of TAP
331corresponding to the token type. For example (not that you should really pay
332attention to this, C<< $grammar->syntax_for('comment') >> will return
333C<< qr/^#(.*)/ >>.
334
335=cut
336
337sub syntax_for {
338 my ( $self, $type ) = @_;
339 return $self->{tokens}->{$type}->{syntax};
340}
341
342##############################################################################
343
344=head3 C<handler_for>
345
346 my $handler = $grammar->handler_for($token_type);
347
348Returns a code reference which, when passed an appropriate line of TAP,
349returns the lexed token corresponding to that line. As a result, the basic
350TAP parsing loop looks similar to the following:
351
352 my @tokens;
353 my $grammar = TAP::Grammar->new;
354 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
6d313664 355 for my $type ( $grammar->token_types ) {
b965d173
NC
356 my $syntax = $grammar->syntax_for($type);
357 if ( $line =~ $syntax ) {
358 my $handler = $grammar->handler_for($type);
359 push @tokens => $grammar->$handler($line);
360 next LINE;
361 }
362 }
363 push @tokens => $grammar->_make_unknown_token($line);
364 }
365
366=cut
367
368sub handler_for {
369 my ( $self, $type ) = @_;
370 return $self->{tokens}->{$type}->{handler};
371}
372
373sub _make_version_token {
374 my ( $self, $line, $version ) = @_;
375 return {
376 type => 'version',
377 raw => $line,
378 version => $version,
379 };
380}
381
382sub _make_plan_token {
383 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384
f7c69158
NC
385 if ( $directive eq 'SKIP'
386 && 0 != $tests_planned
387 && $self->{version} < 13 )
388 {
b965d173
NC
389 warn
390 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
391 }
f7c69158 392
b965d173
NC
393 return {
394 type => 'plan',
395 raw => $line,
396 tests_planned => $tests_planned,
397 directive => $directive,
398 explanation => _trim($explanation),
399 todo_list => $todo,
400 };
401}
402
403sub _make_test_token {
404 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
27fc0087 405 return {
b965d173 406 ok => $ok,
dbd04185
NC
407
408 # forcing this to be an integer (and not a string) reduces memory
409 # consumption. RT #84939
410 test_num => ( defined $num ? 0 + $num : undef ),
b965d173 411 description => _trim($desc),
27fc0087 412 directive => ( defined $dir ? uc $dir : '' ),
b965d173
NC
413 explanation => _trim($explanation),
414 raw => $line,
415 type => 'test',
27fc0087 416 };
b965d173
NC
417}
418
419sub _make_unknown_token {
420 my ( $self, $line ) = @_;
421 return {
422 raw => $line,
423 type => 'unknown',
424 };
425}
426
427sub _make_comment_token {
428 my ( $self, $line, $comment ) = @_;
429 return {
430 type => 'comment',
431 raw => $line,
432 comment => _trim($comment)
433 };
434}
435
436sub _make_bailout_token {
437 my ( $self, $line, $explanation ) = @_;
438 return {
439 type => 'bailout',
440 raw => $line,
441 bailout => _trim($explanation)
442 };
443}
444
445sub _make_yaml_token {
446 my ( $self, $pad, $marker ) = @_;
447
448 my $yaml = TAP::Parser::YAMLish::Reader->new;
449
6d313664 450 my $iterator = $self->{iterator};
b965d173
NC
451
452 # Construct a reader that reads from our input stripping leading
453 # spaces from each line.
454 my $leader = length($pad);
455 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
456 my @extra = ($marker);
457 my $reader = sub {
458 return shift @extra if @extra;
6d313664 459 my $line = $iterator->next;
b965d173
NC
460 return $2 if $line =~ $strip;
461 return;
462 };
463
464 my $data = $yaml->read($reader);
465
466 # Reconstitute input. This is convoluted. Maybe we should just
467 # record it on the way in...
468 chomp( my $raw = $yaml->get_raw );
469 $raw =~ s/^/$pad/mg;
470
471 return {
472 type => 'yaml',
473 raw => $raw,
474 data => $data
475 };
476}
477
2a7f4b9b
SP
478sub _make_pragma_token {
479 my ( $self, $line, $pragmas ) = @_;
480 return {
481 type => 'pragma',
482 raw => $line,
483 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
484 };
485}
486
b965d173
NC
487sub _trim {
488 my $data = shift;
489
490 return '' unless defined $data;
491
492 $data =~ s/^\s+//;
493 $data =~ s/\s+$//;
494 return $data;
495}
496
f7c69158
NC
4971;
498
b965d173
NC
499=head1 TAP GRAMMAR
500
501B<NOTE:> This grammar is slightly out of date. There's still some discussion
502about it and a new one will be provided when we have things better defined.
503
504The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
505stream-based protocol. In fact, it's quite legal to have an infinite stream.
506For the same reason that we don't apply regexes to streams, we're not using a
507formal grammar here. Instead, we parse the TAP in lines.
508
686add28 509For purposes for forward compatibility, any result which does not match the
b965d173
NC
510following grammar is currently referred to as
511L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
512
513A formal grammar would look similar to the following:
514
515 (*
516 For the time being, I'm cheating on the EBNF by allowing
517 certain terms to be defined by POSIX character classes by
518 using the following syntax:
519
520 digit ::= [:digit:]
521
522 As far as I am aware, that's not valid EBNF. Sue me. I
523 didn't know how to write "char" otherwise (Unicode issues).
524 Suggestions welcome.
525 *)
526
527 tap ::= version? { comment | unknown } leading_plan lines
528 |
529 lines trailing_plan {comment}
530
531 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
532
533 leading_plan ::= plan skip_directive? "\n"
534
535 trailing_plan ::= plan "\n"
536
537 plan ::= '1..' nonNegativeInteger
538
539 lines ::= line {line}
540
541 line ::= (comment | test | unknown | bailout ) "\n"
542
543 test ::= status positiveInteger? description? directive?
544
545 status ::= 'not '? 'ok '
546
547 description ::= (character - (digit | '#')) {character - '#'}
548
549 directive ::= todo_directive | skip_directive
550
551 todo_directive ::= hash_mark 'TODO' ' ' {character}
552
553 skip_directive ::= hash_mark 'SKIP' ' ' {character}
554
555 comment ::= hash_mark {character}
556
557 hash_mark ::= '#' {' '}
558
559 bailout ::= 'Bail out!' {character}
560
561 unknown ::= { (character - "\n") }
562
563 (* POSIX character classes and other terminals *)
564
565 digit ::= [:digit:]
566 character ::= ([:print:] - "\n")
567 positiveInteger ::= ( digit - '0' ) {digit}
568 nonNegativeInteger ::= digit {digit}
569
f7c69158 570=head1 SUBCLASSING
b965d173 571
f7c69158 572Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
b965d173 573
f7c69158
NC
574If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
575do is read through the code. There's no easy way of summarizing it here.
576
577=head1 SEE ALSO
578
579L<TAP::Object>,
580L<TAP::Parser>,
581L<TAP::Parser::Iterator>,
582L<TAP::Parser::Result>,
583
584=cut