Commit | Line | Data |
---|---|---|
b965d173 NC |
1 | package TAP::Parser::Grammar; |
2 | ||
3 | use strict; | |
befb5359 | 4 | use warnings; |
b965d173 | 5 | |
f7c69158 | 6 | use TAP::Parser::ResultFactory (); |
b965d173 NC |
7 | use TAP::Parser::YAMLish::Reader (); |
8 | ||
406e3fef | 9 | use base 'TAP::Object'; |
f7c69158 | 10 | |
b965d173 NC |
11 | =head1 NAME |
12 | ||
13 | TAP::Parser::Grammar - A grammar for the Test Anything Protocol. | |
14 | ||
15 | =head1 VERSION | |
16 | ||
158ffeeb | 17 | Version 3.42 |
b965d173 NC |
18 | |
19 | =cut | |
20 | ||
158ffeeb | 21 | our $VERSION = '3.42'; |
f7c69158 NC |
22 | |
23 | =head1 SYNOPSIS | |
24 | ||
25 | use TAP::Parser::Grammar; | |
26 | my $grammar = $self->make_grammar({ | |
6d313664 CBW |
27 | iterator => $tap_parser_iterator, |
28 | parser => $tap_parser, | |
29 | version => 12, | |
f7c69158 NC |
30 | }); |
31 | ||
32 | my $result = $grammar->tokenize; | |
b965d173 NC |
33 | |
34 | =head1 DESCRIPTION | |
35 | ||
6d313664 CBW |
36 | C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and |
37 | constructs L<TAP::Parser::Result> subclasses to represent the tokens. | |
b965d173 NC |
38 | |
39 | Do not attempt to use this class directly. It won't make sense. It's mainly | |
40 | here to ensure that we will be able to have pluggable grammars when TAP is | |
41 | expanded at some future date (plus, this stuff was really cluttering the | |
42 | parser). | |
43 | ||
f7c69158 | 44 | =head1 METHODS |
b965d173 NC |
45 | |
46 | =head2 Class Methods | |
47 | ||
b965d173 NC |
48 | =head3 C<new> |
49 | ||
f7c69158 | 50 | my $grammar = TAP::Parser::Grammar->new({ |
6d313664 CBW |
51 | iterator => $iterator, |
52 | parser => $parser, | |
53 | version => $version, | |
f7c69158 | 54 | }); |
b965d173 | 55 | |
6d313664 CBW |
56 | Returns L<TAP::Parser> grammar object that will parse the TAP stream from the |
57 | specified iterator. Both C<iterator> and C<parser> are required arguments. | |
58 | If C<version> is not set it defaults to C<12> (see L</set_version> for more | |
59 | details). | |
b965d173 NC |
60 | |
61 | =cut | |
62 | ||
f7c69158 NC |
63 | # new() implementation supplied by TAP::Object |
64 | sub _initialize { | |
65 | my ( $self, $args ) = @_; | |
6d313664 CBW |
66 | $self->{iterator} = $args->{iterator}; # TODO: accessor |
67 | $self->{iterator} ||= $args->{stream}; # deprecated | |
68 | $self->{parser} = $args->{parser}; # TODO: accessor | |
f7c69158 | 69 | $self->set_version( $args->{version} || 12 ); |
b965d173 NC |
70 | return $self; |
71 | } | |
72 | ||
73 | my %language_for; | |
74 | ||
75 | { | |
76 | ||
77 | # XXX the 'not' and 'ok' might be on separate lines in VMS ... | |
78 | my $ok = qr/(?:not )?ok\b/; | |
79 | my $num = qr/\d+/; | |
80 | ||
81 | my %v12 = ( | |
82 | version => { | |
83 | syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i, | |
84 | handler => sub { | |
85 | my ( $self, $line ) = @_; | |
86 | my $version = $1; | |
87 | return $self->_make_version_token( $line, $version, ); | |
88 | }, | |
89 | }, | |
90 | plan => { | |
91 | syntax => qr/^1\.\.(\d+)\s*(.*)\z/, | |
92 | handler => sub { | |
93 | my ( $self, $line ) = @_; | |
94 | my ( $tests_planned, $tail ) = ( $1, $2 ); | |
95 | my $explanation = undef; | |
96 | my $skip = ''; | |
97 | ||
98 | if ( $tail =~ /^todo((?:\s+\d+)+)/ ) { | |
99 | my @todo = split /\s+/, _trim($1); | |
100 | return $self->_make_plan_token( | |
101 | $line, $tests_planned, 'TODO', | |
102 | '', \@todo | |
103 | ); | |
104 | } | |
105 | elsif ( 0 == $tests_planned ) { | |
f7c69158 | 106 | $skip = 'SKIP'; |
b965d173 | 107 | |
f7c69158 | 108 | # If we can't match # SKIP the directive should be undef. |
a39e16d8 | 109 | ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i; |
b965d173 NC |
110 | } |
111 | elsif ( $tail !~ /^\s*$/ ) { | |
112 | return $self->_make_unknown_token($line); | |
113 | } | |
114 | ||
115 | $explanation = '' unless defined $explanation; | |
116 | ||
117 | return $self->_make_plan_token( | |
118 | $line, $tests_planned, $skip, | |
119 | $explanation, [] | |
120 | ); | |
121 | ||
122 | }, | |
123 | }, | |
124 | ||
125 | # An optimization to handle the most common test lines without | |
126 | # directives. | |
127 | simple_test => { | |
128 | syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x, | |
129 | handler => sub { | |
130 | my ( $self, $line ) = @_; | |
131 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); | |
132 | ||
133 | return $self->_make_test_token( | |
134 | $line, $ok, $num, | |
135 | $desc | |
136 | ); | |
137 | }, | |
138 | }, | |
139 | test => { | |
140 | syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x, | |
141 | handler => sub { | |
142 | my ( $self, $line ) = @_; | |
143 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); | |
144 | my ( $dir, $explanation ) = ( '', '' ); | |
145 | if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* ) | |
146 | \# \s* (SKIP|TODO) \b \s* (.*) $/ix | |
147 | ) | |
148 | { | |
149 | ( $desc, $dir, $explanation ) = ( $1, $2, $3 ); | |
150 | } | |
151 | return $self->_make_test_token( | |
27fc0087 NC |
152 | $line, $ok, $num, $desc, |
153 | $dir, $explanation | |
b965d173 NC |
154 | ); |
155 | }, | |
156 | }, | |
157 | comment => { | |
158 | syntax => qr/^#(.*)/, | |
159 | handler => sub { | |
160 | my ( $self, $line ) = @_; | |
161 | my $comment = $1; | |
162 | return $self->_make_comment_token( $line, $comment ); | |
163 | }, | |
164 | }, | |
165 | bailout => { | |
686add28 | 166 | syntax => qr/^\s*Bail out!\s*(.*)/, |
b965d173 NC |
167 | handler => sub { |
168 | my ( $self, $line ) = @_; | |
169 | my $explanation = $1; | |
170 | return $self->_make_bailout_token( | |
171 | $line, | |
172 | $explanation | |
173 | ); | |
174 | }, | |
175 | }, | |
176 | ); | |
177 | ||
178 | my %v13 = ( | |
179 | %v12, | |
180 | plan => { | |
8fe9e9ec | 181 | syntax => qr/^1\.\.(\d+)\s*(?:\s*#\s*SKIP\b(.*))?\z/i, |
b965d173 NC |
182 | handler => sub { |
183 | my ( $self, $line ) = @_; | |
184 | my ( $tests_planned, $explanation ) = ( $1, $2 ); | |
185 | my $skip | |
186 | = ( 0 == $tests_planned || defined $explanation ) | |
187 | ? 'SKIP' | |
188 | : ''; | |
189 | $explanation = '' unless defined $explanation; | |
190 | return $self->_make_plan_token( | |
191 | $line, $tests_planned, $skip, | |
192 | $explanation, [] | |
193 | ); | |
194 | }, | |
195 | }, | |
196 | yaml => { | |
197 | syntax => qr/^ (\s+) (---.*) $/x, | |
198 | handler => sub { | |
199 | my ( $self, $line ) = @_; | |
200 | my ( $pad, $marker ) = ( $1, $2 ); | |
201 | return $self->_make_yaml_token( $pad, $marker ); | |
202 | }, | |
203 | }, | |
2a7f4b9b SP |
204 | pragma => { |
205 | syntax => | |
206 | qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x, | |
207 | handler => sub { | |
208 | my ( $self, $line ) = @_; | |
209 | my $pragmas = $1; | |
210 | return $self->_make_pragma_token( $line, $pragmas ); | |
211 | }, | |
212 | }, | |
b965d173 NC |
213 | ); |
214 | ||
215 | %language_for = ( | |
216 | '12' => { | |
217 | tokens => \%v12, | |
218 | }, | |
219 | '13' => { | |
220 | tokens => \%v13, | |
221 | setup => sub { | |
6d313664 | 222 | shift->{iterator}->handle_unicode; |
b965d173 NC |
223 | }, |
224 | }, | |
225 | ); | |
226 | } | |
227 | ||
228 | ############################################################################## | |
229 | ||
230 | =head2 Instance Methods | |
231 | ||
232 | =head3 C<set_version> | |
233 | ||
234 | $grammar->set_version(13); | |
235 | ||
236 | Tell the grammar which TAP syntax version to support. The lowest | |
237 | supported version is 12. Although 'TAP version' isn't valid version 12 | |
238 | syntax it is accepted so that higher version numbers may be parsed. | |
239 | ||
240 | =cut | |
241 | ||
242 | sub set_version { | |
243 | my $self = shift; | |
244 | my $version = shift; | |
245 | ||
246 | if ( my $language = $language_for{$version} ) { | |
f7c69158 NC |
247 | $self->{version} = $version; |
248 | $self->{tokens} = $language->{tokens}; | |
b965d173 NC |
249 | |
250 | if ( my $setup = $language->{setup} ) { | |
251 | $self->$setup(); | |
252 | } | |
253 | ||
254 | $self->_order_tokens; | |
255 | } | |
256 | else { | |
257 | require Carp; | |
258 | Carp::croak("Unsupported syntax version: $version"); | |
259 | } | |
260 | } | |
261 | ||
262 | # Optimization to put the most frequent tokens first. | |
263 | sub _order_tokens { | |
264 | my $self = shift; | |
265 | ||
266 | my %copy = %{ $self->{tokens} }; | |
267 | my @ordered_tokens = grep {defined} | |
268 | map { delete $copy{$_} } qw( simple_test test comment plan ); | |
269 | push @ordered_tokens, values %copy; | |
270 | ||
271 | $self->{ordered_tokens} = \@ordered_tokens; | |
272 | } | |
273 | ||
274 | ############################################################################## | |
275 | ||
276 | =head3 C<tokenize> | |
277 | ||
278 | my $token = $grammar->tokenize; | |
279 | ||
280 | This method will return a L<TAP::Parser::Result> object representing the | |
281 | current line of TAP. | |
282 | ||
283 | =cut | |
284 | ||
285 | sub tokenize { | |
286 | my $self = shift; | |
287 | ||
6d313664 | 288 | my $line = $self->{iterator}->next; |
f7c69158 NC |
289 | unless ( defined $line ) { |
290 | delete $self->{parser}; # break circular ref | |
291 | return; | |
292 | } | |
b965d173 NC |
293 | |
294 | my $token; | |
295 | ||
6d313664 | 296 | for my $token_data ( @{ $self->{ordered_tokens} } ) { |
b965d173 NC |
297 | if ( $line =~ $token_data->{syntax} ) { |
298 | my $handler = $token_data->{handler}; | |
299 | $token = $self->$handler($line); | |
300 | last; | |
301 | } | |
302 | } | |
303 | ||
304 | $token = $self->_make_unknown_token($line) unless $token; | |
305 | ||
f7c69158 | 306 | return $self->{parser}->make_result($token); |
b965d173 NC |
307 | } |
308 | ||
309 | ############################################################################## | |
310 | ||
311 | =head3 C<token_types> | |
312 | ||
313 | my @types = $grammar->token_types; | |
314 | ||
315 | Returns the different types of tokens which this grammar can parse. | |
316 | ||
317 | =cut | |
318 | ||
319 | sub token_types { | |
320 | my $self = shift; | |
321 | return keys %{ $self->{tokens} }; | |
322 | } | |
323 | ||
324 | ############################################################################## | |
325 | ||
326 | =head3 C<syntax_for> | |
327 | ||
328 | my $syntax = $grammar->syntax_for($token_type); | |
329 | ||
330 | Returns a pre-compiled regular expression which will match a chunk of TAP | |
331 | corresponding to the token type. For example (not that you should really pay | |
332 | attention to this, C<< $grammar->syntax_for('comment') >> will return | |
333 | C<< qr/^#(.*)/ >>. | |
334 | ||
335 | =cut | |
336 | ||
337 | sub syntax_for { | |
338 | my ( $self, $type ) = @_; | |
339 | return $self->{tokens}->{$type}->{syntax}; | |
340 | } | |
341 | ||
342 | ############################################################################## | |
343 | ||
344 | =head3 C<handler_for> | |
345 | ||
346 | my $handler = $grammar->handler_for($token_type); | |
347 | ||
348 | Returns a code reference which, when passed an appropriate line of TAP, | |
349 | returns the lexed token corresponding to that line. As a result, the basic | |
350 | TAP parsing loop looks similar to the following: | |
351 | ||
352 | my @tokens; | |
353 | my $grammar = TAP::Grammar->new; | |
354 | LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) { | |
6d313664 | 355 | for my $type ( $grammar->token_types ) { |
b965d173 NC |
356 | my $syntax = $grammar->syntax_for($type); |
357 | if ( $line =~ $syntax ) { | |
358 | my $handler = $grammar->handler_for($type); | |
359 | push @tokens => $grammar->$handler($line); | |
360 | next LINE; | |
361 | } | |
362 | } | |
363 | push @tokens => $grammar->_make_unknown_token($line); | |
364 | } | |
365 | ||
366 | =cut | |
367 | ||
368 | sub handler_for { | |
369 | my ( $self, $type ) = @_; | |
370 | return $self->{tokens}->{$type}->{handler}; | |
371 | } | |
372 | ||
373 | sub _make_version_token { | |
374 | my ( $self, $line, $version ) = @_; | |
375 | return { | |
376 | type => 'version', | |
377 | raw => $line, | |
378 | version => $version, | |
379 | }; | |
380 | } | |
381 | ||
382 | sub _make_plan_token { | |
383 | my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_; | |
384 | ||
f7c69158 NC |
385 | if ( $directive eq 'SKIP' |
386 | && 0 != $tests_planned | |
387 | && $self->{version} < 13 ) | |
388 | { | |
b965d173 NC |
389 | warn |
390 | "Specified SKIP directive in plan but more than 0 tests ($line)\n"; | |
391 | } | |
f7c69158 | 392 | |
b965d173 NC |
393 | return { |
394 | type => 'plan', | |
395 | raw => $line, | |
396 | tests_planned => $tests_planned, | |
397 | directive => $directive, | |
398 | explanation => _trim($explanation), | |
399 | todo_list => $todo, | |
400 | }; | |
401 | } | |
402 | ||
403 | sub _make_test_token { | |
404 | my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_; | |
27fc0087 | 405 | return { |
b965d173 | 406 | ok => $ok, |
dbd04185 NC |
407 | |
408 | # forcing this to be an integer (and not a string) reduces memory | |
409 | # consumption. RT #84939 | |
410 | test_num => ( defined $num ? 0 + $num : undef ), | |
b965d173 | 411 | description => _trim($desc), |
27fc0087 | 412 | directive => ( defined $dir ? uc $dir : '' ), |
b965d173 NC |
413 | explanation => _trim($explanation), |
414 | raw => $line, | |
415 | type => 'test', | |
27fc0087 | 416 | }; |
b965d173 NC |
417 | } |
418 | ||
419 | sub _make_unknown_token { | |
420 | my ( $self, $line ) = @_; | |
421 | return { | |
422 | raw => $line, | |
423 | type => 'unknown', | |
424 | }; | |
425 | } | |
426 | ||
427 | sub _make_comment_token { | |
428 | my ( $self, $line, $comment ) = @_; | |
429 | return { | |
430 | type => 'comment', | |
431 | raw => $line, | |
432 | comment => _trim($comment) | |
433 | }; | |
434 | } | |
435 | ||
436 | sub _make_bailout_token { | |
437 | my ( $self, $line, $explanation ) = @_; | |
438 | return { | |
439 | type => 'bailout', | |
440 | raw => $line, | |
441 | bailout => _trim($explanation) | |
442 | }; | |
443 | } | |
444 | ||
445 | sub _make_yaml_token { | |
446 | my ( $self, $pad, $marker ) = @_; | |
447 | ||
448 | my $yaml = TAP::Parser::YAMLish::Reader->new; | |
449 | ||
6d313664 | 450 | my $iterator = $self->{iterator}; |
b965d173 NC |
451 | |
452 | # Construct a reader that reads from our input stripping leading | |
453 | # spaces from each line. | |
454 | my $leader = length($pad); | |
455 | my $strip = qr{ ^ (\s{$leader}) (.*) $ }x; | |
456 | my @extra = ($marker); | |
457 | my $reader = sub { | |
458 | return shift @extra if @extra; | |
6d313664 | 459 | my $line = $iterator->next; |
b965d173 NC |
460 | return $2 if $line =~ $strip; |
461 | return; | |
462 | }; | |
463 | ||
464 | my $data = $yaml->read($reader); | |
465 | ||
466 | # Reconstitute input. This is convoluted. Maybe we should just | |
467 | # record it on the way in... | |
468 | chomp( my $raw = $yaml->get_raw ); | |
469 | $raw =~ s/^/$pad/mg; | |
470 | ||
471 | return { | |
472 | type => 'yaml', | |
473 | raw => $raw, | |
474 | data => $data | |
475 | }; | |
476 | } | |
477 | ||
2a7f4b9b SP |
478 | sub _make_pragma_token { |
479 | my ( $self, $line, $pragmas ) = @_; | |
480 | return { | |
481 | type => 'pragma', | |
482 | raw => $line, | |
483 | pragmas => [ split /\s*,\s*/, _trim($pragmas) ], | |
484 | }; | |
485 | } | |
486 | ||
b965d173 NC |
487 | sub _trim { |
488 | my $data = shift; | |
489 | ||
490 | return '' unless defined $data; | |
491 | ||
492 | $data =~ s/^\s+//; | |
493 | $data =~ s/\s+$//; | |
494 | return $data; | |
495 | } | |
496 | ||
f7c69158 NC |
497 | 1; |
498 | ||
b965d173 NC |
499 | =head1 TAP GRAMMAR |
500 | ||
501 | B<NOTE:> This grammar is slightly out of date. There's still some discussion | |
502 | about it and a new one will be provided when we have things better defined. | |
503 | ||
504 | The L<TAP::Parser> does not use a formal grammar because TAP is essentially a | |
505 | stream-based protocol. In fact, it's quite legal to have an infinite stream. | |
506 | For the same reason that we don't apply regexes to streams, we're not using a | |
507 | formal grammar here. Instead, we parse the TAP in lines. | |
508 | ||
686add28 | 509 | For purposes for forward compatibility, any result which does not match the |
b965d173 NC |
510 | following grammar is currently referred to as |
511 | L<TAP::Parser::Result::Unknown>. It is I<not> a parse error. | |
512 | ||
513 | A formal grammar would look similar to the following: | |
514 | ||
515 | (* | |
516 | For the time being, I'm cheating on the EBNF by allowing | |
517 | certain terms to be defined by POSIX character classes by | |
518 | using the following syntax: | |
519 | ||
520 | digit ::= [:digit:] | |
521 | ||
522 | As far as I am aware, that's not valid EBNF. Sue me. I | |
523 | didn't know how to write "char" otherwise (Unicode issues). | |
524 | Suggestions welcome. | |
525 | *) | |
526 | ||
527 | tap ::= version? { comment | unknown } leading_plan lines | |
528 | | | |
529 | lines trailing_plan {comment} | |
530 | ||
531 | version ::= 'TAP version ' positiveInteger {positiveInteger} "\n" | |
532 | ||
533 | leading_plan ::= plan skip_directive? "\n" | |
534 | ||
535 | trailing_plan ::= plan "\n" | |
536 | ||
537 | plan ::= '1..' nonNegativeInteger | |
538 | ||
539 | lines ::= line {line} | |
540 | ||
541 | line ::= (comment | test | unknown | bailout ) "\n" | |
542 | ||
543 | test ::= status positiveInteger? description? directive? | |
544 | ||
545 | status ::= 'not '? 'ok ' | |
546 | ||
547 | description ::= (character - (digit | '#')) {character - '#'} | |
548 | ||
549 | directive ::= todo_directive | skip_directive | |
550 | ||
551 | todo_directive ::= hash_mark 'TODO' ' ' {character} | |
552 | ||
553 | skip_directive ::= hash_mark 'SKIP' ' ' {character} | |
554 | ||
555 | comment ::= hash_mark {character} | |
556 | ||
557 | hash_mark ::= '#' {' '} | |
558 | ||
559 | bailout ::= 'Bail out!' {character} | |
560 | ||
561 | unknown ::= { (character - "\n") } | |
562 | ||
563 | (* POSIX character classes and other terminals *) | |
564 | ||
565 | digit ::= [:digit:] | |
566 | character ::= ([:print:] - "\n") | |
567 | positiveInteger ::= ( digit - '0' ) {digit} | |
568 | nonNegativeInteger ::= digit {digit} | |
569 | ||
f7c69158 | 570 | =head1 SUBCLASSING |
b965d173 | 571 | |
f7c69158 | 572 | Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview. |
b965d173 | 573 | |
f7c69158 NC |
574 | If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to |
575 | do is read through the code. There's no easy way of summarizing it here. | |
576 | ||
577 | =head1 SEE ALSO | |
578 | ||
579 | L<TAP::Object>, | |
580 | L<TAP::Parser>, | |
581 | L<TAP::Parser::Iterator>, | |
582 | L<TAP::Parser::Result>, | |
583 | ||
584 | =cut |