[perl5.git] / cpan / Test-Harness / lib / TAP / Parser / Grammar.pm

package TAP::Parser::Grammar;

use strict;
use warnings;

use TAP::Parser::ResultFactory   ();
use TAP::Parser::YAMLish::Reader ();

use base 'TAP::Object';

=head1 NAME

TAP::Parser::Grammar - A grammar for the Test Anything Protocol.

=head1 VERSION

Version 3.42

=cut

our $VERSION = '3.42';

=head1 SYNOPSIS

  use TAP::Parser::Grammar;
  my $grammar = $self->make_grammar({
    iterator => $tap_parser_iterator,
    parser   => $tap_parser,
    version  => 12,
  });

  my $result = $grammar->tokenize;

=head1 DESCRIPTION

C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and
constructs L<TAP::Parser::Result> subclasses to represent the tokens.

Do not attempt to use this class directly.  It won't make sense.  It's mainly
here to ensure that we will be able to have pluggable grammars when TAP is
expanded at some future date (plus, this stuff was really cluttering the
parser).

=head1 METHODS

=head2 Class Methods

=head3 C<new>

  my $grammar = TAP::Parser::Grammar->new({
      iterator => $iterator,
      parser   => $parser,
      version  => $version,
  });

Returns L<TAP::Parser> grammar object that will parse the TAP stream from the
specified iterator.  Both C<iterator> and C<parser> are required arguments.
If C<version> is not set it defaults to C<12> (see L</set_version> for more
details).

=cut

# new() implementation supplied by TAP::Object
sub _initialize {
    my ( $self, $args ) = @_;
    $self->{iterator} = $args->{iterator};    # TODO: accessor
    $self->{iterator} ||= $args->{stream};    # deprecated
    $self->{parser} = $args->{parser};        # TODO: accessor
    $self->set_version( $args->{version} || 12 );
    return $self;
}

my %language_for;

{

    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
    my $ok  = qr/(?:not )?ok\b/;
    my $num = qr/\d+/;

    my %v12 = (
        version => {
            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my $version = $1;
                return $self->_make_version_token( $line, $version, );
            },
        },
        plan => {
            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $tail ) = ( $1, $2 );
                my $explanation = undef;
                my $skip        = '';

                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
                    my @todo = split /\s+/, _trim($1);
                    return $self->_make_plan_token(
                        $line, $tests_planned, 'TODO',
                        '',    \@todo
                    );
                }
                elsif ( 0 == $tests_planned ) {
                    $skip = 'SKIP';

                    # If we can't match # SKIP the directive should be undef.
                    ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
                }
                elsif ( $tail !~ /^\s*$/ ) {
                    return $self->_make_unknown_token($line);
                }

                $explanation = '' unless defined $explanation;

                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );

            },
        },

        # An optimization to handle the most common test lines without
        # directives.
        simple_test => {
            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );

                return $self->_make_test_token(
                    $line, $ok, $num,
                    $desc
                );
            },
        },
        test => {
            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
                my ( $dir, $explanation ) = ( '', '' );
                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
                  )
                {
                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
                }
                return $self->_make_test_token(
                    $line, $ok, $num, $desc,
                    $dir,  $explanation
                );
            },
        },
        comment => {
            syntax  => qr/^#(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $comment = $1;
                return $self->_make_comment_token( $line, $comment );
            },
        },
        bailout => {
            syntax  => qr/^\s*Bail out!\s*(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $explanation = $1;
                return $self->_make_bailout_token(
                    $line,
                    $explanation
                );
            },
        },
    );

    my %v13 = (
        %v12,
        plan => {
            syntax  => qr/^1\.\.(\d+)\s*(?:\s*#\s*SKIP\b(.*))?\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $explanation ) = ( $1, $2 );
                my $skip
                  = ( 0 == $tests_planned || defined $explanation )
                  ? 'SKIP'
                  : '';
                $explanation = '' unless defined $explanation;
                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );
            },
        },
        yaml => {
            syntax  => qr/^ (\s+) (---.*) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $pad, $marker ) = ( $1, $2 );
                return $self->_make_yaml_token( $pad, $marker );
            },
        },
        pragma => {
            syntax =>
              qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my $pragmas = $1;
                return $self->_make_pragma_token( $line, $pragmas );
            },
        },
    );

    %language_for = (
        '12' => {
            tokens => \%v12,
        },
        '13' => {
            tokens => \%v13,
            setup  => sub {
                shift->{iterator}->handle_unicode;
            },
        },
    );
}

##############################################################################

=head2 Instance Methods

=head3 C<set_version>

  $grammar->set_version(13);

Tell the grammar which TAP syntax version to support. The lowest
supported version is 12. Although 'TAP version' isn't valid version 12
syntax it is accepted so that higher version numbers may be parsed.

=cut

sub set_version {
    my $self    = shift;
    my $version = shift;

    if ( my $language = $language_for{$version} ) {
        $self->{version} = $version;
        $self->{tokens}  = $language->{tokens};

        if ( my $setup = $language->{setup} ) {
            $self->$setup();
        }

        $self->_order_tokens;
    }
    else {
        require Carp;
        Carp::croak("Unsupported syntax version: $version");
    }
}

# Optimization to put the most frequent tokens first.
sub _order_tokens {
    my $self = shift;

    my %copy = %{ $self->{tokens} };
    my @ordered_tokens = grep {defined}
      map { delete $copy{$_} } qw( simple_test test comment plan );
    push @ordered_tokens, values %copy;

    $self->{ordered_tokens} = \@ordered_tokens;
}

##############################################################################

=head3 C<tokenize>

  my $token = $grammar->tokenize;

This method will return a L<TAP::Parser::Result> object representing the
current line of TAP.

=cut

sub tokenize {
    my $self = shift;

    my $line = $self->{iterator}->next;
    unless ( defined $line ) {
        delete $self->{parser};    # break circular ref
        return;
    }

    my $token;

    for my $token_data ( @{ $self->{ordered_tokens} } ) {
        if ( $line =~ $token_data->{syntax} ) {
            my $handler = $token_data->{handler};
            $token = $self->$handler($line);
            last;
        }
    }

    $token = $self->_make_unknown_token($line) unless $token;

    return $self->{parser}->make_result($token);
}

##############################################################################

=head3 C<token_types>

  my @types = $grammar->token_types;

Returns the different types of tokens which this grammar can parse.

=cut

sub token_types {
    my $self = shift;
    return keys %{ $self->{tokens} };
}

##############################################################################

=head3 C<syntax_for>

  my $syntax = $grammar->syntax_for($token_type);

Returns a pre-compiled regular expression which will match a chunk of TAP
corresponding to the token type.  For example (not that you should really pay
attention to this, C<< $grammar->syntax_for('comment') >> will return
C<< qr/^#(.*)/ >>.

=cut

sub syntax_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{syntax};
}

##############################################################################

=head3 C<handler_for>

  my $handler = $grammar->handler_for($token_type);

Returns a code reference which, when passed an appropriate line of TAP,
returns the lexed token corresponding to that line.  As a result, the basic
TAP parsing loop looks similar to the following:

 my @tokens;
 my $grammar = TAP::Grammar->new;
 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
     for my $type ( $grammar->token_types ) {
         my $syntax  = $grammar->syntax_for($type);
         if ( $line =~ $syntax ) {
             my $handler = $grammar->handler_for($type);
             push @tokens => $grammar->$handler($line);
             next LINE;
         }
     }
     push @tokens => $grammar->_make_unknown_token($line);
 }

=cut

sub handler_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{handler};
}

sub _make_version_token {
    my ( $self, $line, $version ) = @_;
    return {
        type    => 'version',
        raw     => $line,
        version => $version,
    };
}

sub _make_plan_token {
    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;

    if (   $directive eq 'SKIP'
        && 0 != $tests_planned
        && $self->{version} < 13 )
    {
        warn
          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
    }

    return {
        type          => 'plan',
        raw           => $line,
        tests_planned => $tests_planned,
        directive     => $directive,
        explanation   => _trim($explanation),
        todo_list     => $todo,
    };
}

sub _make_test_token {
    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
    return {
        ok          => $ok,

        # forcing this to be an integer (and not a string) reduces memory
        # consumption. RT #84939
        test_num    => ( defined $num ? 0 + $num : undef ),
        description => _trim($desc),
        directive   => ( defined $dir ? uc $dir : '' ),
        explanation => _trim($explanation),
        raw         => $line,
        type        => 'test',
    };
}

sub _make_unknown_token {
    my ( $self, $line ) = @_;
    return {
        raw  => $line,
        type => 'unknown',
    };
}

sub _make_comment_token {
    my ( $self, $line, $comment ) = @_;
    return {
        type    => 'comment',
        raw     => $line,
        comment => _trim($comment)
    };
}

sub _make_bailout_token {
    my ( $self, $line, $explanation ) = @_;
    return {
        type    => 'bailout',
        raw     => $line,
        bailout => _trim($explanation)
    };
}

sub _make_yaml_token {
    my ( $self, $pad, $marker ) = @_;

    my $yaml = TAP::Parser::YAMLish::Reader->new;

    my $iterator = $self->{iterator};

    # Construct a reader that reads from our input stripping leading
    # spaces from each line.
    my $leader = length($pad);
    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
    my @extra  = ($marker);
    my $reader = sub {
        return shift @extra if @extra;
        my $line = $iterator->next;
        return $2 if $line =~ $strip;
        return;
    };

    my $data = $yaml->read($reader);

    # Reconstitute input. This is convoluted. Maybe we should just
    # record it on the way in...
    chomp( my $raw = $yaml->get_raw );
    $raw =~ s/^/$pad/mg;

    return {
        type => 'yaml',
        raw  => $raw,
        data => $data
    };
}

sub _make_pragma_token {
    my ( $self, $line, $pragmas ) = @_;
    return {
        type    => 'pragma',
        raw     => $line,
        pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
    };
}

sub _trim {
    my $data = shift;

    return '' unless defined $data;

    $data =~ s/^\s+//;
    $data =~ s/\s+$//;
    return $data;
}

1;

=head1 TAP GRAMMAR

B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
about it and a new one will be provided when we have things better defined.

The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
stream-based protocol.  In fact, it's quite legal to have an infinite stream.
For the same reason that we don't apply regexes to streams, we're not using a
formal grammar here.  Instead, we parse the TAP in lines.

For purposes for forward compatibility, any result which does not match the
following grammar is currently referred to as
L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.

A formal grammar would look similar to the following:

 (*
     For the time being, I'm cheating on the EBNF by allowing
     certain terms to be defined by POSIX character classes by
     using the following syntax:

       digit ::= [:digit:]

     As far as I am aware, that's not valid EBNF.  Sue me.  I
     didn't know how to write "char" otherwise (Unicode issues).
     Suggestions welcome.
 *)

 tap            ::= version? { comment | unknown } leading_plan lines
                    |
                    lines trailing_plan {comment}

 version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"

 leading_plan   ::= plan skip_directive? "\n"

 trailing_plan  ::= plan "\n"

 plan           ::= '1..' nonNegativeInteger

 lines          ::= line {line}

 line           ::= (comment | test | unknown | bailout ) "\n"

 test           ::= status positiveInteger? description? directive?

 status         ::= 'not '? 'ok '

 description    ::= (character - (digit | '#')) {character - '#'}

 directive      ::= todo_directive | skip_directive

 todo_directive ::= hash_mark 'TODO' ' ' {character}

 skip_directive ::= hash_mark 'SKIP' ' ' {character}

 comment        ::= hash_mark {character}

 hash_mark      ::= '#' {' '}

 bailout        ::= 'Bail out!' {character}

 unknown        ::= { (character - "\n") }

 (* POSIX character classes and other terminals *)

 digit              ::= [:digit:]
 character          ::= ([:print:] - "\n")
 positiveInteger    ::= ( digit - '0' ) {digit}
 nonNegativeInteger ::= digit {digit}

=head1 SUBCLASSING

Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.

If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
do is read through the code.  There's no easy way of summarizing it here.

=head1 SEE ALSO

L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::Result>,

=cut
Commit	Line	Data
b965d173 NC	1	package TAP::Parser::Grammar;
	2
	3	use strict;
befb5359	4	use warnings;
b965d173	5
f7c69158	6	use TAP::Parser::ResultFactory ();
b965d173 NC	7	use TAP::Parser::YAMLish::Reader ();
b965d173 NC	8
406e3fef	9	use base 'TAP::Object';
f7c69158	10
b965d173 NC	11	=head1 NAME
	12
	13	TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
	14
	15	=head1 VERSION
	16
158ffeeb	17	Version 3.42
b965d173 NC	18
	19	=cut
	20
158ffeeb	21	our $VERSION = '3.42';
f7c69158 NC	22
	23	=head1 SYNOPSIS
	24
	25	use TAP::Parser::Grammar;
	26	my $grammar = $self->make_grammar({
6d313664 CBW	27	iterator => $tap_parser_iterator,
	28	parser => $tap_parser,
	29	version => 12,
f7c69158 NC	30	});
	31
	32	my $result = $grammar->tokenize;
b965d173 NC	33
	34	=head1 DESCRIPTION
	35
6d313664 CBW	36	C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and
6d313664 CBW	37	constructs L<TAP::Parser::Result> subclasses to represent the tokens.
b965d173 NC	38
	39	Do not attempt to use this class directly. It won't make sense. It's mainly
	40	here to ensure that we will be able to have pluggable grammars when TAP is
	41	expanded at some future date (plus, this stuff was really cluttering the
	42	parser).
	43
f7c69158	44	=head1 METHODS
b965d173 NC	45
	46	=head2 Class Methods
	47
b965d173 NC	48	=head3 C<new>
b965d173 NC	49
f7c69158	50	my $grammar = TAP::Parser::Grammar->new({
6d313664 CBW	51	iterator => $iterator,
	52	parser => $parser,
	53	version => $version,
f7c69158	54	});
b965d173	55
6d313664 CBW	56	Returns L<TAP::Parser> grammar object that will parse the TAP stream from the
	57	specified iterator. Both C<iterator> and C<parser> are required arguments.
	58	If C<version> is not set it defaults to C<12> (see L</set_version> for more
	59	details).
b965d173 NC	60
	61	=cut
	62
f7c69158 NC	63	# new() implementation supplied by TAP::Object
	64	sub _initialize {
	65	my ( $self, $args ) = @_;
6d313664 CBW	66	$self->{iterator} = $args->{iterator}; # TODO: accessor
	67	$self->{iterator} \|\|= $args->{stream}; # deprecated
	68	$self->{parser} = $args->{parser}; # TODO: accessor
f7c69158	69	$self->set_version( $args->{version} \|\| 12 );
b965d173 NC	70	return $self;
	71	}
	72
	73	my %language_for;
	74
	75	{
	76
	77	# XXX the 'not' and 'ok' might be on separate lines in VMS ...
	78	my $ok = qr/(?:not )?ok\b/;
	79	my $num = qr/\d+/;
	80
	81	my %v12 = (
	82	version => {
	83	syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
	84	handler => sub {
	85	my ( $self, $line ) = @_;
	86	my $version = $1;
	87	return $self->_make_version_token( $line, $version, );
	88	},
	89	},
	90	plan => {
	91	syntax => qr/^1\.\.(\d+)\s(.)\z/,
	92	handler => sub {
	93	my ( $self, $line ) = @_;
	94	my ( $tests_planned, $tail ) = ( $1, $2 );
	95	my $explanation = undef;
	96	my $skip = '';
	97
	98	if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
	99	my @todo = split /\s+/, _trim($1);
	100	return $self->_make_plan_token(
	101	$line, $tests_planned, 'TODO',
	102	'', \@todo
	103	);
	104	}
	105	elsif ( 0 == $tests_planned ) {
f7c69158	106	$skip = 'SKIP';
b965d173	107
f7c69158	108	# If we can't match # SKIP the directive should be undef.
a39e16d8	109	($explanation) = $tail =~ /^#\sSKIP\S\s+(.*)/i;
b965d173 NC	110	}
	111	elsif ( $tail !~ /^\s*$/ ) {
	112	return $self->_make_unknown_token($line);
	113	}
	114
	115	$explanation = '' unless defined $explanation;
	116
	117	return $self->_make_plan_token(
	118	$line, $tests_planned, $skip,
	119	$explanation, []
	120	);
	121
	122	},
	123	},
	124
	125	# An optimization to handle the most common test lines without
	126	# directives.
	127	simple_test => {
	128	syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
	129	handler => sub {
	130	my ( $self, $line ) = @_;
	131	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
	132
	133	return $self->_make_test_token(
	134	$line, $ok, $num,
	135	$desc
	136	);
	137	},
	138	},
	139	test => {
	140	syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
	141	handler => sub {
	142	my ( $self, $line ) = @_;
	143	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
	144	my ( $dir, $explanation ) = ( '', '' );
	145	if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
	146	\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
	147	)
	148	{
	149	( $desc, $dir, $explanation ) = ( $1, $2, $3 );
	150	}
	151	return $self->_make_test_token(
27fc0087 NC	152	$line, $ok, $num, $desc,
27fc0087 NC	153	$dir, $explanation
b965d173 NC	154	);
	155	},
	156	},
	157	comment => {
	158	syntax => qr/^#(.*)/,
	159	handler => sub {
	160	my ( $self, $line ) = @_;
	161	my $comment = $1;
	162	return $self->_make_comment_token( $line, $comment );
	163	},
	164	},
	165	bailout => {
686add28	166	syntax => qr/^\sBail out!\s(.*)/,
b965d173 NC	167	handler => sub {
	168	my ( $self, $line ) = @_;
	169	my $explanation = $1;
	170	return $self->_make_bailout_token(
	171	$line,
	172	$explanation
	173	);
	174	},
	175	},
	176	);
	177
	178	my %v13 = (
	179	%v12,
	180	plan => {
8fe9e9ec	181	syntax => qr/^1\.\.(\d+)\s(?:\s#\sSKIP\b(.))?\z/i,
b965d173 NC	182	handler => sub {
	183	my ( $self, $line ) = @_;
	184	my ( $tests_planned, $explanation ) = ( $1, $2 );
	185	my $skip
	186	= ( 0 == $tests_planned \|\| defined $explanation )
	187	? 'SKIP'
	188	: '';
	189	$explanation = '' unless defined $explanation;
	190	return $self->_make_plan_token(
	191	$line, $tests_planned, $skip,
	192	$explanation, []
	193	);
	194	},
	195	},
	196	yaml => {
	197	syntax => qr/^ (\s+) (---.*) $/x,
	198	handler => sub {
	199	my ( $self, $line ) = @_;
	200	my ( $pad, $marker ) = ( $1, $2 );
	201	return $self->_make_yaml_token( $pad, $marker );
	202	},
	203	},
2a7f4b9b SP	204	pragma => {
	205	syntax =>
	206	qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
	207	handler => sub {
	208	my ( $self, $line ) = @_;
	209	my $pragmas = $1;
	210	return $self->_make_pragma_token( $line, $pragmas );
	211	},
	212	},
b965d173 NC	213	);
	214
	215	%language_for = (
	216	'12' => {
	217	tokens => \%v12,
	218	},
	219	'13' => {
	220	tokens => \%v13,
	221	setup => sub {
6d313664	222	shift->{iterator}->handle_unicode;
b965d173 NC	223	},
	224	},
	225	);
	226	}
	227
	228	##############################################################################
	229
	230	=head2 Instance Methods
	231
	232	=head3 C<set_version>
	233
	234	$grammar->set_version(13);
	235
	236	Tell the grammar which TAP syntax version to support. The lowest
	237	supported version is 12. Although 'TAP version' isn't valid version 12
	238	syntax it is accepted so that higher version numbers may be parsed.
	239
	240	=cut
	241
	242	sub set_version {
	243	my $self = shift;
	244	my $version = shift;
	245
	246	if ( my $language = $language_for{$version} ) {
f7c69158 NC	247	$self->{version} = $version;
f7c69158 NC	248	$self->{tokens} = $language->{tokens};
b965d173 NC	249
	250	if ( my $setup = $language->{setup} ) {
	251	$self->$setup();
	252	}
	253
	254	$self->_order_tokens;
	255	}
	256	else {
	257	require Carp;
	258	Carp::croak("Unsupported syntax version: $version");
	259	}
	260	}
	261
	262	# Optimization to put the most frequent tokens first.
	263	sub _order_tokens {
	264	my $self = shift;
	265
	266	my %copy = %{ $self->{tokens} };
	267	my @ordered_tokens = grep {defined}
	268	map { delete $copy{$_} } qw( simple_test test comment plan );
	269	push @ordered_tokens, values %copy;
	270
	271	$self->{ordered_tokens} = \@ordered_tokens;
	272	}
	273
	274	##############################################################################
	275
	276	=head3 C<tokenize>
	277
	278	my $token = $grammar->tokenize;
	279
	280	This method will return a L<TAP::Parser::Result> object representing the
	281	current line of TAP.
	282
	283	=cut
	284
	285	sub tokenize {
	286	my $self = shift;
	287
6d313664	288	my $line = $self->{iterator}->next;
f7c69158 NC	289	unless ( defined $line ) {
	290	delete $self->{parser}; # break circular ref
	291	return;
	292	}
b965d173 NC	293
	294	my $token;
	295
6d313664	296	for my $token_data ( @{ $self->{ordered_tokens} } ) {
b965d173 NC	297	if ( $line =~ $token_data->{syntax} ) {
	298	my $handler = $token_data->{handler};
	299	$token = $self->$handler($line);
	300	last;
	301	}
	302	}
	303
	304	$token = $self->_make_unknown_token($line) unless $token;
	305
f7c69158	306	return $self->{parser}->make_result($token);
b965d173 NC	307	}
	308
	309	##############################################################################
	310
	311	=head3 C<token_types>
	312
	313	my @types = $grammar->token_types;
	314
	315	Returns the different types of tokens which this grammar can parse.
	316
	317	=cut
	318
	319	sub token_types {
	320	my $self = shift;
	321	return keys %{ $self->{tokens} };
	322	}
	323
	324	##############################################################################
	325
	326	=head3 C<syntax_for>
	327
	328	my $syntax = $grammar->syntax_for($token_type);
	329
	330	Returns a pre-compiled regular expression which will match a chunk of TAP
	331	corresponding to the token type. For example (not that you should really pay
	332	attention to this, C<< $grammar->syntax_for('comment') >> will return
	333	C<< qr/^#(.*)/ >>.
	334
	335	=cut
	336
	337	sub syntax_for {
	338	my ( $self, $type ) = @_;
	339	return $self->{tokens}->{$type}->{syntax};
	340	}
	341
	342	##############################################################################
	343
	344	=head3 C<handler_for>
	345
	346	my $handler = $grammar->handler_for($token_type);
	347
	348	Returns a code reference which, when passed an appropriate line of TAP,
	349	returns the lexed token corresponding to that line. As a result, the basic
	350	TAP parsing loop looks similar to the following:
	351
	352	my @tokens;
	353	my $grammar = TAP::Grammar->new;
	354	LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
6d313664	355	for my $type ( $grammar->token_types ) {
b965d173 NC	356	my $syntax = $grammar->syntax_for($type);
	357	if ( $line =~ $syntax ) {
	358	my $handler = $grammar->handler_for($type);
	359	push @tokens => $grammar->$handler($line);
	360	next LINE;
	361	}
	362	}
	363	push @tokens => $grammar->_make_unknown_token($line);
	364	}
	365
	366	=cut
	367
	368	sub handler_for {
	369	my ( $self, $type ) = @_;
	370	return $self->{tokens}->{$type}->{handler};
	371	}
	372
	373	sub _make_version_token {
	374	my ( $self, $line, $version ) = @_;
	375	return {
	376	type => 'version',
	377	raw => $line,
	378	version => $version,
	379	};
	380	}
	381
	382	sub _make_plan_token {
	383	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
	384
f7c69158 NC	385	if ( $directive eq 'SKIP'
	386	&& 0 != $tests_planned
	387	&& $self->{version} < 13 )
	388	{
b965d173 NC	389	warn
	390	"Specified SKIP directive in plan but more than 0 tests ($line)\n";
	391	}
f7c69158	392
b965d173 NC	393	return {
	394	type => 'plan',
	395	raw => $line,
	396	tests_planned => $tests_planned,
	397	directive => $directive,
	398	explanation => _trim($explanation),
	399	todo_list => $todo,
	400	};
	401	}
	402
	403	sub _make_test_token {
	404	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
27fc0087	405	return {
b965d173	406	ok => $ok,
dbd04185 NC	407
	408	# forcing this to be an integer (and not a string) reduces memory
	409	# consumption. RT #84939
	410	test_num => ( defined $num ? 0 + $num : undef ),
b965d173	411	description => _trim($desc),
27fc0087	412	directive => ( defined $dir ? uc $dir : '' ),
b965d173 NC	413	explanation => _trim($explanation),
	414	raw => $line,
	415	type => 'test',
27fc0087	416	};
b965d173 NC	417	}
	418
	419	sub _make_unknown_token {
	420	my ( $self, $line ) = @_;
	421	return {
	422	raw => $line,
	423	type => 'unknown',
	424	};
	425	}
	426
	427	sub _make_comment_token {
	428	my ( $self, $line, $comment ) = @_;
	429	return {
	430	type => 'comment',
	431	raw => $line,
	432	comment => _trim($comment)
	433	};
	434	}
	435
	436	sub _make_bailout_token {
	437	my ( $self, $line, $explanation ) = @_;
	438	return {
	439	type => 'bailout',
	440	raw => $line,
	441	bailout => _trim($explanation)
	442	};
	443	}
	444
	445	sub _make_yaml_token {
	446	my ( $self, $pad, $marker ) = @_;
	447
	448	my $yaml = TAP::Parser::YAMLish::Reader->new;
	449
6d313664	450	my $iterator = $self->{iterator};
b965d173 NC	451
	452	# Construct a reader that reads from our input stripping leading
	453	# spaces from each line.
	454	my $leader = length($pad);
	455	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
	456	my @extra = ($marker);
	457	my $reader = sub {
	458	return shift @extra if @extra;
6d313664	459	my $line = $iterator->next;
b965d173 NC	460	return $2 if $line =~ $strip;
	461	return;
	462	};
	463
	464	my $data = $yaml->read($reader);
	465
	466	# Reconstitute input. This is convoluted. Maybe we should just
	467	# record it on the way in...
	468	chomp( my $raw = $yaml->get_raw );
	469	$raw =~ s/^/$pad/mg;
	470
	471	return {
	472	type => 'yaml',
	473	raw => $raw,
	474	data => $data
	475	};
	476	}
	477
2a7f4b9b SP	478	sub _make_pragma_token {
	479	my ( $self, $line, $pragmas ) = @_;
	480	return {
	481	type => 'pragma',
	482	raw => $line,
	483	pragmas => [ split /\s,\s/, _trim($pragmas) ],
	484	};
	485	}
	486
b965d173 NC	487	sub _trim {
	488	my $data = shift;
	489
	490	return '' unless defined $data;
	491
	492	$data =~ s/^\s+//;
	493	$data =~ s/\s+$//;
	494	return $data;
	495	}
	496
f7c69158 NC	497	1;
f7c69158 NC	498
b965d173 NC	499	=head1 TAP GRAMMAR
	500
	501	B<NOTE:> This grammar is slightly out of date. There's still some discussion
	502	about it and a new one will be provided when we have things better defined.
	503
	504	The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
	505	stream-based protocol. In fact, it's quite legal to have an infinite stream.
	506	For the same reason that we don't apply regexes to streams, we're not using a
	507	formal grammar here. Instead, we parse the TAP in lines.
	508
686add28	509	For purposes for forward compatibility, any result which does not match the
b965d173 NC	510	following grammar is currently referred to as
	511	L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
	512
	513	A formal grammar would look similar to the following:
	514
	515	(*
	516	For the time being, I'm cheating on the EBNF by allowing
	517	certain terms to be defined by POSIX character classes by
	518	using the following syntax:
	519
	520	digit ::= [:digit:]
	521
	522	As far as I am aware, that's not valid EBNF. Sue me. I
	523	didn't know how to write "char" otherwise (Unicode issues).
	524	Suggestions welcome.
	525	*)
	526
	527	tap ::= version? { comment \| unknown } leading_plan lines
	528	\|
	529	lines trailing_plan {comment}
	530
	531	version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
	532
	533	leading_plan ::= plan skip_directive? "\n"
	534
	535	trailing_plan ::= plan "\n"
	536
	537	plan ::= '1..' nonNegativeInteger
	538
	539	lines ::= line {line}
	540
	541	line ::= (comment \| test \| unknown \| bailout ) "\n"
	542
	543	test ::= status positiveInteger? description? directive?
	544
	545	status ::= 'not '? 'ok '
	546
	547	description ::= (character - (digit \| '#')) {character - '#'}
	548
	549	directive ::= todo_directive \| skip_directive
	550
	551	todo_directive ::= hash_mark 'TODO' ' ' {character}
	552
	553	skip_directive ::= hash_mark 'SKIP' ' ' {character}
	554
	555	comment ::= hash_mark {character}
	556
	557	hash_mark ::= '#' {' '}
	558
	559	bailout ::= 'Bail out!' {character}
	560
	561	unknown ::= { (character - "\n") }
	562
	563	(* POSIX character classes and other terminals *)
	564
	565	digit ::= [:digit:]
	566	character ::= ([:print:] - "\n")
	567	positiveInteger ::= ( digit - '0' ) {digit}
	568	nonNegativeInteger ::= digit {digit}
	569
f7c69158	570	=head1 SUBCLASSING
b965d173	571
f7c69158	572	Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
b965d173	573
f7c69158 NC	574	If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
	575	do is read through the code. There's no easy way of summarizing it here.
	576
	577	=head1 SEE ALSO
	578
	579	L<TAP::Object>,
	580	L<TAP::Parser>,
	581	L<TAP::Parser::Iterator>,
	582	L<TAP::Parser::Result>,
	583
	584	=cut