perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!./perl
	2
	3	# The tests are in a separate file 't/re/re_tests'.
	4	# Each line in that file is a separate test.
	5	# There are five columns, separated by tabs.
	6	#
	7	# Column 1 contains the pattern, optionally enclosed in C<''>.
	8	# Modifiers can be put after the closing C<'>.
	9	#
	10	# Column 2 contains the string to be matched.
	11	#
	12	# Column 3 contains the expected result:
	13	# y expect a match
	14	# n expect no match
	15	# c expect an error
	16	# T the test is a TODO (can be combined with y/n/c)
	17	# M skip test on miniperl (combine with y/n/c/T)
	18	# B test exposes a known bug in Perl, should be skipped
	19	# b test exposes a known bug in Perl, should be skipped if noamp
	20	# t test exposes a bug with threading, TODO if qr_embed_thr
	21	# s test should only be run for regex_sets_compat.t
	22	# S test should not be run for regex_sets_compat.t
	23	#
	24	# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
	25	#
	26	# Column 4 contains a string, usually C<$&>.
	27	#
	28	# Column 5 contains the expected result of double-quote
	29	# interpolating that string after the match, or start of error message.
	30	#
	31	# Column 6, if present, contains a reason why the test is skipped.
	32	# This is printed with "skipped", for harness to pick up.
	33	#
	34	# \n in the tests are interpolated, as are variables of the form ${\w+}.
	35	#
	36	# Blanks lines are treated as PASSING tests to keep the line numbers
	37	# linked to the test number.
	38	#
	39	# If you want to add a regular expression test that can't be expressed
	40	# in this format, don't add it here: put it in re/pat.t instead.
	41	#
	42	# Note that the inputs get passed on as "m're'", so the re bypasses the lexer.
	43	# This means this file cannot be used for testing anything that the lexer
	44	# handles; in 5.12 this means just \N{NAME} and \N{U+...}.
	45	#
	46	# Note that columns 2,3 and 5 are all enclosed in double quotes and then
	47	# evalled; so something like a\"\x{100}$1 has length 3+length($1).
	48
	49	my ($file, $iters);
	50	BEGIN {
	51	$iters = shift \|\| 1; # Poor man performance suite, 10000 is OK.
	52
	53	# Do this open before any chdir
	54	$file = shift;
	55	if (defined $file) {
	56	open TESTS, $file or die "Can't open $file";
	57	}
	58
	59	chdir 't' if -d 't';
	60	@INC = '../lib';
	61
	62	}
	63
	64	sub _comment {
	65	return map { /^#/ ? "$_\n" : "# $_\n" }
	66	map { split /\n/ } @_;
	67	}
	68
	69	use strict;
	70	use warnings FATAL=>"all";
	71	use vars qw($bang $ffff $nulnul); # used by the tests
	72	use vars qw($qr $skip_amp $qr_embed $qr_embed_thr $regex_sets); # set by our callers
	73
	74
	75
	76	if (!defined $file) {
	77	open TESTS, 're/re_tests' or die "Can't open re/re_tests: $!";
	78	}
	79
	80	my @tests = <TESTS>;
	81
	82	close TESTS;
	83
	84	$bang = sprintf "\\%03o", ord "!"; # \41 would not be portable.
	85	$ffff = chr(0xff) x 2;
	86	$nulnul = "\0" x 2;
	87	my $OP = $qr ? 'qr' : 'm';
	88
	89	$\| = 1;
	90	printf "1..%d\n# $iters iterations\n", scalar @tests;
	91
	92	my $test;
	93	TEST:
	94	foreach (@tests) {
	95	$test++;
	96	if (!/\S/ \|\| /^\s*#/ \|\| /^__END__$/) {
	97	print "ok $test # (Blank line or comment)\n";
	98	if (/#/) { print $_ };
	99	next;
	100	}
	101	chomp;
	102	s/\\n/\n/g unless $regex_sets;
	103	my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
	104	if (!defined $subject) {
	105	die "Bad test definition on line $test: $_\n";
	106	}
	107	$reason = '' unless defined $reason;
	108	my $input = join(':',$pat,$subject,$result,$repl,$expect);
	109	# the double '' below keeps simple syntax highlighters from going crazy
	110	$pat = "'$pat'" unless $pat =~ /^[:''\/]/;
	111	$pat =~ s/(\$\{\w+\})/$1/eeg;
	112	$pat =~ s/\\n/\n/g unless $regex_sets;
	113	$subject = eval qq("$subject"); die $@ if $@;
	114	$expect = eval qq("$expect"); die $@ if $@;
	115	$expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
	116	my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
	117	my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
	118	++$skip if $result =~ s/M// && !defined &DynaLoader::boot_DynaLoader;
	119	if ($result =~ s/ ( [Ss] ) //x) {
	120	if (($1 eq 'S' && $regex_sets) \|\| ($1 eq 's' && ! $regex_sets)) {
	121	$skip++;
	122	$reason = "Test not valid for $0";
	123	}
	124	}
	125	$reason = 'skipping $&' if $reason eq '' && $skip_amp;
	126	$result =~ s/B//i unless $skip;
	127	my $todo= $result =~ s/T// ? " # TODO" : "";
	128	if (! $skip && $regex_sets) {
	129
	130	# If testing regex sets, change the [bracketed] classes into
	131	# (?[bracketed]).
	132
	133	if ($pat !~ / \[ /x) {
	134
	135	$skip++;
	136	$reason = "Pattern doesn't contain [brackets]";
	137	}
	138	else { # Use non-regex features of Perl to accomplish this.
	139	my $modified = "";
	140	my $in_brackets = 0;
	141
	142	# Go through the pattern character-by-character. We also add
	143	# blanks around each token to test the /x parts of (?[ ])
	144	my $pat_len = length($pat);
	145	CHAR: for (my $i = 0; $i < $pat_len; $i++) {
	146	my $curchar = substr($pat, $i, 1);
	147	if ($curchar eq '\\') {
	148	$modified .= " " if $in_brackets;
	149	$modified .= $curchar;
	150	$i++;
	151
	152	# Get the character the backslash is escaping
	153	$curchar = substr($pat, $i, 1);
	154	$modified .= $curchar;
	155
	156	# If the character following that is a '{}', treat the
	157	# entire amount as a single token
	158	if ($i < $pat_len -1 && substr($pat, $i+1, 1) eq '{') {
	159	my $j = index($pat, '}', $i+2);
	160	if ($j < 0) {
	161	last unless $in_brackets;
	162	if ($result eq 'c') {
	163	$skip++;
	164	$reason = "Can't handle compilation errors with unmatched '{'";
	165	}
	166	else {
	167	print "not ok $test # Problem in $0; original = '$pat'; mod = '$modified'\n";
	168	next TEST;
	169	}
	170	}
	171	$modified .= substr($pat, $i+1, $j - $i);
	172	$i = $j;
	173	}
	174	elsif ($curchar eq 'x') {
	175
	176	# \x without brackets is supposed to be followed by 2
	177	# hex digits. Take up to 2, and then add a blank
	178	# after the last one. This avoids getting errors from
	179	# (?[ ]) for run-ons, like \xabc
	180	my $j = $i + 1;
	181	for (; $j < $i + 3 && $j < $pat_len; $j++) {
	182	my $curord = ord(substr($pat, $j, 1));
	183	if (!(($curord >= ord("A") && $curord <= ord("F"))
	184	\|\| ($curord >= ord("a") && $curord <= ord("f"))
	185	\|\| ($curord >= ord("0") && $curord <= ord("9"))))
	186	{
	187	$j++;
	188	last;
	189	}
	190	}
	191	$j--;
	192	$modified .= substr($pat, $i + 1, $j - $i) . " ";
	193	$i = $j;
	194	}
	195	elsif (ord($curchar) >= ord('0')
	196	&& (ord($curchar) <= ord('7')))
	197	{
	198	# Similarly, octal constants have up to 3 digits.
	199	my $j = $i + 1;
	200	for (; $j < $i + 3 && $j < $pat_len; $j++) {
	201	my $curord = ord(substr($pat, $j, 1));
	202	if (! ($curord >= ord("0") && $curord <= ord("7"))) {
	203	$j++;
	204	last;
	205	}
	206	}
	207	$j--;
	208	$modified .= substr($pat, $i + 1, $j - $i);
	209	$i = $j;
	210	}
	211
	212	next;
	213	} # End of processing a backslash sequence
	214
	215	if (! $in_brackets # Skip (?{ })
	216	&& $curchar eq '('
	217	&& $i < $pat_len - 2
	218	&& substr($pat, $i+1, 1) eq '?'
	219	&& substr($pat, $i+2, 1) eq '{')
	220	{
	221	$skip++;
	222	$reason = "Pattern contains '(?{'";
	223	last;
	224	}
	225
	226	# Closing ']'
	227	if ($curchar eq ']' && $in_brackets) {
	228	$modified .= " ] ])";
	229	$in_brackets = 0;
	230	next;
	231	}
	232
	233	# A regular character.
	234	if ($curchar ne '[') {
	235	if (! $in_brackets) {
	236	$modified .= $curchar;
	237	}
	238	else {
	239	$modified .= " $curchar ";
	240	}
	241	next;
	242	}
	243
	244	# Here is a '['; If not in a bracketed class, treat as the
	245	# beginning of one.
	246	if (! $in_brackets) {
	247	$in_brackets = 1;
	248	$modified .= "(?[ [ ";
	249
	250	# An immediately following ']' or '^]' is not the ending
	251	# of the class, but is to be treated literally.
	252	if ($i < $pat_len - 1
	253	&& substr($pat, $i+1, 1) eq ']')
	254	{
	255	$i ++;
	256	$modified .= " ] ";
	257	}
	258	elsif ($i < $pat_len - 2
	259	&& substr($pat, $i+1, 1) eq '^'
	260	&& substr($pat, $i+2, 1) eq ']')
	261	{
	262	$i += 2;
	263	$modified .= " ^ ] ";
	264	}
	265	next;
	266	}
	267
	268	# Here is a plain '[' within [ ]. Could mean wants to
	269	# match a '[', or it could be a posix class that has a
	270	# corresponding ']'. Absorb either
	271
	272	$modified .= ' [';
	273	last if $i >= $pat_len - 1;
	274
	275	$i++;
	276	$curchar = substr($pat, $i, 1);
	277	if ($curchar =~ /[:=.]/) {
	278	for (my $j = $i + 1; $j < $pat_len; $j++) {
	279	next unless substr($pat, $j, 1) eq ']';
	280	last if $j - $i < 2;
	281	if (substr($pat, $j - 1, 1) eq $curchar) {
	282	# Here, is a posix class
	283	$modified .= substr($pat, $i, $j - $i + 1) . " ";
	284	$i = $j;
	285	next CHAR;
	286	}
	287	}
	288	}
	289
	290	# Here wasn't a posix class, just process normally
	291	$modified .= " $curchar ";
	292	}
	293
	294	if ($in_brackets && ! $skip) {
	295	if ($result eq 'c') {
	296	$skip++;
	297	$reason = "Can't figure out where to put the (?[ and ]) since is a compilation error";
	298	}
	299	else {
	300	print "not ok $test # Problem in $0; original = '$pat'; mod = '$modified'\n";
	301	next TEST;
	302	}
	303	}
	304
	305	# Use our modified pattern instead of the original
	306	$pat = $modified;
	307	}
	308	}
	309
	310	for my $study ('', 'study $subject', 'utf8::upgrade($subject)',
	311	'utf8::upgrade($subject); study $subject') {
	312	# Need to make a copy, else the utf8::upgrade of an already studied
	313	# scalar confuses things.
	314	my $subject = $subject;
	315	my $c = $iters;
	316	my ($code, $match, $got);
	317	if ($repl eq 'pos') {
	318	$code= <<EOFCODE;
	319	$study;
	320	pos(\$subject)=0;
	321	\$match = ( \$subject =~ m${pat}g );
	322	\$got = pos(\$subject);
	323	EOFCODE
	324	}
	325	elsif ($qr_embed) {
	326	$code= <<EOFCODE;
	327	my \$RE = qr$pat;
	328	$study;
	329	\$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--;
	330	\$got = "$repl";
	331	EOFCODE
	332	}
	333	elsif ($qr_embed_thr) {
	334	$code= <<EOFCODE;
	335	# Can't run the match in a subthread, but can do this and
	336	# clone the pattern the other way.
	337	my \$RE = threads->new(sub {qr$pat})->join();
	338	$study;
	339	\$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--;
	340	\$got = "$repl";
	341	EOFCODE
	342	}
	343	else {
	344	$code= <<EOFCODE;
	345	$study;
	346	\$match = (\$subject =~ $OP$pat) while \$c--;
	347	\$got = "$repl";
	348	EOFCODE
	349	}
	350	$code = "no warnings 'experimental::regex_sets';$code" if $regex_sets;
	351	#$code.=qq[\n\$expect="$expect";\n];
	352	#use Devel::Peek;
	353	#die Dump($code) if $pat=~/\\h/ and $subject=~/\x{A0}/;
	354	{
	355	# Probably we should annotate specific tests with which warnings
	356	# categories they're known to trigger, and hence should be
	357	# disabled just for that test
	358	no warnings qw(uninitialized regexp);
	359	eval $code;
	360	}
	361	chomp( my $err = $@ );
	362	if ( $skip ) {
	363	print "ok $test # skipped", length($reason) ? ". $reason" : '', "\n";
	364	next TEST;
	365	}
	366	elsif ($result eq 'c') {
	367	if ($err !~ m!^\Q$expect!) { print "not ok $test$todo (compile) $input => '$err'\n"; next TEST }
	368	last; # no need to study a syntax error
	369	}
	370	elsif ( $todo_qr ) {
	371	print "not ok $test # TODO", length($reason) ? " - $reason" : '', "\n";
	372	next TEST;
	373	}
	374	elsif ($@) {
	375	print "not ok $test$todo $input => error '$err'\n", _comment("$code\n$@\n"); next TEST;
	376	}
	377	elsif ($result =~ /^n/) {
	378	if ($match) { print "not ok $test$todo ($study) $input => false positive\n"; next TEST }
	379	}
	380	else {
	381	if (!$match \|\| $got ne $expect) {
	382	eval { require Data::Dumper };
	383	no warnings "utf8"; # But handle should be utf8
	384	if ($@ \|\| !defined &DynaLoader::boot_DynaLoader) {
	385	# Data::Dumper will load on miniperl, but fail when used in
	386	# anger as it tries to load B. I'd prefer to keep the
	387	# regular calls below outside of an eval so that real
	388	# (unknown) failures get spotted, not ignored.
	389	print "not ok $test$todo ($study) $input => '$got', match=$match\n", _comment("$code\n");
	390	}
	391	else { # better diagnostics
	392	my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
	393	my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
	394	print "not ok $test$todo ($study) $input => '$got', match=$match\n", _comment("$s\n$g\n$code\n");
	395	}
	396	next TEST;
	397	}
	398	}
	399	}
	400	print "ok $test$todo\n";
	401	}
	402
	403	1;