perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!./perl
	2
	3	# The tests are in a separate file 't/re/re_tests'.
	4	# Each line in that file is a separate test.
	5	# There are five columns, separated by tabs.
	6	# An optional sixth column is used to give a reason, only when skipping tests
	7	#
	8	# Column 1 contains the pattern, optionally enclosed in C<''>.
	9	# Modifiers can be put after the closing C<'>.
	10	#
	11	# Column 2 contains the string to be matched.
	12	#
	13	# Column 3 contains the expected result:
	14	# y expect a match
	15	# n expect no match
	16	# c expect an error
	17	# T the test is a TODO (can be combined with y/n/c)
	18	# M skip test on miniperl (combine with y/n/c/T)
	19	# B test exposes a known bug in Perl, should be skipped
	20	# b test exposes a known bug in Perl, should be skipped if noamp
	21	# t test exposes a bug with threading, TODO if qr_embed_thr
	22	# s test should only be run for regex_sets_compat.t
	23	# S test should not be run for regex_sets_compat.t
	24	# a test should only be run on ASCII platforms
	25	# e test should only be run on EBCDIC platforms
	26	#
	27	# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
	28	#
	29	# Column 4 contains a string, usually C<$&>.
	30	#
	31	# Column 5 contains the expected result of double-quote
	32	# interpolating that string after the match, or start of error message.
	33	#
	34	# Column 6, if present, contains a reason why the test is skipped.
	35	# This is printed with "skipped", for harness to pick up.
	36	#
	37	# Column 7 can be used for comments
	38	#
	39	# \n in the tests are interpolated, as are variables of the form ${\w+}.
	40	#
	41	# Blanks lines are treated as PASSING tests to keep the line numbers
	42	# linked to the test number.
	43	#
	44	# If you want to add a regular expression test that can't be expressed
	45	# in this format, don't add it here: put it in re/pat.t instead.
	46	#
	47	# Note that the inputs get passed on as "m're'", so the re bypasses the lexer.
	48	# This means this file cannot be used for testing anything that the lexer
	49	# handles; in 5.12 this means just \N{NAME} and \N{U+...}.
	50	#
	51	# Note that columns 2,3 and 5 are all enclosed in double quotes and then
	52	# evalled; so something like a\"\x{100}$1 has length 3+length($1).
	53	#
	54	# \x... and \o{...} constants are automatically converted to the native
	55	# character set if necessary. \[0-7] constants aren't
	56
	57	my ($file, $iters);
	58	BEGIN {
	59	$iters = shift \|\| 1; # Poor man performance suite, 10000 is OK.
	60
	61	# Do this open before any chdir
	62	$file = shift;
	63	if (defined $file) {
	64	open TESTS, $file or die "Can't open $file";
	65	}
	66
	67	chdir 't' if -d 't';
	68	@INC = qw '../lib ../ext/re';
	69	if (!defined &DynaLoader::boot_DynaLoader) { # miniperl
	70	print("1..0 # Skip Unicode tables not built yet\n"), exit
	71	unless eval 'require "unicore/Heavy.pl"';
	72	}
	73	}
	74
	75	sub _comment {
	76	return map { /^#/ ? "$_\n" : "# $_\n" }
	77	map { split /\n/ } @_;
	78	}
	79
	80	sub convert_from_ascii {
	81	my $string = shift;
	82
	83	#my $save = $string;
	84	# Convert \x{...}, \o{...}
	85	$string =~ s/ (?<! \\ ) \\x\{ ( .*? ) } / "\\x{" . sprintf("%X", utf8::unicode_to_native(hex $1)) . "}" /gex;
	86	$string =~ s/ (?<! \\ ) \\o\{ ( .*? ) } / "\\o{" . sprintf("%o", utf8::unicode_to_native(oct $1)) . "}" /gex;
	87
	88	# Convert \xAB
	89	$string =~ s/ (?<! \\ ) \\x ( [A-Fa-f0-9]{2} ) / "\\x" . sprintf("%02X", utf8::unicode_to_native(hex $1)) /gex;
	90
	91	# Convert \xA
	92	$string =~ s/ (?<! \\ ) \\x ( [A-Fa-f0-9] ) (?! [A-Fa-f0-9] ) / "\\x" . sprintf("%X", utf8::unicode_to_native(hex $1)) /gex;
	93
	94	#print STDERR __LINE__, ": $save\n$string\n" if $save ne $string;
	95	return $string;
	96	}
	97
	98	use strict;
	99	use warnings FATAL=>"all";
	100	use vars qw($bang $ffff $nulnul); # used by the tests
	101	use vars qw($qr $skip_amp $qr_embed $qr_embed_thr $regex_sets); # set by our callers
	102
	103
	104
	105	if (!defined $file) {
	106	open TESTS, 're/re_tests' or die "Can't open re/re_tests: $!";
	107	}
	108
	109	my @tests = <TESTS>;
	110
	111	close TESTS;
	112
	113	$bang = sprintf "\\%03o", ord "!"; # \41 would not be portable.
	114	$ffff = chr(0xff) x 2;
	115	$nulnul = "\0" x 2;
	116	my $OP = $qr ? 'qr' : 'm';
	117
	118	$\| = 1;
	119	printf "1..%d\n# $iters iterations\n", scalar @tests;
	120
	121	my $test;
	122	TEST:
	123	foreach (@tests) {
	124	$test++;
	125	if (!/\S/ \|\| /^\s*#/ \|\| /^__END__$/) {
	126	chomp;
	127	my ($not,$comment)= split /\s#\s/, $_, 2;
	128	$comment \|\|= "(blank line)";
	129	print "ok $test # $comment\n";
	130	next;
	131	}
	132	chomp;
	133	s/\\n/\n/g unless $regex_sets;
	134	my ($pat, $subject, $result, $repl, $expect, $reason, $comment) = split(/\t/,$_,7);
	135	if (!defined $subject) {
	136	die "Bad test definition on line $test: $_\n";
	137	}
	138	$reason = '' unless defined $reason;
	139	my $input = join(':',$pat,$subject,$result,$repl,$expect);
	140
	141	# the double '' below keeps simple syntax highlighters from going crazy
	142	$pat = "'$pat'" unless $pat =~ /^[:''\/]/;
	143	$pat =~ s/(\$\{\w+\})/$1/eeg;
	144	$pat =~ s/\\n/\n/g unless $regex_sets;
	145	$pat = convert_from_ascii($pat) if ord("A") != 65;
	146
	147	$subject = convert_from_ascii($subject) if ord("A") != 65;
	148	$subject = eval qq("$subject"); die $@ if $@;
	149
	150	$expect = convert_from_ascii($expect) if ord("A") != 65;
	151	$expect = eval qq("$expect"); die $@ if $@;
	152	$expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
	153
	154	my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
	155	my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
	156	++$skip if $result =~ s/M// && !defined &DynaLoader::boot_DynaLoader;
	157	if ($result =~ s/ ( [Ss] ) //x) {
	158	if (($1 eq 'S' && $regex_sets) \|\| ($1 eq 's' && ! $regex_sets)) {
	159	$skip++;
	160	$reason = "Test not valid for $0";
	161	}
	162	}
	163	if ($result =~ s/a// && ord("A") != 65) {
	164	$skip++;
	165	$reason = "Test is only valid for ASCII platforms. $reason";
	166	}
	167	if ($result =~ s/e// && ord("A") != 193) {
	168	$skip++;
	169	$reason = "Test is only valid for EBCDIC platforms. $reason";
	170	}
	171	$reason = 'skipping $&' if $reason eq '' && $skip_amp;
	172	$result =~ s/B//i unless $skip;
	173	my $todo= $result =~ s/T// ? " # TODO" : "";
	174	my $testname= $test;
	175	if ($comment) {
	176	$comment=~s/^\s(?:#\s)?//;
	177	$testname .= " - $comment" if $comment;
	178	}
	179	if (! $skip && $regex_sets) {
	180
	181	# If testing regex sets, change the [bracketed] classes into
	182	# (?[bracketed]). But note that '\[' and '\c[' don't introduce such a
	183	# class. (We don't bother looking for an odd number of backslashes,
	184	# as this hasn't been needed so far.)
	185	if ($pat !~ / (?<!\\c) (?<!\\) \[ /x) {
	186	$skip++;
	187	$reason = "Pattern doesn't contain [brackets]";
	188	}
	189	else { # Use non-regex features of Perl to accomplish this.
	190	my $modified = "";
	191	my $in_brackets = 0;
	192
	193	# Go through the pattern character-by-character. We also add
	194	# blanks around each token to test the /x parts of (?[ ])
	195	my $pat_len = length($pat);
	196	CHAR: for (my $i = 0; $i < $pat_len; $i++) {
	197	my $curchar = substr($pat, $i, 1);
	198	if ($curchar eq '\\') {
	199	$modified .= " " if $in_brackets;
	200	$modified .= $curchar;
	201	$i++;
	202
	203	# Get the character the backslash is escaping
	204	$curchar = substr($pat, $i, 1);
	205	$modified .= $curchar;
	206
	207	# If the character following that is a '{}', treat the
	208	# entire amount as a single token
	209	if ($i < $pat_len -1 && substr($pat, $i+1, 1) eq '{') {
	210	my $j = index($pat, '}', $i+2);
	211	if ($j < 0) {
	212	last unless $in_brackets;
	213	if ($result eq 'c') {
	214	$skip++;
	215	$reason = "Can't handle compilation errors with unmatched '{'";
	216	}
	217	else {
	218	print "not ok $testname # Problem in $0; original = '$pat'; mod = '$modified'\n";
	219	next TEST;
	220	}
	221	}
	222	$modified .= substr($pat, $i+1, $j - $i);
	223	$i = $j;
	224	}
	225	elsif ($curchar eq 'x') {
	226
	227	# \x without brackets is supposed to be followed by 2
	228	# hex digits. Take up to 2, and then add a blank
	229	# after the last one. This avoids getting errors from
	230	# (?[ ]) for run-ons, like \xabc
	231	my $j = $i + 1;
	232	for (; $j < $i + 3 && $j < $pat_len; $j++) {
	233	my $curord = ord(substr($pat, $j, 1));
	234	if (!(($curord >= ord("A") && $curord <= ord("F"))
	235	\|\| ($curord >= ord("a") && $curord <= ord("f"))
	236	\|\| ($curord >= ord("0") && $curord <= ord("9"))))
	237	{
	238	$j++;
	239	last;
	240	}
	241	}
	242	$j--;
	243	$modified .= substr($pat, $i + 1, $j - $i) . " ";
	244	$i = $j;
	245	}
	246	elsif (ord($curchar) >= ord('0')
	247	&& (ord($curchar) <= ord('7')))
	248	{
	249	# Similarly, octal constants have up to 3 digits.
	250	my $j = $i + 1;
	251	for (; $j < $i + 3 && $j < $pat_len; $j++) {
	252	my $curord = ord(substr($pat, $j, 1));
	253	if (! ($curord >= ord("0") && $curord <= ord("7"))) {
	254	$j++;
	255	last;
	256	}
	257	}
	258	$j--;
	259	$modified .= substr($pat, $i + 1, $j - $i);
	260	$i = $j;
	261	}
	262
	263	next;
	264	} # End of processing a backslash sequence
	265
	266	if (! $in_brackets # Skip (?{ })
	267	&& $curchar eq '('
	268	&& $i < $pat_len - 2
	269	&& substr($pat, $i+1, 1) eq '?'
	270	&& substr($pat, $i+2, 1) eq '{')
	271	{
	272	$skip++;
	273	$reason = "Pattern contains '(?{'";
	274	last;
	275	}
	276
	277	# Closing ']'
	278	if ($curchar eq ']' && $in_brackets) {
	279	$modified .= " ] ])";
	280	$in_brackets = 0;
	281	next;
	282	}
	283
	284	# A regular character.
	285	if ($curchar ne '[') {
	286	if (! $in_brackets) {
	287	$modified .= $curchar;
	288	}
	289	else {
	290	$modified .= " $curchar ";
	291	}
	292	next;
	293	}
	294
	295	# Here is a '['; If not in a bracketed class, treat as the
	296	# beginning of one.
	297	if (! $in_brackets) {
	298	$in_brackets = 1;
	299	$modified .= "(?[ [ ";
	300
	301	# An immediately following ']' or '^]' is not the ending
	302	# of the class, but is to be treated literally.
	303	if ($i < $pat_len - 1
	304	&& substr($pat, $i+1, 1) eq ']')
	305	{
	306	$i ++;
	307	$modified .= " ] ";
	308	}
	309	elsif ($i < $pat_len - 2
	310	&& substr($pat, $i+1, 1) eq '^'
	311	&& substr($pat, $i+2, 1) eq ']')
	312	{
	313	$i += 2;
	314	$modified .= " ^ ] ";
	315	}
	316	next;
	317	}
	318
	319	# Here is a plain '[' within [ ]. Could mean wants to
	320	# match a '[', or it could be a posix class that has a
	321	# corresponding ']'. Absorb either
	322
	323	$modified .= ' [';
	324	last if $i >= $pat_len - 1;
	325
	326	$i++;
	327	$curchar = substr($pat, $i, 1);
	328	if ($curchar =~ /[:=.]/) {
	329	for (my $j = $i + 1; $j < $pat_len; $j++) {
	330	next unless substr($pat, $j, 1) eq ']';
	331	last if $j - $i < 2;
	332	if (substr($pat, $j - 1, 1) eq $curchar) {
	333	# Here, is a posix class
	334	$modified .= substr($pat, $i, $j - $i + 1) . " ";
	335	$i = $j;
	336	next CHAR;
	337	}
	338	}
	339	}
	340
	341	# Here wasn't a posix class, just process normally
	342	$modified .= " $curchar ";
	343	}
	344
	345	if ($in_brackets && ! $skip) {
	346	if ($result eq 'c') {
	347	$skip++;
	348	$reason = "Can't figure out where to put the (?[ and ]) since is a compilation error";
	349	}
	350	else {
	351	print "not ok $testname # Problem in $0; original = '$pat'; mod = '$modified'\n";
	352	next TEST;
	353	}
	354	}
	355
	356	# Use our modified pattern instead of the original
	357	$pat = $modified;
	358	}
	359	}
	360
	361	for my $study ('', 'study $subject', 'utf8::upgrade($subject)',
	362	'utf8::upgrade($subject); study $subject') {
	363	# Need to make a copy, else the utf8::upgrade of an already studied
	364	# scalar confuses things.
	365	my $subject = $subject;
	366	my $c = $iters;
	367	my ($code, $match, $got);
	368	if ($repl eq 'pos') {
	369	$code= <<EOFCODE;
	370	$study;
	371	pos(\$subject)=0;
	372	\$match = ( \$subject =~ m${pat}g );
	373	\$got = pos(\$subject);
	374	EOFCODE
	375	}
	376	elsif ($qr_embed) {
	377	$code= <<EOFCODE;
	378	my \$RE = qr$pat;
	379	$study;
	380	\$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--;
	381	\$got = "$repl";
	382	EOFCODE
	383	}
	384	elsif ($qr_embed_thr) {
	385	$code= <<EOFCODE;
	386	# Can't run the match in a subthread, but can do this and
	387	# clone the pattern the other way.
	388	my \$RE = threads->new(sub {qr$pat})->join();
	389	$study;
	390	\$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--;
	391	\$got = "$repl";
	392	EOFCODE
	393	}
	394	else {
	395	$code= <<EOFCODE;
	396	$study;
	397	\$match = (\$subject =~ $OP$pat) while \$c--;
	398	\$got = "$repl";
	399	EOFCODE
	400	}
	401	$code = "no warnings 'experimental::regex_sets';$code" if $regex_sets;
	402	#$code.=qq[\n\$expect="$expect";\n];
	403	#use Devel::Peek;
	404	#die Dump($code) if $pat=~/\\h/ and $subject=~/\x{A0}/;
	405	{
	406	# Probably we should annotate specific tests with which warnings
	407	# categories they're known to trigger, and hence should be
	408	# disabled just for that test
	409	no warnings qw(uninitialized regexp deprecated);
	410	eval $code;
	411	}
	412	chomp( my $err = $@ );
	413	if ( $skip ) {
	414	print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n";
	415	next TEST;
	416	}
	417	elsif ($result eq 'c') {
	418	if ($err !~ m!^\Q$expect!) { print "not ok $testname$todo (compile) $input => '$err'\n"; next TEST }
	419	last; # no need to study a syntax error
	420	}
	421	elsif ( $todo_qr ) {
	422	print "not ok $testname # TODO", length($reason) ? " - $reason" : '', "\n";
	423	next TEST;
	424	}
	425	elsif ($@) {
	426	print "not ok $testname$todo $input => error '$err'\n", _comment("$code\n$@\n"); next TEST;
	427	}
	428	elsif ($result =~ /^n/) {
	429	if ($match) { print "not ok $testname$todo ($study) $input => false positive\n"; next TEST }
	430	}
	431	else {
	432	if (!$match \|\| $got ne $expect) {
	433	eval { require Data::Dumper };
	434	no warnings "utf8"; # But handle should be utf8
	435	if ($@ \|\| !defined &DynaLoader::boot_DynaLoader) {
	436	# Data::Dumper will load on miniperl, but fail when used in
	437	# anger as it tries to load B. I'd prefer to keep the
	438	# regular calls below outside of an eval so that real
	439	# (unknown) failures get spotted, not ignored.
	440	print "not ok $testname$todo ($study) $input => '$got', match=$match\n", _comment("$code\n");
	441	}
	442	else { # better diagnostics
	443	my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
	444	my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
	445	my $e = Data::Dumper->new([$expect],['expected'])->Useqq(1)->Dump;
	446	print "not ok $testname$todo ($study) $input => '$got', match=$match\n", _comment("$s\n$code\n$g\n$e\n");
	447	}
	448	next TEST;
	449	}
	450	}
	451	}
	452	print "ok $testname$todo\n";
	453	}
	454
	455	1;