This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add t/re/anyof.t
[perl5.git] / t / re / anyof.t
CommitLineData
847612ec
KW
1use utf8;
2
3# This tests that the ANYOF nodes generated by bracketed character classes are
4# as expected. The representation of these is not guaranteed, and this test
5# may need to be updated if it changes. But it is here to make sure that no
6# unexpected changes occur. These could come from faulty generation of the
7# node, or faulty display of them (or both). Because these causes come from
8# very different parts of the regex compiler, it is unlikely that a commit
9# would change both of them, so this test will adequately serve to test both.
10
11BEGIN {
12 chdir 't' if -d 't';
13 @INC = ('../lib','.','../ext/re');
14 require Config; import Config;
15 require './test.pl';
16 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
17}
18
19# An array is used instead of a hash, so that the tests are carried out in the
20# order given by this file. Even-numbered indices are the regexes to compile.
21# The next higher element is the expected compilation result.
22#
23# It is painful to port some of these to EBCDIC, as not only do the code point
24# numbers change (for those < 256), but the order changes, as the compiled
25# version is sorted by native code point order. On EBCDIC, \r comes before
26# \n, and 'k' before "K', for example. So, the tests where there are
27# differences are skipped on EBCDIC. They are all at the beginning of the
28# array, and a special marker entry is used to delmit the boundary between
29# skipped and not skipped.
30
31my @tests = (
32 '[[{]' => 'ANYOF[[\{]',
33 '[^\n\r]' => 'ANYOF[^\n\r][0100-INFINITY]',
34 '[^\/\|,\$\%%\@\ \%"\<\>\:\#\&\*\{\}\[\]\(\)]' => 'ANYOF[^ "#$%&()*,/:<>@[\]\{|\}][0100-INFINITY]',
35 '[ [:blank:]]' => 'ANYOFD[\t {utf8}\xA0][1680 2000-200A 202F 205F 3000]',
36 '[\xA0[:^blank:]]' => 'ANYOF[^\t ][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
37 '(?il:[\x{212A}])' => 'ANYOFL{i}[{utf8 locale}Kk][212A]',
38 '(?il:(?[\x{212A}]))' => 'ANYOFL{utf8-locale-reqd}[Kk][212A]',
39
40 'ebcdic_ok_below_this_marker',
41
42 '(?l:[\x{212A}])' => 'ANYOFL[212A]',
43 '(?l:[\s\x{212A}])' => 'ANYOFL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
44 '(?l:[^\S\x{202F}])' => 'ANYOFL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
45 '(?i:[^:])' => 'ANYOF[^:][0100-INFINITY]',
46);
47
48# 2**32-1 or 2**64-1
49my $highest_cp_string = "F" x (($Config{uvsize} < 8) ? 8 : 16);
50
51my $next_highest_cp_string = $highest_cp_string =~ s/ F $ /E/xr;
52
53my $highest_cp = "\\x{$highest_cp_string}";
54my $next_highest_cp = "\\x{$next_highest_cp_string}";
55
56plan(scalar (@tests - 1) / 2); # -1 because of the marker.
57
58my $skip_ebcdic = $::IS_EBCDIC;
59while (defined (my $test = shift @tests)) {
60
61 if ($test eq 'ebcdic_ok_below_this_marker') {
62 $skip_ebcdic = 0;
63 next;
64 }
65
66 my $expected = shift @tests;
67
68 SKIP: {
69 skip("test not ported to EBCDIC", 1) if $skip_ebcdic;
70
71 my $display_expected = $expected
72 =~ s/ INFINITY_minus_1 /$next_highest_cp/xgr;
73
74 # Convert platform-independent values to what is suitable for the
75 # platform
76 $test =~ s/{INFINITY}/$highest_cp/g;
77 $test =~ s/{INFINITY_minus_1}/$next_highest_cp/g;
78
79 $test = "qr/$test/";
80 my $actual_test = "use re qw(Debug COMPILE); $test";
81
82 my $result = fresh_perl($actual_test);
83 if ($? != 0) { # Re-run so as to display STDERR.
84 fail($test);
85 fresh_perl($actual_test, { stderr => 0, verbose => 1 });
86 next;
87 }
88
89 # The Debug output will come back as a bunch of lines. We are
90 # interested only in the line after /Final program/
91 my @lines = split /\n/, $result;
92 while (defined ($_ = shift @lines)) {
93 next unless /Final program/;
94 $_ = shift @lines;
95
96 s/ \s* \( \d+ \) \s* //x; # Get rid of the node branch
97 s/ ^ \s* \d+ : \s* //x; # ... And the node number
98
99 # Use platform-independent values
100 s/$highest_cp_string/INFINITY/g;
101 s/$next_highest_cp_string/INFINITY_minus_1/g;
102
103 is($_, $expected,
104 "Verify compilation of $test displays as $display_expected");
105 last; # Discard the rest of this test's output
106 }
107 }
108}