This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Improve -Dr output
[perl5.git] / t / re / anyof.t
CommitLineData
847612ec
KW
1use utf8;
2
3# This tests that the ANYOF nodes generated by bracketed character classes are
4# as expected. The representation of these is not guaranteed, and this test
5# may need to be updated if it changes. But it is here to make sure that no
6# unexpected changes occur. These could come from faulty generation of the
7# node, or faulty display of them (or both). Because these causes come from
8# very different parts of the regex compiler, it is unlikely that a commit
9# would change both of them, so this test will adequately serve to test both.
10
11BEGIN {
12 chdir 't' if -d 't';
13 @INC = ('../lib','.','../ext/re');
14 require Config; import Config;
15 require './test.pl';
16 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
17}
18
19# An array is used instead of a hash, so that the tests are carried out in the
20# order given by this file. Even-numbered indices are the regexes to compile.
21# The next higher element is the expected compilation result.
22#
23# It is painful to port some of these to EBCDIC, as not only do the code point
24# numbers change (for those < 256), but the order changes, as the compiled
25# version is sorted by native code point order. On EBCDIC, \r comes before
26# \n, and 'k' before "K', for example. So, the tests where there are
27# differences are skipped on EBCDIC. They are all at the beginning of the
28# array, and a special marker entry is used to delmit the boundary between
29# skipped and not skipped.
30
31my @tests = (
32 '[[{]' => 'ANYOF[[\{]',
33 '[^\n\r]' => 'ANYOF[^\n\r][0100-INFINITY]',
34 '[^\/\|,\$\%%\@\ \%"\<\>\:\#\&\*\{\}\[\]\(\)]' => 'ANYOF[^ "#$%&()*,/:<>@[\]\{|\}][0100-INFINITY]',
35 '[ [:blank:]]' => 'ANYOFD[\t {utf8}\xA0][1680 2000-200A 202F 205F 3000]',
d555b9dd 36 '[_[:^blank:]]' => 'ANYOFD[^\t {utf8}\xA0][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
847612ec 37 '[\xA0[:^blank:]]' => 'ANYOF[^\t ][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
d555b9dd
KW
38 '[ [:blank:]]' => 'ANYOFD[\t {utf8}\xA0][1680 2000-200A 202F 205F 3000]',
39 '[_[:^blank:]]' => 'ANYOFD[^\t {utf8}\xA0][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
40 '[\xA0[:^blank:]]' => 'ANYOF[^\t ][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
41 '(?d:[_[:^blank:]])' => 'ANYOFD[^\t {utf8}\xA0][0100-167F 1681-1FFF 200B-202E 2030-205E 2060-2FFF 3001-INFINITY]',
58f79e73 42 '[\x{07}-\x{0B}]' => 'ANYOF[\a\b\t\n\x0B]',
847612ec
KW
43 '(?il:[\x{212A}])' => 'ANYOFL{i}[{utf8 locale}Kk][212A]',
44 '(?il:(?[\x{212A}]))' => 'ANYOFL{utf8-locale-reqd}[Kk][212A]',
45
46 'ebcdic_ok_below_this_marker',
47
48 '(?l:[\x{212A}])' => 'ANYOFL[212A]',
49 '(?l:[\s\x{212A}])' => 'ANYOFL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
50 '(?l:[^\S\x{202F}])' => 'ANYOFL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
51 '(?i:[^:])' => 'ANYOF[^:][0100-INFINITY]',
ecfe5375
KW
52 '[\p{Any}]' => 'ANYOF[\x00-\xFF][0100-10FFFF]',
53 '[\p{IsMyRuntimeProperty}]' => 'ANYOF[+utf8::IsMyRuntimeProperty]',
54 '[^\p{IsMyRuntimeProperty}]' => 'ANYOF[^{+utf8::IsMyRuntimeProperty}]',
55 '[a\p{IsMyRuntimeProperty}]' => 'ANYOF[a][+utf8::IsMyRuntimeProperty]',
56 '[^a\p{IsMyRuntimeProperty}]' => 'ANYOF[^a{+utf8::IsMyRuntimeProperty}]',
57 '[^a\x{100}\p{IsMyRuntimeProperty}]' => 'ANYOF[^a{+utf8::IsMyRuntimeProperty}0100]',
847612ec
KW
58);
59
60# 2**32-1 or 2**64-1
61my $highest_cp_string = "F" x (($Config{uvsize} < 8) ? 8 : 16);
62
63my $next_highest_cp_string = $highest_cp_string =~ s/ F $ /E/xr;
64
65my $highest_cp = "\\x{$highest_cp_string}";
66my $next_highest_cp = "\\x{$next_highest_cp_string}";
67
68plan(scalar (@tests - 1) / 2); # -1 because of the marker.
69
70my $skip_ebcdic = $::IS_EBCDIC;
71while (defined (my $test = shift @tests)) {
72
73 if ($test eq 'ebcdic_ok_below_this_marker') {
74 $skip_ebcdic = 0;
75 next;
76 }
77
78 my $expected = shift @tests;
79
80 SKIP: {
81 skip("test not ported to EBCDIC", 1) if $skip_ebcdic;
82
83 my $display_expected = $expected
84 =~ s/ INFINITY_minus_1 /$next_highest_cp/xgr;
85
86 # Convert platform-independent values to what is suitable for the
87 # platform
88 $test =~ s/{INFINITY}/$highest_cp/g;
89 $test =~ s/{INFINITY_minus_1}/$next_highest_cp/g;
90
91 $test = "qr/$test/";
92 my $actual_test = "use re qw(Debug COMPILE); $test";
93
94 my $result = fresh_perl($actual_test);
95 if ($? != 0) { # Re-run so as to display STDERR.
96 fail($test);
97 fresh_perl($actual_test, { stderr => 0, verbose => 1 });
98 next;
99 }
100
101 # The Debug output will come back as a bunch of lines. We are
102 # interested only in the line after /Final program/
103 my @lines = split /\n/, $result;
104 while (defined ($_ = shift @lines)) {
105 next unless /Final program/;
106 $_ = shift @lines;
107
108 s/ \s* \( \d+ \) \s* //x; # Get rid of the node branch
109 s/ ^ \s* \d+ : \s* //x; # ... And the node number
110
111 # Use platform-independent values
112 s/$highest_cp_string/INFINITY/g;
113 s/$next_highest_cp_string/INFINITY_minus_1/g;
114
115 is($_, $expected,
116 "Verify compilation of $test displays as $display_expected");
117 last; # Discard the rest of this test's output
118 }
119 }
120}