This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
handy.h: Change some macros to use new inRANGE
[perl5.git] / t / re / regex_sets.t
CommitLineData
9d1a5160
KW
1#!./perl
2
3# This tests (?[...]). XXX These are just basic tests, as full ones would be
4# best done with an infrastructure change to allow getting out the inversion
5# list of the constructed set and then comparing it character by character
6# with the expected result.
7
9d1a5160
KW
8BEGIN {
9 chdir 't' if -d 't';
9d1a5160 10 require './test.pl';
a0bd1a30
KW
11 require './charset_tools.pl';
12 require './loc_tools.pl';
624c42e2 13 set_up_inc( '../lib','.','../ext/re' );
9d1a5160
KW
14}
15
624c42e2
N
16skip_all_without_unicode_tables();
17
0b1b7115
JH
18use strict;
19use warnings;
20
21$| = 1;
22
9d1a5160
KW
23use utf8;
24no warnings 'experimental::regex_sets';
25
26like("a", qr/(?[ [a] # This is a comment
27 ])/, 'Can ignore a comment');
28like("a", qr/(?[ [a] # [[:notaclass:]]
29 ])/, 'A comment isn\'t parsed');
e9f74f49 30unlike(uni_to_native("\x85"), qr/(?[ \t\85 ])/, 'NEL is white space');
e9f74f49
KW
31like(uni_to_native("\x85"), qr/(?[ \t + \\85 ])/, 'can escape NEL to match');
32like(uni_to_native("\x85"), qr/(?[ [\\85] ])/, '... including within nested []');
9d1a5160
KW
33like("\t", qr/(?[ \t + \\85 ])/, 'can do basic union');
34like("\cK", qr/(?[ \s ])/, '\s matches \cK');
35unlike("\cK", qr/(?[ \s - \cK ])/, 'can do basic subtraction');
36like(" ", qr/(?[ \s - \cK ])/, 'can do basic subtraction');
37like(":", qr/(?[ [:] ])/, '[:] is not a posix class');
38unlike("\t", qr/(?[ ! \t ])/, 'can do basic complement');
39like("\t", qr/(?[ ! [ ^ \t ] ])/, 'can do basic complement');
40unlike("\r", qr/(?[ \t ])/, '\r doesn\'t match \t ');
41like("\r", qr/(?[ ! \t ])/, 'can do basic complement');
42like("0", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection');
43unlike("A", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection');
44like("0", qr/(?[[:word:]&[:digit:]])/, 'spaces around internal [] aren\'t required');
45
46like("a", qr/(?[ [a] | [b] ])/, '| means union');
47like("b", qr/(?[ [a] | [b] ])/, '| means union');
48unlike("c", qr/(?[ [a] | [b] ])/, '| means union');
49
50like("a", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
51unlike("b", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
52like("c", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
53
54like("2", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping');
55unlike("a", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping');
56
b5864679
KW
57unlike("\x{17f}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i doesn\'t affect \p{}');
58like("\N{KELVIN SIGN}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i does affect literals');
59
60my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
61my $thai_or_lao_digit = qr/(?[ \p{Digit} & $thai_or_lao ])/;
62like("\N{THAI DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
63unlike(chr(ord("\N{THAI DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
64like("\N{THAI DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
65unlike(chr(ord("\N{THAI DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
66like("\N{LAO DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
67unlike(chr(ord("\N{LAO DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
68like("\N{LAO DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
69unlike(chr(ord("\N{LAO DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
9d1a5160 70
589b831d 71my $ascii_word = qr/(?[ \w ])/a;
6798c95d 72my $ascii_digits_plus_all_of_arabic = qr/(?[ \p{Arabic} + \p{Digit} & $ascii_word ])/;
589b831d
KW
73like("9", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII in the set");
74unlike("A", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII not in the set");
75unlike("\N{BENGALI DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII not in either set");
76unlike("\N{BENGALI LETTER A}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII in one set");
6798c95d
KW
77like("\N{ARABIC LETTER HAMZA}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union");
78like("\N{EXTENDED ARABIC-INDIC DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union");
79
80like("\r", qr/(?[ \p{lb=cr} ])/, '\r matches \p{lb=cr}');
81unlike("\r", qr/(?[ ! \p{lb=cr} ])/, '\r doesnt match ! \p{lb=cr}');
82like("\r", qr/(?[ ! ! \p{lb=cr} ])/, 'Two ! ! are the original');
83unlike("\r", qr/(?[ ! ! ! \p{lb=cr} ])/, 'Three ! ! ! are the complement');
84# left associatve
589b831d
KW
85
86my $kelvin = qr/(?[ \N{KELVIN SIGN} ])/;
87my $fold = qr/(?[ $kelvin ])/i;
88like("\N{KELVIN SIGN}", $kelvin, '"\N{KELVIN SIGN}" matches compiled qr/(?[ \N{KELVIN SIGN} ])/');
89unlike("K", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one");
90unlike("k", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one");
91
92my $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i;
93my $still_fold = qr/(?[ $kelvin_fold ])/;
94like("K", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i");
95like("k", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i");
9d1a5160 96
1cb2b5d4
FC
97eval 'my $x = qr/(?[ [a] ])/; qr/(?[ $x ])/';
98is($@, "", 'qr/(?[ [a] ])/ can be interpolated');
99
ab87267c
KW
100like("B", qr/(?[ [B] | ! ( [^B] ) ])/, "[perl #125892]");
101
361446f1
KW
102like("a", qr/(?[ (?#comment) [a]])/, "Can have (?#comments)");
103
a0bd1a30
KW
104if (! is_miniperl() && locales_enabled('LC_CTYPE')) {
105 my $utf8_locale = find_utf8_ctype_locale;
106 SKIP: {
107 skip("No utf8 locale available on this platform", 8) unless $utf8_locale;
108
109 setlocale(&POSIX::LC_ALL, "C");
110 use locale;
111
112 $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i;
113 my $single_char_class = qr/(?[ \: ])/;
114
115 setlocale(&POSIX::LC_ALL, $utf8_locale);
116
117 like("\N{KELVIN SIGN}", $kelvin_fold,
118 '(?[ \N{KELVIN SIGN} ]) matches itself under /i in UTF8-locale');
119 like("K", $kelvin_fold,
120 '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in UTF8-locale');
121 like("k", $kelvin_fold,
122 '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in UTF8-locale');
123 like(":", $single_char_class,
124 '(?[ : ]) matches itself in UTF8-locale (a single character class)');
125
126 setlocale(&POSIX::LC_ALL, "C");
127
128 # These should generate warnings (the above 4 shouldn't), but like()
129 # suppresses them, so the warnings tests are in t/lib/warnings/regexec
d9f326ed
KW
130 $^W = 0; # Suppress the warnings that occur when run by hand with
131 # the -w option
a0bd1a30
KW
132 like("\N{KELVIN SIGN}", $kelvin_fold,
133 '(?[ \N{KELVIN SIGN} ]) matches itself under /i in C locale');
134 like("K", $kelvin_fold,
135 '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in C locale');
136 like("k", $kelvin_fold,
137 '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in C locale');
138 like(":", $single_char_class,
139 '(?[ : ]) matches itself in C locale (a single character class)');
140 }
141}
142
2d1a1cb3
KW
143# Tests that no warnings given for valid Unicode digit range.
144my $arabic_digits = qr/(?[ [ ٠ - ٩ ] ])/;
145for my $char ("٠", "٥", "٩") {
146 use charnames ();
147 my @got = capture_warnings(sub {
148 like("٠", $arabic_digits, "Matches "
149 . charnames::viacode(ord $char));
150 });
151 is (@got, 0, "... without warnings");
152}
153
4a84d6e8
VA
154# RT #126181: \cX behaves strangely inside (?[])
155{
156 no warnings qw(syntax regexp);
157
158 eval { $_ = '/(?[(\c]) /'; qr/$_/ };
159 like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
160 eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ };
19a498a4 161 like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic');
4a84d6e8
VA
162 eval { $_ = '(?[(\c])'; qr/$_/ };
163 like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error');
164 eval { $_ = '(?[(\c]) ]\b'; qr/$_/ };
19a498a4 165 like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error');
4a84d6e8 166 eval { $_ = '(?[\c[]](])'; qr/$_/ };
19a498a4 167 like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error');
583bfb0c 168 like("\c#", qr/(?[\c#])/, '\c# should match itself');
4a84d6e8
VA
169 like("\c[", qr/(?[\c[])/, '\c[ should match itself');
170 like("\c\ ", qr/(?[\c\])/, '\c\ should match itself');
171 like("\c]", qr/(?[\c]])/, '\c] should match itself');
172}
a0bd1a30 173
a82f4918
KW
174# RT #126481 !! with syntax error panics
175{
176 fresh_perl_like('no warnings "experimental::regex_sets"; qr/(?[ ! ! (\w])/',
177 qr/^Unmatched \(/, {},
178 'qr/(?[ ! ! (\w])/ doesnt panic');
624c42e2 179
a82f4918
KW
180 # The following didn't panic before, but easy to add this here with a
181 # paren between the !!
182 fresh_perl_like('no warnings "experimental::regex_sets";qr/(?[ ! ( ! (\w)])/',
183 qr/^Unmatched \(/, {},
184 'qr/qr/(?[ ! ( ! (\w)])/');
185}
186
c333712c
KW
187{ # RT #129122
188 my $pat = '(?[ ( [ABC] - [B] ) + ( [abc] - [b] ) + [def] ])';
189 like("A", qr/$pat/, "'A' matches /$pat/");
190 unlike("B", qr/$pat/, "'B' doesn't match /$pat/");
191 like("C", qr/$pat/, "'C' matches /$pat/");
192 unlike("D", qr/$pat/, "'D' doesn't match /$pat/");
193 like("a", qr/$pat/, "'a' matches /$pat/");
194 unlike("b", qr/$pat/, "'b' doesn't match /$pat/");
195 like("c", qr/$pat/, "'c' matches /$pat/");
196 like("d", qr/$pat/, "'d' matches /$pat/");
197 like("e", qr/$pat/, "'e' matches /$pat/");
198 like("f", qr/$pat/, "'f' matches /$pat/");
199 unlike("g", qr/$pat/, "'g' doesn't match /$pat/");
200}
201
a5540cf9
KW
202{ # [perl #129322 ] This crashed perl, so keep after the ones that don't
203 my $pat = '(?[[!]&[0]^[!]&[0]+[a]])';
204 like("a", qr/$pat/, "/$pat/ compiles and matches 'a'");
205}
206
8121278a
KW
207{ # [perl #132167]
208 fresh_perl_is('no warnings "experimental::regex_sets";
209 print "c" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ])/;',
210 1, {},
211 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches');
212 fresh_perl_is('no warnings "experimental::regex_sets";
213 print "b" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ])/;',
214 "", {},
215 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches');
216}
217
9d1a5160
KW
218done_testing();
219
2201;