Commit | Line | Data |
---|---|---|
9d1a5160 KW |
1 | #!./perl |
2 | ||
3 | # This tests (?[...]). XXX These are just basic tests, as full ones would be | |
4 | # best done with an infrastructure change to allow getting out the inversion | |
5 | # list of the constructed set and then comparing it character by character | |
6 | # with the expected result. | |
7 | ||
9d1a5160 KW |
8 | BEGIN { |
9 | chdir 't' if -d 't'; | |
9d1a5160 | 10 | require './test.pl'; |
a0bd1a30 KW |
11 | require './charset_tools.pl'; |
12 | require './loc_tools.pl'; | |
624c42e2 | 13 | set_up_inc( '../lib','.','../ext/re' ); |
9d1a5160 KW |
14 | } |
15 | ||
624c42e2 N |
16 | skip_all_without_unicode_tables(); |
17 | ||
0b1b7115 JH |
18 | use strict; |
19 | use warnings; | |
20 | ||
21 | $| = 1; | |
22 | ||
9d1a5160 KW |
23 | use utf8; |
24 | no warnings 'experimental::regex_sets'; | |
25 | ||
26 | like("a", qr/(?[ [a] # This is a comment | |
27 | ])/, 'Can ignore a comment'); | |
28 | like("a", qr/(?[ [a] # [[:notaclass:]] | |
29 | ])/, 'A comment isn\'t parsed'); | |
e9f74f49 | 30 | unlike(uni_to_native("\x85"), qr/(?[ \t\85 ])/, 'NEL is white space'); |
e9f74f49 KW |
31 | like(uni_to_native("\x85"), qr/(?[ \t + \\85 ])/, 'can escape NEL to match'); |
32 | like(uni_to_native("\x85"), qr/(?[ [\\85] ])/, '... including within nested []'); | |
9d1a5160 KW |
33 | like("\t", qr/(?[ \t + \\85 ])/, 'can do basic union'); |
34 | like("\cK", qr/(?[ \s ])/, '\s matches \cK'); | |
35 | unlike("\cK", qr/(?[ \s - \cK ])/, 'can do basic subtraction'); | |
36 | like(" ", qr/(?[ \s - \cK ])/, 'can do basic subtraction'); | |
37 | like(":", qr/(?[ [:] ])/, '[:] is not a posix class'); | |
38 | unlike("\t", qr/(?[ ! \t ])/, 'can do basic complement'); | |
39 | like("\t", qr/(?[ ! [ ^ \t ] ])/, 'can do basic complement'); | |
40 | unlike("\r", qr/(?[ \t ])/, '\r doesn\'t match \t '); | |
41 | like("\r", qr/(?[ ! \t ])/, 'can do basic complement'); | |
42 | like("0", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection'); | |
43 | unlike("A", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection'); | |
44 | like("0", qr/(?[[:word:]&[:digit:]])/, 'spaces around internal [] aren\'t required'); | |
45 | ||
46 | like("a", qr/(?[ [a] | [b] ])/, '| means union'); | |
47 | like("b", qr/(?[ [a] | [b] ])/, '| means union'); | |
48 | unlike("c", qr/(?[ [a] | [b] ])/, '| means union'); | |
49 | ||
50 | like("a", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); | |
51 | unlike("b", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); | |
52 | like("c", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); | |
53 | ||
54 | like("2", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping'); | |
55 | unlike("a", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping'); | |
56 | ||
b5864679 KW |
57 | unlike("\x{17f}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i doesn\'t affect \p{}'); |
58 | like("\N{KELVIN SIGN}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i does affect literals'); | |
59 | ||
60 | my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/; | |
61 | my $thai_or_lao_digit = qr/(?[ \p{Digit} & $thai_or_lao ])/; | |
62 | like("\N{THAI DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
63 | unlike(chr(ord("\N{THAI DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
64 | like("\N{THAI DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
65 | unlike(chr(ord("\N{THAI DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
66 | like("\N{LAO DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
67 | unlike(chr(ord("\N{LAO DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
68 | like("\N{LAO DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
69 | unlike(chr(ord("\N{LAO DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); | |
9d1a5160 | 70 | |
589b831d | 71 | my $ascii_word = qr/(?[ \w ])/a; |
6798c95d | 72 | my $ascii_digits_plus_all_of_arabic = qr/(?[ \p{Arabic} + \p{Digit} & $ascii_word ])/; |
589b831d KW |
73 | like("9", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII in the set"); |
74 | unlike("A", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII not in the set"); | |
75 | unlike("\N{BENGALI DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII not in either set"); | |
76 | unlike("\N{BENGALI LETTER A}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII in one set"); | |
6798c95d KW |
77 | like("\N{ARABIC LETTER HAMZA}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union"); |
78 | like("\N{EXTENDED ARABIC-INDIC DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union"); | |
79 | ||
80 | like("\r", qr/(?[ \p{lb=cr} ])/, '\r matches \p{lb=cr}'); | |
81 | unlike("\r", qr/(?[ ! \p{lb=cr} ])/, '\r doesnt match ! \p{lb=cr}'); | |
82 | like("\r", qr/(?[ ! ! \p{lb=cr} ])/, 'Two ! ! are the original'); | |
83 | unlike("\r", qr/(?[ ! ! ! \p{lb=cr} ])/, 'Three ! ! ! are the complement'); | |
84 | # left associatve | |
589b831d KW |
85 | |
86 | my $kelvin = qr/(?[ \N{KELVIN SIGN} ])/; | |
87 | my $fold = qr/(?[ $kelvin ])/i; | |
88 | like("\N{KELVIN SIGN}", $kelvin, '"\N{KELVIN SIGN}" matches compiled qr/(?[ \N{KELVIN SIGN} ])/'); | |
89 | unlike("K", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one"); | |
90 | unlike("k", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one"); | |
91 | ||
92 | my $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i; | |
93 | my $still_fold = qr/(?[ $kelvin_fold ])/; | |
94 | like("K", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i"); | |
95 | like("k", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i"); | |
9d1a5160 | 96 | |
1cb2b5d4 FC |
97 | eval 'my $x = qr/(?[ [a] ])/; qr/(?[ $x ])/'; |
98 | is($@, "", 'qr/(?[ [a] ])/ can be interpolated'); | |
99 | ||
ab87267c KW |
100 | like("B", qr/(?[ [B] | ! ( [^B] ) ])/, "[perl #125892]"); |
101 | ||
361446f1 KW |
102 | like("a", qr/(?[ (?#comment) [a]])/, "Can have (?#comments)"); |
103 | ||
a0bd1a30 KW |
104 | if (! is_miniperl() && locales_enabled('LC_CTYPE')) { |
105 | my $utf8_locale = find_utf8_ctype_locale; | |
106 | SKIP: { | |
107 | skip("No utf8 locale available on this platform", 8) unless $utf8_locale; | |
108 | ||
109 | setlocale(&POSIX::LC_ALL, "C"); | |
110 | use locale; | |
111 | ||
112 | $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i; | |
113 | my $single_char_class = qr/(?[ \: ])/; | |
114 | ||
115 | setlocale(&POSIX::LC_ALL, $utf8_locale); | |
116 | ||
117 | like("\N{KELVIN SIGN}", $kelvin_fold, | |
118 | '(?[ \N{KELVIN SIGN} ]) matches itself under /i in UTF8-locale'); | |
119 | like("K", $kelvin_fold, | |
120 | '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in UTF8-locale'); | |
121 | like("k", $kelvin_fold, | |
122 | '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in UTF8-locale'); | |
123 | like(":", $single_char_class, | |
124 | '(?[ : ]) matches itself in UTF8-locale (a single character class)'); | |
125 | ||
126 | setlocale(&POSIX::LC_ALL, "C"); | |
127 | ||
128 | # These should generate warnings (the above 4 shouldn't), but like() | |
129 | # suppresses them, so the warnings tests are in t/lib/warnings/regexec | |
d9f326ed KW |
130 | $^W = 0; # Suppress the warnings that occur when run by hand with |
131 | # the -w option | |
a0bd1a30 KW |
132 | like("\N{KELVIN SIGN}", $kelvin_fold, |
133 | '(?[ \N{KELVIN SIGN} ]) matches itself under /i in C locale'); | |
134 | like("K", $kelvin_fold, | |
135 | '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in C locale'); | |
136 | like("k", $kelvin_fold, | |
137 | '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in C locale'); | |
138 | like(":", $single_char_class, | |
139 | '(?[ : ]) matches itself in C locale (a single character class)'); | |
140 | } | |
141 | } | |
142 | ||
2d1a1cb3 KW |
143 | # Tests that no warnings given for valid Unicode digit range. |
144 | my $arabic_digits = qr/(?[ [ ٠ - ٩ ] ])/; | |
145 | for my $char ("٠", "٥", "٩") { | |
146 | use charnames (); | |
147 | my @got = capture_warnings(sub { | |
148 | like("٠", $arabic_digits, "Matches " | |
149 | . charnames::viacode(ord $char)); | |
150 | }); | |
151 | is (@got, 0, "... without warnings"); | |
152 | } | |
153 | ||
4a84d6e8 VA |
154 | # RT #126181: \cX behaves strangely inside (?[]) |
155 | { | |
156 | no warnings qw(syntax regexp); | |
157 | ||
158 | eval { $_ = '/(?[(\c]) /'; qr/$_/ }; | |
159 | like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic'); | |
160 | eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ }; | |
19a498a4 | 161 | like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic'); |
4a84d6e8 VA |
162 | eval { $_ = '(?[(\c])'; qr/$_/ }; |
163 | like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error'); | |
164 | eval { $_ = '(?[(\c]) ]\b'; qr/$_/ }; | |
19a498a4 | 165 | like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error'); |
4a84d6e8 | 166 | eval { $_ = '(?[\c[]](])'; qr/$_/ }; |
19a498a4 | 167 | like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error'); |
583bfb0c | 168 | like("\c#", qr/(?[\c#])/, '\c# should match itself'); |
4a84d6e8 VA |
169 | like("\c[", qr/(?[\c[])/, '\c[ should match itself'); |
170 | like("\c\ ", qr/(?[\c\])/, '\c\ should match itself'); | |
171 | like("\c]", qr/(?[\c]])/, '\c] should match itself'); | |
172 | } | |
a0bd1a30 | 173 | |
a82f4918 KW |
174 | # RT #126481 !! with syntax error panics |
175 | { | |
176 | fresh_perl_like('no warnings "experimental::regex_sets"; qr/(?[ ! ! (\w])/', | |
177 | qr/^Unmatched \(/, {}, | |
178 | 'qr/(?[ ! ! (\w])/ doesnt panic'); | |
624c42e2 | 179 | |
a82f4918 KW |
180 | # The following didn't panic before, but easy to add this here with a |
181 | # paren between the !! | |
182 | fresh_perl_like('no warnings "experimental::regex_sets";qr/(?[ ! ( ! (\w)])/', | |
183 | qr/^Unmatched \(/, {}, | |
184 | 'qr/qr/(?[ ! ( ! (\w)])/'); | |
185 | } | |
186 | ||
c333712c KW |
187 | { # RT #129122 |
188 | my $pat = '(?[ ( [ABC] - [B] ) + ( [abc] - [b] ) + [def] ])'; | |
189 | like("A", qr/$pat/, "'A' matches /$pat/"); | |
190 | unlike("B", qr/$pat/, "'B' doesn't match /$pat/"); | |
191 | like("C", qr/$pat/, "'C' matches /$pat/"); | |
192 | unlike("D", qr/$pat/, "'D' doesn't match /$pat/"); | |
193 | like("a", qr/$pat/, "'a' matches /$pat/"); | |
194 | unlike("b", qr/$pat/, "'b' doesn't match /$pat/"); | |
195 | like("c", qr/$pat/, "'c' matches /$pat/"); | |
196 | like("d", qr/$pat/, "'d' matches /$pat/"); | |
197 | like("e", qr/$pat/, "'e' matches /$pat/"); | |
198 | like("f", qr/$pat/, "'f' matches /$pat/"); | |
199 | unlike("g", qr/$pat/, "'g' doesn't match /$pat/"); | |
200 | } | |
201 | ||
a5540cf9 KW |
202 | { # [perl #129322 ] This crashed perl, so keep after the ones that don't |
203 | my $pat = '(?[[!]&[0]^[!]&[0]+[a]])'; | |
204 | like("a", qr/$pat/, "/$pat/ compiles and matches 'a'"); | |
205 | } | |
206 | ||
8121278a KW |
207 | { # [perl #132167] |
208 | fresh_perl_is('no warnings "experimental::regex_sets"; | |
209 | print "c" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ])/;', | |
210 | 1, {}, | |
211 | 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches'); | |
212 | fresh_perl_is('no warnings "experimental::regex_sets"; | |
213 | print "b" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ])/;', | |
214 | "", {}, | |
215 | 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches'); | |
216 | } | |
217 | ||
9d1a5160 KW |
218 | done_testing(); |
219 | ||
220 | 1; |