Commit | Line | Data |
---|---|---|
c8e3bb4c | 1 | # tr.t |
f4240379 | 2 | $|=1; |
c8e3bb4c | 3 | |
f05dd7cc JH |
4 | BEGIN { |
5 | chdir 't' if -d 't'; | |
953ab6e5 | 6 | require './test.pl'; |
43ece5b1 | 7 | set_up_inc('../lib'); |
47918419 JH |
8 | if (is_miniperl()) { |
9 | eval 'require utf8'; | |
caab73c0 | 10 | if ($@) { skip_all("miniperl, no 'utf8'") } |
47918419 | 11 | } |
f05dd7cc | 12 | } |
a5095b95 | 13 | |
47918419 | 14 | use utf8; |
33042aaf | 15 | require Config; |
47918419 | 16 | |
9f31bc5d | 17 | plan tests => 315; |
953ab6e5 | 18 | |
f605e527 FC |
19 | # Test this first before we extend the stack with other operations. |
20 | # This caused an asan failure due to a bad write past the end of the stack. | |
21 | eval { my $x; die 1..127, $x =~ y/// }; | |
22 | ||
c8e3bb4c GS |
23 | $_ = "abcdefghijklmnopqrstuvwxyz"; |
24 | ||
25 | tr/a-z/A-Z/; | |
26 | ||
953ab6e5 | 27 | is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc'); |
c8e3bb4c GS |
28 | |
29 | tr/A-Z/a-z/; | |
30 | ||
953ab6e5 | 31 | is($_, "abcdefghijklmnopqrstuvwxyz", 'lc'); |
c8e3bb4c GS |
32 | |
33 | tr/b-y/B-Y/; | |
953ab6e5 | 34 | is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc'); |
c8e3bb4c | 35 | |
8efef67c KW |
36 | tr/a-a/AB/; |
37 | is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a'); | |
38 | ||
f4240379 KW |
39 | eval 'tr/a/\N{KATAKANA LETTER AINU P}/;'; |
40 | like $@, | |
92e8e650 | 41 | qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/, |
f4240379 KW |
42 | "Illegal to tr/// named sequence"; |
43 | ||
44 | eval 'tr/\x{101}-\x{100}//;'; | |
45 | like $@, | |
92e8e650 | 46 | qr/Invalid range "\\x\{0101\}-\\x\{0100\}" in transliteration operator/, |
f4240379 KW |
47 | "UTF-8 range with min > max"; |
48 | ||
79f0ed31 KW |
49 | $_ = "0123456789"; |
50 | tr/10/01/; | |
51 | is($_, "1023456789", 'swapping 0 and 1'); | |
52 | tr/01/10/; | |
53 | is($_, "0123456789", 'swapping 0 and 1'); | |
33252809 DM |
54 | |
55 | # Test /c and variants, with all the search and replace chars being | |
56 | # non-utf8, but with both non-utf8 and utf8 strings. | |
57 | ||
79f0ed31 | 58 | SKIP: { |
33252809 DM |
59 | my $all255 = join '', map chr, 0..0xff; |
60 | my $all255_twice = join '', map chr, map { ($_, $_) } 0..0xff; | |
79f0ed31 KW |
61 | my $plus = join '', map chr, 0x100..0x11f; |
62 | my $plus_twice = join '', map chr, map { ($_, $_) } 0x100..0x11f; | |
63 | my $all255_plus = $all255 . $plus; | |
64 | my $all255_twice_plus = $all255_twice . $plus_twice; | |
33252809 DM |
65 | my ($c, $s); |
66 | ||
33252809 DM |
67 | # length(replacement) == 0 |
68 | # non-utf8 string | |
69 | ||
70 | $s = $all255; | |
71 | $c = $s =~ tr/\x40-\xbf//c; | |
72 | is $s, $all255, "/c ==0"; | |
73 | is $c, 0x80, "/c ==0 count"; | |
74 | ||
75 | $s = $all255; | |
76 | $c = $s =~ tr/\x40-\xbf//cd; | |
79f0ed31 | 77 | is $s, join('', map chr, 0x40.. 0xbf), "/cd ==0"; |
33252809 DM |
78 | is $c, 0x80, "/cd ==0 count"; |
79 | ||
80 | $s = $all255_twice; | |
81 | $c = $s =~ tr/\x40-\xbf//cs; | |
82 | is $s, join('', map chr, | |
83 | 0x00..0x3f, | |
84 | (map { ($_, $_) } 0x40..0xbf), | |
85 | 0xc0..0xff, | |
86 | ), | |
87 | "/cs ==0"; | |
88 | is $c, 0x100, "/cs ==0 count"; | |
89 | ||
90 | $s = $all255_twice; | |
91 | $c = $s =~ tr/\x40-\xbf//csd; | |
92 | is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0"; | |
93 | is $c, 0x100, "/csd ==0 count"; | |
94 | ||
95 | ||
96 | # length(search) > length(replacement) | |
97 | # non-utf8 string | |
98 | ||
99 | $s = $all255; | |
100 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c; | |
101 | is $s, join('', map chr, | |
102 | 0x80..0xbf, | |
103 | 0x40..0xbf, | |
104 | 0x00..0x2f, | |
105 | ((0x2f) x 16), | |
106 | ), | |
107 | "/c >"; | |
108 | is $c, 0x80, "/c > count"; | |
109 | ||
110 | $s = $all255; | |
111 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd; | |
112 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f), | |
113 | "/cd >"; | |
114 | is $c, 0x80, "/cd > count"; | |
115 | ||
116 | $s = $all255_twice; | |
117 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs; | |
118 | is $s, join('', map chr, | |
119 | 0x80..0xbf, | |
120 | (map { ($_, $_) } 0x40..0xbf), | |
121 | 0x00..0x2f, | |
122 | ), | |
123 | "/cs >"; | |
124 | is $c, 0x100, "/cs > count"; | |
125 | ||
126 | $s = $all255_twice; | |
127 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd; | |
128 | is $s, join('', map chr, | |
129 | 0x80..0xbf, | |
130 | (map { ($_, $_) } 0x40..0xbf), | |
131 | 0x00..0x2f, | |
132 | ), | |
133 | "/csd >"; | |
134 | is $c, 0x100, "/csd > count"; | |
135 | ||
136 | ||
137 | # length(search) == length(replacement) | |
138 | # non-utf8 string | |
139 | ||
140 | $s = $all255; | |
141 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c; | |
142 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/c =="; | |
143 | is $c, 0x80, "/c == count"; | |
144 | ||
145 | $s = $all255; | |
146 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd; | |
147 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd =="; | |
148 | is $c, 0x80, "/cd == count"; | |
149 | ||
150 | $s = $all255_twice; | |
151 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs; | |
152 | is $s, join('', map chr, | |
153 | 0x80..0xbf, | |
154 | (map { ($_, $_) } 0x40..0xbf), | |
155 | 0x00..0x3f, | |
156 | ), | |
157 | "/cs =="; | |
158 | is $c, 0x100, "/cs == count"; | |
159 | ||
160 | $s = $all255_twice; | |
161 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd; | |
162 | is $s, join('', map chr, | |
163 | 0x80..0xbf, | |
164 | (map { ($_, $_) } 0x40..0xbf), | |
165 | 0x00..0x3f, | |
166 | ), | |
167 | "/csd =="; | |
168 | is $c, 0x100, "/csd == count"; | |
169 | ||
170 | # length(search) == length(replacement) - 1 | |
171 | # non-utf8 string | |
172 | ||
173 | ||
174 | $s = $all255; | |
175 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/c; | |
176 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff), | |
177 | "/c =-"; | |
178 | is $c, 0x70, "/c =- count"; | |
179 | ||
180 | $s = $all255; | |
181 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cd; | |
182 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff), | |
183 | "/cd =-"; | |
184 | is $c, 0x70, "/cd =- count"; | |
185 | ||
186 | $s = $all255_twice; | |
187 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cs; | |
188 | is $s, join('', map chr, | |
189 | 0x80..0xbf, | |
190 | (map { ($_, $_) } 0x40..0xbf), | |
191 | 0x00..0x2f, | |
192 | (map { ($_, $_) } 0xf0..0xff), | |
193 | ), | |
194 | "/cs =-"; | |
195 | is $c, 0xe0, "/cs =- count"; | |
196 | ||
197 | $s = $all255_twice; | |
198 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/csd; | |
199 | is $s, join('', map chr, | |
200 | 0x80..0xbf, | |
201 | (map { ($_, $_) } 0x40..0xbf), | |
202 | 0x00..0x2f, | |
203 | (map { ($_, $_) } 0xf0..0xff), | |
204 | ), | |
205 | "/csd =-"; | |
206 | is $c, 0xe0, "/csd =- count"; | |
207 | ||
208 | # length(search) < length(replacement) | |
209 | # non-utf8 string | |
210 | ||
211 | $s = $all255; | |
212 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c; | |
213 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff), | |
214 | "/c <"; | |
215 | is $c, 0x70, "/c < count"; | |
216 | ||
217 | $s = $all255; | |
218 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd; | |
219 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff), | |
220 | "/cd <"; | |
221 | is $c, 0x70, "/cd < count"; | |
222 | ||
223 | $s = $all255_twice; | |
224 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs; | |
225 | is $s, join('', map chr, | |
226 | 0x80..0xbf, | |
227 | (map { ($_, $_) } 0x40..0xbf), | |
228 | 0x00..0x2f, | |
229 | (map { ($_, $_) } 0xf0..0xff), | |
230 | ), | |
231 | "/cs <"; | |
232 | is $c, 0xe0, "/cs < count"; | |
233 | ||
234 | $s = $all255_twice; | |
235 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd; | |
236 | is $s, join('', map chr, | |
237 | 0x80..0xbf, | |
238 | (map { ($_, $_) } 0x40..0xbf), | |
239 | 0x00..0x2f, | |
240 | (map { ($_, $_) } 0xf0..0xff), | |
241 | ), | |
242 | "/csd <"; | |
243 | is $c, 0xe0, "/csd < count"; | |
244 | ||
245 | ||
246 | # length(replacement) == 0 | |
247 | # with some >= 0x100 utf8 chars in the string to be modified | |
248 | ||
249 | $s = $all255_plus; | |
250 | $c = $s =~ tr/\x40-\xbf//c; | |
251 | is $s, $all255_plus, "/c ==0U"; | |
252 | is $c, 0xa0, "/c ==0U count"; | |
253 | ||
254 | $s = $all255_plus; | |
255 | $c = $s =~ tr/\x40-\xbf//cd; | |
256 | is $s, join('', map chr, 0x40..0xbf), "/cd ==0U"; | |
257 | is $c, 0xa0, "/cd ==0U count"; | |
258 | ||
259 | $s = $all255_twice_plus; | |
260 | $c = $s =~ tr/\x40-\xbf//cs; | |
261 | is $s, join('', map chr, | |
262 | 0x00..0x3f, | |
263 | (map { ($_, $_) } 0x40..0xbf), | |
264 | 0xc0..0x11f, | |
265 | ), | |
266 | "/cs ==0U"; | |
267 | is $c, 0x140, "/cs ==0U count"; | |
268 | ||
269 | $s = $all255_twice_plus; | |
270 | $c = $s =~ tr/\x40-\xbf//csd; | |
271 | is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0U"; | |
272 | is $c, 0x140, "/csd ==0U count"; | |
273 | ||
274 | # length(search) > length(replacement) | |
275 | # with some >= 0x100 utf8 chars in the string to be modified | |
276 | ||
277 | $s = $all255_plus; | |
278 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c; | |
279 | is $s, join('', map chr, | |
280 | 0x80..0xbf, | |
281 | 0x40..0xbf, | |
282 | 0x00..0x2f, | |
283 | ((0x2f) x 48), | |
284 | ), | |
285 | "/c >U"; | |
286 | is $c, 0xa0, "/c >U count"; | |
287 | ||
288 | $s = $all255_plus; | |
289 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd; | |
290 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f), | |
291 | "/cd >U"; | |
292 | is $c, 0xa0, "/cd >U count"; | |
293 | ||
294 | $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}"; | |
295 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs; | |
296 | is $s, join('', map chr, | |
297 | 0x80..0xbf, | |
298 | (map { ($_, $_) } 0x40..0xbf), | |
299 | 0x00..0x2f, | |
300 | 0xbf, | |
301 | 0x2f, | |
302 | ), | |
303 | "/cs >U"; | |
304 | is $c, 0x144, "/cs >U count"; | |
305 | ||
306 | $s = $all255_twice_plus; | |
307 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd; | |
308 | is $s, join('', map chr, | |
309 | 0x80..0xbf, | |
310 | (map { ($_, $_) } 0x40..0xbf), | |
311 | 0x00..0x2f, | |
312 | ), | |
313 | "/csd >U"; | |
314 | is $c, 0x140, "/csd >U count"; | |
315 | ||
316 | # length(search) == length(replacement) | |
317 | # with some >= 0x100 utf8 chars in the string to be modified | |
318 | ||
319 | $s = $all255_plus; | |
320 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c; | |
321 | is $s, join('', map chr, | |
322 | 0x80..0xbf, | |
323 | 0x40..0xbf, | |
324 | 0x00..0x3f, | |
325 | ((0x3f) x 32), | |
326 | ), | |
327 | "/c ==U"; | |
328 | is $c, 0xa0, "/c ==U count"; | |
329 | ||
330 | $s = $all255_plus; | |
331 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd; | |
332 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd ==U"; | |
333 | is $c, 0xa0, "/cd ==U count"; | |
334 | ||
335 | $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}"; | |
336 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs; | |
337 | is $s, join('', map chr, | |
338 | 0x80..0xbf, | |
339 | (map { ($_, $_) } 0x40..0xbf), | |
340 | 0x00..0x3f, | |
341 | 0xbf, | |
342 | 0x3f, | |
343 | ), | |
344 | "/cs ==U"; | |
345 | is $c, 0x144, "/cs ==U count"; | |
346 | ||
347 | $s = $all255_twice_plus; | |
348 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd; | |
349 | is $s, join('', map chr, | |
350 | 0x80..0xbf, | |
351 | (map { ($_, $_) } 0x40..0xbf), | |
352 | 0x00..0x3f, | |
353 | ), | |
354 | "/csd ==U"; | |
355 | is $c, 0x140, "/csd ==U count"; | |
356 | ||
357 | ||
358 | # length(search) == length(replacement) - 1 | |
359 | # with some >= 0x100 utf8 chars in the string to be modified | |
360 | ||
361 | $s = $all255_plus; | |
362 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/c; | |
363 | is $s, join('', map chr, | |
364 | 0x80..0xbf, | |
365 | 0x40..0xbf, | |
366 | 0x00..0x40, | |
367 | ((0x40) x 31), | |
368 | ), | |
369 | "/c =-U"; | |
370 | is $c, 0xa0, "/c =-U count"; | |
371 | ||
372 | $s = $all255_plus; | |
373 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cd; | |
374 | is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x40), "/cd =-U"; | |
375 | is $c, 0xa0, "/cd =-U count"; | |
376 | ||
377 | $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}"; | |
378 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cs; | |
379 | is $s, join('', map chr, | |
380 | 0x80..0xbf, | |
381 | (map { ($_, $_) } 0x40..0xbf), | |
382 | 0x00..0x40, | |
383 | 0xbf, | |
384 | 0x40, | |
385 | ), | |
386 | "/cs =-U"; | |
387 | is $c, 0x144, "/cs =-U count"; | |
388 | ||
389 | $s = $all255_twice_plus; | |
390 | $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/csd; | |
f146bab6 | 391 | is $s, join('', map chr, |
33252809 DM |
392 | 0x80..0xbf, |
393 | (map { ($_, $_) } 0x40..0xbf), | |
394 | 0x00..0x40, | |
395 | ), | |
396 | "/csd =-U"; | |
33252809 DM |
397 | is $c, 0x140, "/csd =-U count"; |
398 | ||
399 | ||
400 | ||
401 | # length(search) < length(replacement), | |
402 | # with some >= 0x100 utf8 chars in the string to be modified | |
403 | ||
404 | $s = $all255_plus; | |
405 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c; | |
406 | is $s, join('', map chr, | |
407 | 0x80..0xbf, | |
408 | 0x40..0xbf, | |
409 | 0x00..0x2f, | |
410 | 0xf0..0xff, | |
411 | 0x30..0x3f, | |
412 | ((0x3f)x 16), | |
413 | ), | |
414 | "/c <U"; | |
415 | is $c, 0x90, "/c <U count"; | |
416 | ||
417 | $s = $all255_plus; | |
418 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd; | |
f146bab6 | 419 | is $s, join('', map chr, |
33252809 DM |
420 | 0x80..0xbf, |
421 | 0x40..0xbf, | |
422 | 0x00..0x2f, | |
423 | 0xf0..0xff, | |
424 | 0x30..0x3f, | |
425 | ), | |
426 | "/cd <U"; | |
33252809 DM |
427 | is $c, 0x90, "/cd <U count"; |
428 | ||
429 | $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}"; | |
430 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs; | |
431 | is $s, join('', map chr, | |
432 | 0x80..0xbf, | |
433 | (map { ($_, $_) } 0x40..0xbf), | |
434 | 0x00..0x2f, | |
435 | (map { ($_, $_) } 0xf0..0xff), | |
436 | 0x30..0x3f, | |
437 | 0xbf, | |
438 | 0x3f, | |
439 | ), | |
440 | "/cs <U"; | |
441 | is $c, 0x124, "/cs <U count"; | |
442 | ||
443 | $s = $all255_twice_plus; | |
444 | $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd; | |
f146bab6 | 445 | is $s, join('', map chr, 0x80..0xbf, |
33252809 DM |
446 | (map { ($_, $_) } 0x40..0xbf), |
447 | 0x00..0x2f, | |
448 | (map { ($_, $_) } 0xf0..0xff), | |
449 | 0x30..0x3f, | |
450 | ), | |
451 | "/csd <U"; | |
33252809 | 452 | is $c, 0x120, "/csd <U count"; |
79f0ed31 KW |
453 | |
454 | if ($::IS_EBCDIC) { | |
455 | skip "Not valid only for EBCDIC", 4; | |
456 | } | |
457 | $s = $all255_twice; | |
79f0ed31 | 458 | $c = $s =~ tr/[](){}<>\x00-\xff/[[(({{<</sd; |
a68ea463 | 459 | is $s, "(<[{", 'tr/[](){}<>\x00-\xff/[[(({{<</sd'; |
79f0ed31 KW |
460 | is $c, 512, "count of above"; |
461 | ||
462 | $s = $all255_plus; | |
463 | $c = $s =~ tr/[](){}<>\x00-\xff/[[(({{<</sd; | |
a68ea463 | 464 | is $s, "(<[{" . $plus, 'tr/[](){}<>\x00-\xff/[[(({{<</sd'; |
79f0ed31 | 465 | is $c, 256, "count of above"; |
33252809 DM |
466 | } |
467 | ||
6d63cc8e DM |
468 | { |
469 | # RT #132608 | |
470 | # the 'extra length' for tr///c was stored as a short, so if the | |
471 | # replacement string had more than 0x7fff chars not paired with | |
472 | # search chars, bad things could happen | |
473 | ||
474 | my ($c, $e, $s); | |
475 | ||
476 | $s = "\x{9000}\x{9001}\x{9002}"; | |
477 | $e = "\$c = \$s =~ tr/\\x00-\\xff/" | |
478 | . ("ABCDEFGHIJKLMNO" x (0xa000 / 15)) | |
479 | . "/c; 1; "; | |
480 | eval $e or die $@; | |
481 | is $s, "IJK", "RT #132608 len=0xa000"; | |
482 | is $c, 3, "RT #132608 len=0xa000 count"; | |
483 | ||
484 | $s = "\x{9003}\x{9004}\x{9005}"; | |
485 | $e = "\$c = \$s =~ tr/\\x00-\\xff/" | |
486 | . ("ABCDEFGHIJKLMNO" x (0x12000 / 15)) | |
487 | . "/c; 1; "; | |
488 | eval $e or die $@; | |
489 | is $s, "LMN", "RT #132608 len=0x12000"; | |
490 | is $c, 3, "RT #132608 len=0x12000 count"; | |
491 | } | |
492 | ||
33252809 | 493 | |
f4240379 KW |
494 | SKIP: { # Test literal range end point special handling |
495 | unless ($::IS_EBCDIC) { | |
496 | skip "Valid only for EBCDIC", 24; | |
497 | } | |
498 | ||
499 | $_ = "\x89"; # is 'i' | |
500 | tr/i-j//d; | |
501 | is($_, "", '"\x89" should match [i-j]'); | |
502 | $_ = "\x8A"; | |
503 | tr/i-j//d; | |
504 | is($_, "\x8A", '"\x8A" shouldnt match [i-j]'); | |
505 | $_ = "\x90"; | |
506 | tr/i-j//d; | |
507 | is($_, "\x90", '"\x90" shouldnt match [i-j]'); | |
508 | $_ = "\x91"; # is 'j' | |
509 | tr/i-j//d; | |
510 | is($_, "", '"\x91" should match [i-j]'); | |
511 | ||
512 | $_ = "\x89"; | |
513 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
514 | is($_, "", '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); | |
515 | $_ = "\x8A"; | |
516 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
517 | is($_, "\x8A", '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); | |
518 | $_ = "\x90"; | |
519 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
520 | is($_, "\x90", '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); | |
521 | $_ = "\x91"; | |
522 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
523 | is($_, "", '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); | |
524 | ||
525 | $_ = "\x89"; | |
526 | tr/i-\N{U+6A}//d; | |
527 | is($_, "", '"\x89" should match [i-\N{U+6A}]'); | |
528 | $_ = "\x8A"; | |
529 | tr/i-\N{U+6A}//d; | |
530 | is($_, "\x8A", '"\x8A" shouldnt match [i-\N{U+6A}]'); | |
531 | $_ = "\x90"; | |
532 | tr/i-\N{U+6A}//d; | |
533 | is($_, "\x90", '"\x90" shouldnt match [i-\N{U+6A}]'); | |
534 | $_ = "\x91"; | |
535 | tr/i-\N{U+6A}//d; | |
536 | is($_, "", '"\x91" should match [i-\N{U+6A}]'); | |
537 | ||
538 | $_ = "\x89"; | |
539 | tr/\N{U+69}-\N{U+6A}//d; | |
540 | is($_, "", '"\x89" should match [\N{U+69}-\N{U+6A}]'); | |
541 | $_ = "\x8A"; | |
542 | tr/\N{U+69}-\N{U+6A}//d; | |
543 | is($_, "\x8A", '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); | |
544 | $_ = "\x90"; | |
545 | tr/\N{U+69}-\N{U+6A}//d; | |
546 | is($_, "\x90", '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); | |
547 | $_ = "\x91"; | |
548 | tr/\N{U+69}-\N{U+6A}//d; | |
549 | is($_, "", '"\x91" should match [\N{U+69}-\N{U+6A}]'); | |
550 | ||
551 | $_ = "\x89"; | |
552 | tr/i-\x{91}//d; | |
553 | is($_, "", '"\x89" should match [i-\x{91}]'); | |
554 | $_ = "\x8A"; | |
555 | tr/i-\x{91}//d; | |
556 | is($_, "", '"\x8A" should match [i-\x{91}]'); | |
557 | $_ = "\x90"; | |
558 | tr/i-\x{91}//d; | |
559 | is($_, "", '"\x90" should match [i-\x{91}]'); | |
560 | $_ = "\x91"; | |
561 | tr/i-\x{91}//d; | |
562 | is($_, "", '"\x91" should match [i-\x{91}]'); | |
563 | ||
564 | # Need to use eval, because tries to compile on ASCII platforms even | |
565 | # though the tests are skipped, and fails because 0x89-j is an illegal | |
566 | # range there. | |
567 | $_ = "\x89"; | |
568 | eval 'tr/\x{89}-j//d'; | |
569 | is($_, "", '"\x89" should match [\x{89}-j]'); | |
570 | $_ = "\x8A"; | |
571 | eval 'tr/\x{89}-j//d'; | |
572 | is($_, "", '"\x8A" should match [\x{89}-j]'); | |
573 | $_ = "\x90"; | |
574 | eval 'tr/\x{89}-j//d'; | |
575 | is($_, "", '"\x90" should match [\x{89}-j]'); | |
576 | $_ = "\x91"; | |
577 | eval 'tr/\x{89}-j//d'; | |
578 | is($_, "", '"\x91" should match [\x{89}-j]'); | |
579 | } | |
580 | ||
c8e3bb4c GS |
581 | |
582 | # In EBCDIC 'I' is \xc9 and 'J' is \0xd1, 'i' is \x89 and 'j' is \x91. | |
583 | # Yes, discontinuities. Regardless, the \xca in the below should stay | |
584 | # untouched (and not became \x8a). | |
5e037136 | 585 | { |
5e037136 | 586 | $_ = "I\xcaJ"; |
c8e3bb4c | 587 | |
5e037136 | 588 | tr/I-J/i-j/; |
c8e3bb4c | 589 | |
ff36f15d | 590 | is($_, "i\xcaj", 'EBCDIC discontinuity'); |
5e037136 | 591 | } |
c8e3bb4c | 592 | # |
2de7b02f | 593 | |
2de7b02f GS |
594 | ($x = 12) =~ tr/1/3/; |
595 | (my $y = 12) =~ tr/1/3/; | |
596 | ($f = 1.5) =~ tr/1/3/; | |
597 | (my $g = 1.5) =~ tr/1/3/; | |
953ab6e5 MS |
598 | is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK'); |
599 | ||
bb16bae8 FC |
600 | # /r |
601 | $_ = 'adam'; | |
602 | is y/dam/ve/rd, 'eve', '/r'; | |
603 | is $_, 'adam', '/r leaves param alone'; | |
604 | $g = 'ruby'; | |
605 | is $g =~ y/bury/repl/r, 'perl', '/r with explicit param'; | |
606 | is $g, 'ruby', '/r leaves explicit param alone'; | |
607 | is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param'; | |
608 | ok !eval '$_ !~ y///r', "!~ y///r is forbidden"; | |
609 | like $@, qr\^Using !~ with tr///r doesn't make sense\, | |
610 | "!~ y///r error message"; | |
611 | { | |
612 | my $w; | |
613 | my $wc; | |
614 | local $SIG{__WARN__} = sub { $w = shift; ++$wc }; | |
615 | local $^W = 1; | |
616 | eval 'y///r; 1'; | |
617 | like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)', | |
618 | '/r warns in void context'; | |
619 | is $wc, 1, '/r warns just once'; | |
620 | } | |
2de7b02f | 621 | |
ee95e30c | 622 | # perlbug [ID 20000511.005 (#3237)] |
2de7b02f GS |
623 | $_ = 'fred'; |
624 | /([a-z]{2})/; | |
625 | $1 =~ tr/A-Z//; | |
626 | s/^(\s*)f/$1F/; | |
953ab6e5 MS |
627 | is($_, 'Fred', 'harmless if explicitly not updating'); |
628 | ||
629 | ||
630 | # A variant of the above, added in 5.7.2 | |
631 | $_ = 'fred'; | |
632 | /([a-z]{2})/; | |
633 | eval '$1 =~ tr/A-Z/A-Z/;'; | |
634 | s/^(\s*)f/$1F/; | |
635 | is($_, 'Fred', 'harmless if implicitly not updating'); | |
636 | is($@, '', ' no error'); | |
637 | ||
2de7b02f GS |
638 | |
639 | # check tr handles UTF8 correctly | |
640 | ($x = 256.65.258) =~ tr/a/b/; | |
953ab6e5 MS |
641 | is($x, 256.65.258, 'handles UTF8'); |
642 | is(length $x, 3); | |
643 | ||
2de7b02f | 644 | $x =~ tr/A/B/; |
953ab6e5 | 645 | is(length $x, 3); |
83bcbc61 | 646 | if ($::IS_ASCII) { # ASCII |
953ab6e5 | 647 | is($x, 256.66.258); |
67a17885 PP |
648 | } |
649 | else { | |
953ab6e5 | 650 | is($x, 256.65.258); |
67a17885 | 651 | } |
953ab6e5 | 652 | |
cbe7f703 PP |
653 | # EBCDIC variants of the above tests |
654 | ($x = 256.193.258) =~ tr/a/b/; | |
953ab6e5 MS |
655 | is(length $x, 3); |
656 | is($x, 256.193.258); | |
657 | ||
cbe7f703 | 658 | $x =~ tr/A/B/; |
953ab6e5 | 659 | is(length $x, 3); |
83bcbc61 | 660 | if ($::IS_ASCII) { # ASCII |
953ab6e5 | 661 | is($x, 256.193.258); |
cbe7f703 PP |
662 | } |
663 | else { | |
953ab6e5 | 664 | is($x, 256.194.258); |
cbe7f703 | 665 | } |
953ab6e5 | 666 | |
036b4402 | 667 | |
79f0ed31 | 668 | start: |
036b4402 | 669 | { |
953ab6e5 MS |
670 | my $l = chr(300); my $r = chr(400); |
671 | $x = 200.300.400; | |
672 | $x =~ tr/\x{12c}/\x{190}/; | |
673 | is($x, 200.400.400, | |
674 | 'changing UTF8 chars in a UTF8 string, same length'); | |
675 | is(length $x, 3); | |
676 | ||
677 | $x = 200.300.400; | |
678 | $x =~ tr/\x{12c}/\x{be8}/; | |
679 | is($x, 200.3048.400, ' more bytes'); | |
680 | is(length $x, 3); | |
681 | ||
682 | $x = 100.125.60; | |
683 | $x =~ tr/\x{64}/\x{190}/; | |
684 | is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string'); | |
685 | is(length $x, 3); | |
686 | ||
687 | $x = 400.125.60; | |
688 | $x =~ tr/\x{190}/\x{64}/; | |
689 | is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string'); | |
690 | is(length $x, 3); | |
691 | ||
692 | $x = 400.125.60.400; | |
693 | $y = $x =~ tr/\x{190}/\x{190}/; | |
694 | is($y, 2, 'Counting UTF8 chars in UTF8 string'); | |
695 | ||
696 | $x = 60.400.125.60.400; | |
697 | $y = $x =~ tr/\x{3c}/\x{3c}/; | |
698 | is($y, 2, ' non-UTF8 chars in UTF8 string'); | |
699 | ||
700 | # 17 - counting UTF8 chars in non-UTF8 string | |
701 | $x = 200.125.60; | |
702 | $y = $x =~ tr/\x{190}/\x{190}/; | |
703 | is($y, 0, ' UTF8 chars in non-UTFs string'); | |
036b4402 | 704 | } |
c2e66d9e | 705 | |
c2e66d9e | 706 | $_ = "abcdefghijklmnopqrstuvwxyz"; |
953ab6e5 MS |
707 | eval 'tr/a-z-9/ /'; |
708 | like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//'); | |
c2e66d9e | 709 | |
cbe7f703 | 710 | # 19-21: Make sure leading and trailing hyphens still work |
c2e66d9e GS |
711 | $_ = "car-rot9"; |
712 | tr/-a-m/./; | |
953ab6e5 | 713 | is($_, '..r.rot9', 'hyphens, leading'); |
c2e66d9e GS |
714 | |
715 | $_ = "car-rot9"; | |
716 | tr/a-m-/./; | |
953ab6e5 | 717 | is($_, '..r.rot9', ' trailing'); |
c2e66d9e GS |
718 | |
719 | $_ = "car-rot9"; | |
720 | tr/-a-m-/./; | |
953ab6e5 | 721 | is($_, '..r.rot9', ' both'); |
c2e66d9e GS |
722 | |
723 | $_ = "abcdefghijklmnop"; | |
724 | tr/ae-hn/./; | |
953ab6e5 | 725 | is($_, '.bcd....ijklm.op'); |
c2e66d9e GS |
726 | |
727 | $_ = "abcdefghijklmnop"; | |
728 | tr/a-cf-kn-p/./; | |
953ab6e5 | 729 | is($_, '...de......lm...'); |
c2e66d9e GS |
730 | |
731 | $_ = "abcdefghijklmnop"; | |
732 | tr/a-ceg-ikm-o/./; | |
953ab6e5 MS |
733 | is($_, '...d.f...j.l...p'); |
734 | ||
c2e66d9e | 735 | |
c2e66d9e GS |
736 | # 20000705 MJD |
737 | eval "tr/m-d/ /"; | |
321ecc04 | 738 | like($@, qr/^Invalid range "m-d" in transliteration operator/, |
953ab6e5 | 739 | 'reversed range check'); |
c2e66d9e | 740 | |
d897a58d | 741 | 'abcdef' =~ /(bcd)/; |
953ab6e5 MS |
742 | is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count'); |
743 | is($@, '', ' no error'); | |
d897a58d | 744 | |
953ab6e5 MS |
745 | 'abcdef' =~ /(bcd)/; |
746 | is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count'); | |
747 | is($@, '', ' no error'); | |
748 | ||
749 | is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr'); | |
d897a58d | 750 | |
94bfe852 | 751 | eval '"123" =~ tr/1/2/'; |
953ab6e5 MS |
752 | like($@, qr|^Can't modify constant item in transliteration \(tr///\)|, |
753 | 'LHS bad on updating tr'); | |
754 | ||
d897a58d | 755 | |
381d18bc JH |
756 | # v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac) |
757 | # v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90) | |
758 | ||
759 | # Transliterate a byte to a byte, all four ways. | |
760 | ||
761 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/; | |
953ab6e5 | 762 | is($a, v300.197.172.300.197.172, 'byte2byte transliteration'); |
381d18bc JH |
763 | |
764 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/; | |
953ab6e5 | 765 | is($a, v300.197.172.300.197.172); |
381d18bc JH |
766 | |
767 | ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/; | |
953ab6e5 | 768 | is($a, v300.197.172.300.197.172); |
381d18bc JH |
769 | |
770 | ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/; | |
953ab6e5 | 771 | is($a, v300.197.172.300.197.172); |
381d18bc | 772 | |
381d18bc JH |
773 | |
774 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/; | |
953ab6e5 | 775 | is($a, v300.301.172.300.301.172, 'byte2wide transliteration'); |
381d18bc JH |
776 | |
777 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/; | |
953ab6e5 | 778 | is($a, v195.196.172.195.196.172, ' wide2byte'); |
381d18bc JH |
779 | |
780 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/; | |
953ab6e5 | 781 | is($a, v301.196.172.301.196.172, ' wide2wide'); |
381d18bc | 782 | |
381d18bc JH |
783 | |
784 | ($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/; | |
953ab6e5 | 785 | is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte'); |
381d18bc | 786 | |
381d18bc JH |
787 | |
788 | ($a = v300.196.172.300.196.172.400.198.144) =~ | |
789 | tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/; | |
953ab6e5 | 790 | is($a, v197.301.173.197.301.173.401.198.144, 'all together now!'); |
381d18bc | 791 | |
381d18bc | 792 | |
953ab6e5 MS |
793 | is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2, |
794 | 'transliterate and count'); | |
381d18bc | 795 | |
953ab6e5 | 796 | is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2); |
381d18bc | 797 | |
381d18bc JH |
798 | |
799 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c; | |
953ab6e5 | 800 | is($a, v301.196.301.301.196.301, 'translit w/complement'); |
381d18bc JH |
801 | |
802 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c; | |
79f0ed31 | 803 | is($a, v300.197.197.300.197.197, 'more translit w/complement'); |
381d18bc | 804 | |
381d18bc JH |
805 | |
806 | ($a = v300.196.172.300.196.172) =~ tr/\xc4//d; | |
953ab6e5 | 807 | is($a, v300.172.300.172, 'translit w/deletion'); |
381d18bc JH |
808 | |
809 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d; | |
953ab6e5 | 810 | is($a, v196.172.196.172); |
381d18bc | 811 | |
381d18bc JH |
812 | |
813 | ($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s; | |
953ab6e5 | 814 | is($a, v197.172.300.300.197.172, 'translit w/squeeze'); |
381d18bc JH |
815 | |
816 | ($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s; | |
953ab6e5 | 817 | is($a, v196.172.301.196.172.172); |
381d18bc | 818 | |
a1874b66 | 819 | |
953ab6e5 | 820 | # Tricky cases (When Simon Cozens Attacks) |
a1874b66 | 821 | ($a = v196.172.200) =~ tr/\x{12c}/a/; |
953ab6e5 | 822 | is(sprintf("%vd", $a), '196.172.200'); |
a1874b66 JH |
823 | |
824 | ($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/; | |
953ab6e5 | 825 | is(sprintf("%vd", $a), '196.172.200'); |
a1874b66 JH |
826 | |
827 | ($a = v196.172.200) =~ tr/\x{12c}//d; | |
953ab6e5 MS |
828 | is(sprintf("%vd", $a), '196.172.200'); |
829 | ||
a1874b66 | 830 | |
8973db79 | 831 | # UTF8 range tests from Inaba Hiroto |
f9a63242 JH |
832 | |
833 | ($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/; | |
953ab6e5 | 834 | is($a, v192.196.172.194.197.172, 'UTF range'); |
f9a63242 JH |
835 | |
836 | ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/; | |
953ab6e5 MS |
837 | is($a, v300.300.172.302.301.172); |
838 | ||
8973db79 JH |
839 | |
840 | # UTF8 range tests from Karsten Sperling (patch #9008 required) | |
841 | ||
842 | ($a = "\x{0100}") =~ tr/\x00-\x{100}/X/; | |
953ab6e5 | 843 | is($a, "X"); |
8973db79 JH |
844 | |
845 | ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c; | |
953ab6e5 | 846 | is($a, "X"); |
8973db79 JH |
847 | |
848 | ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; | |
953ab6e5 | 849 | is($a, "X"); |
8973db79 JH |
850 | |
851 | ($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; | |
953ab6e5 MS |
852 | is($a, "X"); |
853 | ||
8973db79 | 854 | |
94472101 JH |
855 | # UTF8 range tests from Inaba Hiroto |
856 | ||
857 | ($a = "\x{200}") =~ tr/\x00-\x{100}/X/c; | |
953ab6e5 | 858 | is($a, "X"); |
94472101 JH |
859 | |
860 | ($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs; | |
953ab6e5 MS |
861 | is($a, "X"); |
862 | ||
685b06b5 KW |
863 | # Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters (as |
864 | # well as i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them, | |
6b6bd37b JH |
865 | # from Karsten Sperling. |
866 | ||
867 | $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/; | |
953ab6e5 MS |
868 | is($c, 8); |
869 | is($a, "XXXXXXXX"); | |
4c3a8340 | 870 | |
6b6bd37b | 871 | $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/; |
953ab6e5 MS |
872 | is($c, 8); |
873 | is($a, "XXXXXXXX"); | |
6b6bd37b | 874 | |
4c3a8340 | 875 | SKIP: { |
f4240379 | 876 | skip "EBCDIC-centric tests", 4 unless $::IS_EBCDIC; |
953ab6e5 MS |
877 | |
878 | $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/; | |
879 | is($c, 2); | |
880 | is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X"); | |
881 | ||
882 | $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/; | |
883 | is($c, 2); | |
884 | is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X"); | |
6b6bd37b | 885 | } |
1ed601ec JH |
886 | |
887 | ($a = "\x{100}") =~ tr/\x00-\xff/X/c; | |
953ab6e5 | 888 | is(ord($a), ord("X")); |
1ed601ec JH |
889 | |
890 | ($a = "\x{100}") =~ tr/\x00-\xff/X/cs; | |
953ab6e5 | 891 | is(ord($a), ord("X")); |
1ed601ec JH |
892 | |
893 | ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c; | |
953ab6e5 | 894 | is($a, "\x{100}\x{100}"); |
1ed601ec JH |
895 | |
896 | ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs; | |
953ab6e5 | 897 | is($a, "\x{100}"); |
1ed601ec | 898 | |
629b4584 | 899 | $a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/; |
953ab6e5 MS |
900 | is($a, "\x{1ff}\x{1fe}"); |
901 | ||
76ef7183 JH |
902 | |
903 | # From David Dyck | |
904 | ($a = "R0_001") =~ tr/R_//d; | |
953ab6e5 | 905 | is(hex($a), 1); |
76ef7183 | 906 | |
800b4dc4 JH |
907 | # From Inaba Hiroto |
908 | @a = (1,2); map { y/1/./ for $_ } @a; | |
953ab6e5 | 909 | is("@a", ". 2"); |
800b4dc4 JH |
910 | |
911 | @a = (1,2); map { y/1/./ for $_.'' } @a; | |
953ab6e5 MS |
912 | is("@a", "1 2"); |
913 | ||
800b4dc4 | 914 | |
bec89253 RH |
915 | # Additional test for Inaba Hiroto patch (robin@kitsite.com) |
916 | ($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c; | |
953ab6e5 MS |
917 | is($a, "XZY"); |
918 | ||
bec89253 | 919 | |
2233f375 NC |
920 | # Used to fail with "Modification of a read-only value attempted" |
921 | %a = (N=>1); | |
922 | foreach (keys %a) { | |
953ab6e5 MS |
923 | eval 'tr/N/n/'; |
924 | is($_, 'n', 'pp_trans needs to unshare shared hash keys'); | |
925 | is($@, '', ' no error'); | |
2233f375 | 926 | } |
94bfe852 | 927 | |
953ab6e5 | 928 | |
94bfe852 | 929 | $x = eval '"1213" =~ tr/1/1/'; |
953ab6e5 MS |
930 | is($x, 2, 'implicit count on constant'); |
931 | is($@, '', ' no error'); | |
932 | ||
933 | ||
934 | my @foo = (); | |
935 | eval '$foo[-1] =~ tr/N/N/'; | |
936 | is( $@, '', 'implicit count outside array bounds, index negative' ); | |
937 | is( scalar @foo, 0, " doesn't extend the array"); | |
938 | ||
939 | eval '$foo[1] =~ tr/N/N/'; | |
940 | is( $@, '', 'implicit count outside array bounds, index positive' ); | |
941 | is( scalar @foo, 0, " doesn't extend the array"); | |
942 | ||
943 | ||
944 | my %foo = (); | |
945 | eval '$foo{bar} =~ tr/N/N/'; | |
946 | is( $@, '', 'implicit count outside hash bounds' ); | |
947 | is( scalar keys %foo, 0, " doesn't extend the hash"); | |
d59e14db RGS |
948 | |
949 | $x = \"foo"; | |
950 | is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' ); | |
951 | is( ref $x, 'SCALAR', " doesn't stringify its argument" ); | |
0d65d7d5 MS |
952 | |
953 | # rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing | |
954 | # newline allowed. | |
2a91eb11 | 955 | fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file'); |
9f7f3913 TS |
956 | |
957 | ||
958 | { # [perl #38293] chr(65535) should be allowed in regexes | |
959 | no warnings 'utf8'; # to allow non-characters | |
960 | ||
961 | $s = "\x{d800}\x{ffff}"; | |
962 | $s =~ tr/\0/A/; | |
963 | is($s, "\x{d800}\x{ffff}", "do_trans_simple"); | |
964 | ||
965 | $s = "\x{d800}\x{ffff}"; | |
966 | $i = $s =~ tr/\0//; | |
967 | is($i, 0, "do_trans_count"); | |
968 | ||
969 | $s = "\x{d800}\x{ffff}"; | |
970 | $s =~ tr/\0/A/s; | |
971 | is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH"); | |
972 | ||
973 | $s = "\x{d800}\x{ffff}"; | |
974 | $s =~ tr/\0/A/c; | |
975 | is($s, "AA", "do_trans_complex, COMPLEMENT"); | |
976 | ||
977 | $s = "A\x{ffff}B"; | |
978 | $s =~ tr/\x{ffff}/\x{1ffff}/; | |
979 | is($s, "A\x{1ffff}B", "utf8, SEARCHLIST"); | |
980 | ||
981 | $s = "\x{fffd}\x{fffe}\x{ffff}"; | |
982 | $s =~ tr/\x{fffd}-\x{ffff}/ABC/; | |
983 | is($s, "ABC", "utf8, SEARCHLIST range"); | |
984 | ||
985 | $s = "ABC"; | |
986 | $s =~ tr/ABC/\x{ffff}/; | |
987 | is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST"); | |
988 | ||
989 | $s = "ABC"; | |
990 | $s =~ tr/ABC/\x{fffd}-\x{ffff}/; | |
991 | is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range"); | |
992 | ||
79f0ed31 | 993 | $s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}"; $i = $s =~ tr/\x{ffff}//; |
9f7f3913 TS |
994 | is($i, 2, "utf8, count"); |
995 | ||
996 | $s = "A\x{ffff}\x{ffff}C"; | |
997 | $s =~ tr/\x{ffff}/\x{100}/s; | |
998 | is($s, "A\x{100}C", "utf8, SQUASH"); | |
999 | ||
1000 | $s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C"; | |
1001 | $s =~ tr/\x{fffe}\x{ffff}//s; | |
1002 | is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH"); | |
1003 | ||
1004 | $s = "xAABBBy"; | |
1005 | $s =~ tr/AB/\x{ffff}/s; | |
1006 | is($s, "x\x{ffff}y", "utf8, SQUASH"); | |
1007 | ||
1008 | $s = "xAABBBy"; | |
1009 | $s =~ tr/AB/\x{fffe}\x{ffff}/s; | |
1010 | is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH"); | |
1011 | ||
1012 | $s = "A\x{ffff}B\x{fffe}C"; | |
1013 | $s =~ tr/\x{fffe}\x{ffff}/x/c; | |
1014 | is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT"); | |
1015 | ||
1016 | $s = "A\x{10000}B\x{2abcd}C"; | |
1017 | $s =~ tr/\0-\x{ffff}/x/c; | |
1018 | is($s, "AxBxC", "utf8, COMPLEMENT range"); | |
1019 | ||
1020 | $s = "A\x{fffe}B\x{ffff}C"; | |
1021 | $s =~ tr/\x{fffe}\x{ffff}/x/d; | |
1022 | is($s, "AxBC", "utf8, DELETE"); | |
1023 | ||
1024 | } # non-characters end | |
1025 | ||
1749ea0d TS |
1026 | { # related to [perl #27940] |
1027 | my $c; | |
1028 | ||
1029 | ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d; | |
1030 | is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d"); | |
1031 | ||
1032 | ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d; | |
1033 | is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d"); | |
1034 | } | |
1035 | ||
3e89ba19 | 1036 | SKIP: { |
e3918bb7 | 1037 | if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 } |
33042aaf NC |
1038 | skip "with NODEFAULT_SHAREKEYS there are few COWs", 2 |
1039 | if $Config::Config{ccflags} =~ /-DNODEFAULT_SHAREKEYS\b/; | |
1040 | ||
1041 | ($s) = keys %{{pie => 3}}; | |
e3918bb7 | 1042 | my $wasro = XS::APItest::SvIsCOW($s); |
2203fb5a | 1043 | ok $wasro, "have a COW"; |
3788ef8f | 1044 | $s =~ tr/i//; |
e3918bb7 | 1045 | ok( XS::APItest::SvIsCOW($s), |
3e89ba19 | 1046 | "count-only tr doesn't deCOW COWs" ); |
3788ef8f | 1047 | } |
a5446a64 DM |
1048 | |
1049 | # [ RT #61520 ] | |
1050 | # | |
1051 | # under threads, unicode tr within a cloned closure would SEGV or assert | |
1052 | # fail, since the pointer in the pad to the swash was getting zeroed out | |
1053 | # in the proto-CV | |
1054 | ||
1055 | { | |
1056 | my $x = "\x{142}"; | |
1057 | sub { | |
1058 | $x =~ tr[\x{142}][\x{143}]; | |
1059 | }->(); | |
1060 | is($x,"\x{143}", "utf8 + closure"); | |
1061 | } | |
1062 | ||
9100eeb1 Z |
1063 | # Freeing of trans ops prior to pmtrans() [perl #102858]. |
1064 | eval q{ $a ~= tr/a/b/; }; | |
1065 | ok 1; | |
1066 | SKIP: { | |
55673181 | 1067 | no warnings "deprecated"; |
9100eeb1 Z |
1068 | skip "no encoding", 1 unless eval { require encoding; 1 }; |
1069 | eval q{ use encoding "utf8"; $a ~= tr/a/b/; }; | |
1070 | ok 1; | |
1071 | } | |
a5446a64 | 1072 | |
cb6d3474 KW |
1073 | { # [perl #113584] |
1074 | ||
1075 | my $x = "Perlα"; | |
1076 | $x =~ tr/αα/βγ/; | |
baacc348 | 1077 | { no warnings 'utf8'; print "# $x\n"; } # No note() to avoid wide warning. |
cb6d3474 KW |
1078 | is($x, "Perlβ", "Only first of multiple transliterations is used"); |
1079 | } | |
1080 | ||
d8b516a1 FC |
1081 | # tr/a/b/ should fail even on zero-length read-only strings |
1082 | use constant nullrocow => (keys%{{""=>undef}})[0]; | |
1083 | for ("", nullrocow) { | |
1084 | eval { $_ =~ y/a/b/ }; | |
1085 | like $@, qr/^Modification of a read-only value attempted at /, | |
1086 | 'tr/a/b/ fails on zero-length ro string'; | |
1087 | } | |
1088 | ||
a53bfdae HS |
1089 | # Whether they're permitted or not, non-modifying tr/// should not write |
1090 | # to read-only values, even with funky flags. | |
1091 | { # [perl #123759] | |
1092 | eval q{ ('a' =~ /./) =~ tr///d }; | |
1093 | ok(1, "tr///d on PL_Yes does not assert"); | |
1094 | eval q{ ('a' =~ /./) =~ tr/a-z/a-z/d }; | |
1095 | ok(1, "tr/a-z/a-z/d on PL_Yes does not assert"); | |
1096 | eval q{ ('a' =~ /./) =~ tr///s }; | |
1097 | ok(1, "tr///s on PL_Yes does not assert"); | |
1098 | eval q{ *x =~ tr///d }; | |
1099 | ok(1, "tr///d on glob does not assert"); | |
1100 | } | |
1101 | ||
3a34ca0b | 1102 | { # [perl #128734 |
ce4eed6e | 1103 | my $string = chr utf8::unicode_to_native(0x00e0); |
3a34ca0b KW |
1104 | $string =~ tr/\N{U+00e0}/A/; |
1105 | is($string, "A", 'tr// of \N{U+...} works for upper-Latin1'); | |
79f0ed31 | 1106 | $string = chr utf8::unicode_to_native(0x00e1); |
3a34ca0b KW |
1107 | $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/; |
1108 | is($string, "A", 'tr// of \N{name} works for upper-Latin1'); | |
1109 | } | |
1110 | ||
2108cbcf DM |
1111 | # RT #130198 |
1112 | # a tr/// that is cho(m)ped, possibly with an array as arg | |
1113 | ||
1114 | { | |
1115 | use warnings; | |
1116 | ||
1117 | my ($s, @a); | |
1118 | ||
1119 | my $warn; | |
1120 | local $SIG{__WARN__ } = sub { $warn .= "@_" }; | |
1121 | ||
1122 | for my $c (qw(chop chomp)) { | |
1123 | for my $bind ('', '$s =~ ', '@a =~ ') { | |
1124 | for my $arg2 (qw(a b)) { | |
1125 | for my $r ('', 'r') { | |
1126 | $warn = ''; | |
1127 | # tr/a/b/ modifies its LHS, so if the LHS is an | |
1128 | # array, this should die. The special cases of tr/a/a/ | |
1129 | # and tr/a/b/r don't modify their LHS, so instead | |
1130 | # we croak because cho(m)p is trying to modify it. | |
1131 | # | |
1132 | my $exp = | |
1133 | ($r eq '' && $arg2 eq 'b' && $bind =~ /\@a/) | |
1134 | ? qr/Can't modify private array in transliteration/ | |
1135 | : qr{Can't modify transliteration \(tr///\) in $c}; | |
1136 | ||
1137 | my $expr = "$c(${bind}tr/a/$arg2/$r);"; | |
1138 | eval $expr; | |
1139 | like $@, $exp, "RT #130198 eval: $expr"; | |
1140 | ||
1141 | $exp = | |
1142 | $bind =~ /\@a/ | |
1143 | ? qr{^Applying transliteration \(tr///\) to \@a will act on scalar\(\@a\)} | |
1144 | : qr/^$/; | |
1145 | like $warn, $exp, "RT #130198 warn: $expr"; | |
1146 | } | |
1147 | } | |
1148 | } | |
1149 | } | |
1150 | ||
1151 | ||
1152 | } | |
1153 | ||
fe2ba0a2 KW |
1154 | { # [perl #130656] This bug happens when the tr is split across lines, so |
1155 | # that the first line causes it to go into UTF-8, and the 2nd is only | |
1156 | # things like \x | |
1157 | my $x = "\x{E235}"; | |
1158 | $x =~ tr | |
1159 | [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}] | |
1160 | [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}]; | |
1161 | ||
1162 | is $x, "\x{E5CE}", '[perl #130656]'; | |
ef65a74a KW |
1163 | |
1164 | } | |
1165 | ||
1166 | { | |
1167 | fresh_perl_like('y/\x{a00}0-\N{}//', qr/Unknown charname/, { }, | |
1168 | 'RT #133880 illegal \N{}'); | |
fe2ba0a2 | 1169 | } |
2108cbcf | 1170 | |
0c311b7c | 1171 | { |
79f0ed31 KW |
1172 | my $c; |
1173 | my $x = "\1\0\0\0\0\0\0\0\0\0\0\0\0"; | |
1174 | $c = $x =~ tr/\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08\x07\x06\x05\x04\x03\x02\x01\x00/FEDCBA9876543210/; | |
1175 | is $x, "1000000000000", "Decreasing ranges work with start at \\0"; | |
1176 | is $c, 13, "Count for above test"; | |
1177 | ||
1178 | $x = "\1\0\0\0\0\0\0\0\0\0\0\0\0"; | |
1179 | $c = $x =~ tr/\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08\x07\x06\x05\x04\x03\x02\x01\x00/\x{FF26}\x{FF25}\x{FF24}\x{FF23}\x{FF22}\x{FF21}\x{FF19}\x{FF18}\x{FF17}\x{FF16}\x{FF15}\x{FF14}\x{FF13}\x{FF12}\x{FF11}\x{FF10}/; | |
1180 | is $x, "\x{FF11}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}\x{FF10}", "Decreasing Above ASCII ranges work with start at \\0"; | |
1181 | is $c, 13, "Count for above test"; | |
1182 | } | |
1183 | ||
1184 | { | |
0c311b7c KW |
1185 | my $c = "\xff"; |
1186 | my $d = "\x{104}"; | |
1187 | eval '$c =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/'; | |
1188 | is($@, "", 'tr/\x{ff}-\x{104}/\x{100}-\x{105}/ compiled'); | |
1189 | is($c, "\x{100}", 'ff -> 100'); | |
1190 | eval '$d =~ tr/\x{ff}-\x{104}/\x{100}-\x{105}/'; | |
1191 | is($d, "\x{105}", '104 -> 105'); | |
1192 | } | |
1193 | ||
9f31bc5d KW |
1194 | { |
1195 | my $c = "cb"; | |
1196 | eval '$c =~ tr{aabc}{d\x{d0000}}'; | |
1197 | is($c, "\x{d0000}\x{d0000}", "Shouldn't generate valgrind errors"); | |
1198 | } | |
1199 | ||
9100eeb1 | 1200 | 1; |