This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
2b208cc167c9c692e3e50d29275f48ad1cc3554d
[perl5.git] / t / pragma / utf8.t
1 #!./perl 
2
3 BEGIN {
4     chdir 't' if -d 't';
5     @INC = '../lib';
6     $ENV{PERL5LIB} = '../lib';
7     if ( ord("\t") != 9 ) { # skip on ebcdic platforms
8         print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9         exit;
10     }
11 }
12
13 print "1..75\n";
14
15 my $test = 1;
16
17 sub ok {
18     my ($got,$expect) = @_;
19     print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
20     print "ok $test\n";
21 }
22
23 sub nok {
24     my ($got,$expect) = @_;
25     print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
26     print "ok $test\n";
27 }
28
29 sub ok_bytes {
30     use bytes;
31     my ($got,$expect) = @_;
32     print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
33     print "ok $test\n";
34 }
35
36 sub nok_bytes {
37     use bytes;
38     my ($got,$expect) = @_;
39     print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
40     print "ok $test\n";
41 }
42
43 {
44     use utf8;
45     $_ = ">\x{263A}<"; 
46     s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; 
47     ok $_, '>&#9786;<';
48     $test++;                            # 1
49
50     $_ = ">\x{263A}<"; 
51     my $rx = "\x{80}-\x{10ffff}";
52     s/([$rx])/"&#".ord($1).";"/eg; 
53     ok $_, '>&#9786;<';
54     $test++;                            # 2
55
56     $_ = ">\x{263A}<"; 
57     my $rx = "\\x{80}-\\x{10ffff}";
58     s/([$rx])/"&#".ord($1).";"/eg; 
59     ok $_, '>&#9786;<';
60     $test++;                            # 3
61
62     $_ = "alpha,numeric"; 
63     m/([[:alpha:]]+)/; 
64     ok $1, 'alpha';
65     $test++;                            # 4
66
67     $_ = "alphaNUMERICstring";
68     m/([[:^lower:]]+)/; 
69     ok $1, 'NUMERIC';
70     $test++;                            # 5
71
72     $_ = "alphaNUMERICstring";
73     m/(\p{Ll}+)/; 
74     ok $1, 'alpha';
75     $test++;                            # 6
76
77     $_ = "alphaNUMERICstring"; 
78     m/(\p{Lu}+)/; 
79     ok $1, 'NUMERIC';
80     $test++;                            # 7
81
82     $_ = "alpha,numeric"; 
83     m/([\p{IsAlpha}]+)/; 
84     ok $1, 'alpha';
85     $test++;                            # 8
86
87     $_ = "alphaNUMERICstring";
88     m/([^\p{IsLower}]+)/; 
89     ok $1, 'NUMERIC';
90     $test++;                            # 9
91
92     $_ = "alpha123numeric456"; 
93     m/([\p{IsDigit}]+)/; 
94     ok $1, '123';
95     $test++;                            # 10
96
97     $_ = "alpha123numeric456"; 
98     m/([^\p{IsDigit}]+)/; 
99     ok $1, 'alpha';
100     $test++;                            # 11
101
102     $_ = ",123alpha,456numeric"; 
103     m/([\p{IsAlnum}]+)/; 
104     ok $1, '123alpha';
105     $test++;                            # 12
106 }
107 {
108     use utf8;
109
110     $_ = "\x{263A}>\x{263A}\x{263A}"; 
111
112     ok length, 4;
113     $test++;                            # 13
114
115     ok length((m/>(.)/)[0]), 1;
116     $test++;                            # 14
117
118     ok length($&), 2;
119     $test++;                            # 15
120
121     ok length($'), 1;
122     $test++;                            # 16
123
124     ok length($`), 1;
125     $test++;                            # 17
126
127     ok length($1), 1;
128     $test++;                            # 18
129
130     ok length($tmp=$&), 2;
131     $test++;                            # 19
132
133     ok length($tmp=$'), 1;
134     $test++;                            # 20
135
136     ok length($tmp=$`), 1;
137     $test++;                            # 21
138
139     ok length($tmp=$1), 1;
140     $test++;                            # 22
141
142     {
143         use bytes;
144
145         my $tmp = $&;
146         ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
147         $test++;                                # 23
148
149         $tmp = $';
150         ok $tmp, pack("C*", 0342, 0230, 0272);
151         $test++;                                # 24
152
153         $tmp = $`;
154         ok $tmp, pack("C*", 0342, 0230, 0272);
155         $test++;                                # 25
156
157         $tmp = $1;
158         ok $tmp, pack("C*", 0342, 0230, 0272);
159         $test++;                                # 26
160     }
161
162     ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
163     $test++;                            # 27
164
165     ok_bytes $', pack("C*", 0342, 0230, 0272);
166     $test++;                            # 28
167
168     ok_bytes $`, pack("C*", 0342, 0230, 0272);
169     $test++;                            # 29
170
171     ok_bytes $1, pack("C*", 0342, 0230, 0272);
172     $test++;                            # 30
173
174     {
175         use bytes;
176         no utf8;
177
178         ok length, 10;
179         $test++;                                # 31
180
181         ok length((m/>(.)/)[0]), 1;
182         $test++;                                # 32
183
184         ok length($&), 2;
185         $test++;                                # 33
186
187         ok length($'), 5;
188         $test++;                                # 34
189
190         ok length($`), 3;
191         $test++;                                # 35
192
193         ok length($1), 1;
194         $test++;                                # 36
195
196         ok $&, pack("C*", ord(">"), 0342);
197         $test++;                                # 37
198
199         ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
200         $test++;                                # 38
201
202         ok $`, pack("C*", 0342, 0230, 0272);
203         $test++;                                # 39
204
205         ok $1, pack("C*", 0342);
206         $test++;                                # 40
207
208     }
209
210
211     {
212         no utf8;
213         $_="\342\230\272>\342\230\272\342\230\272";
214     }
215
216     ok length, 10;
217     $test++;                            # 41
218
219     ok length((m/>(.)/)[0]), 1;
220     $test++;                            # 42
221
222     ok length($&), 2;
223     $test++;                            # 43
224
225     ok length($'), 1;
226     $test++;                            # 44
227
228     ok length($`), 1;
229     $test++;                            # 45
230
231     ok length($1), 1;
232     $test++;                            # 46
233
234     ok length($tmp=$&), 2;
235     $test++;                            # 47
236
237     ok length($tmp=$'), 1;
238     $test++;                            # 48
239
240     ok length($tmp=$`), 1;
241     $test++;                            # 49
242
243     ok length($tmp=$1), 1;
244     $test++;                            # 50
245
246     {
247         use bytes;
248
249         my $tmp = $&;
250         ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
251         $test++;                                # 51
252
253         $tmp = $';
254         ok $tmp, pack("C*", 0342, 0230, 0272);
255         $test++;                                # 52
256
257         $tmp = $`;
258         ok $tmp, pack("C*", 0342, 0230, 0272);
259         $test++;                                # 53
260
261         $tmp = $1;
262         ok $tmp, pack("C*", 0342, 0230, 0272);
263         $test++;                                # 54
264     }
265     {
266         use bytes;
267         no utf8;
268
269         ok length, 10;
270         $test++;                                # 55
271
272         ok length((m/>(.)/)[0]), 1;
273         $test++;                                # 56
274
275         ok length($&), 2;
276         $test++;                                # 57
277
278         ok length($'), 5;
279         $test++;                                # 58
280
281         ok length($`), 3;
282         $test++;                                # 59
283
284         ok length($1), 1;
285         $test++;                                # 60
286
287         ok $&, pack("C*", ord(">"), 0342);
288         $test++;                                # 61
289
290         ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
291         $test++;                                # 62
292
293         ok $`, pack("C*", 0342, 0230, 0272);
294         $test++;                                # 63
295
296         ok $1, pack("C*", 0342);
297         $test++;                                # 64
298
299     }
300
301     ok "\x{ab}" =~ /^\x{ab}$/, 1;
302     $test++;                                    # 65
303 }
304
305 {
306     use utf8;
307     ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
308     $test++;                # 66
309 }
310
311 {
312     use utf8;
313     my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
314     ok "@a", "1234 123 2345";
315     $test++;                # 67
316 }
317
318 {
319     use utf8;
320     my $x = chr(123);
321     my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
322     ok "@a", "1234 2345";
323     $test++;                # 68
324 }
325
326 {
327   my($a,$b);
328   { use bytes; $a = "\xc3\xa4"; }  
329   { use utf8;  $b = "\xe4"; }
330   { use bytes; ok_bytes $a, $b; $test++; } # 69
331   { use utf8;  nok      $a, $b; $test++; } # 70
332 }
333
334 {
335     my @x = ("stra\337e 138","stra\337e 138");
336     for (@x) {
337         s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
338         my($latin) = /^(.+)(?:\s+\d)/;
339         print $latin eq "stra\337e" ? "ok $test\n" :
340             "#latin[$latin]\nnot ok $test\n";
341         $test++;
342         $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
343         use utf8;
344         $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
345     }
346 }
347
348 {
349     $_ = $dx = "\x{10f2}";
350     s/($dx)/$dx$1/;
351     {
352         use bytes;
353         print "not " unless $_ eq "$dx$dx";
354         print "ok $test\n";
355         $test++;
356     }
357
358     $_ = $dx = "\x{10f2}";
359     s/($dx)/$1$dx/;
360     {
361         use bytes;
362         print "not " unless $_ eq "$dx$dx";
363         print "ok $test\n";
364         $test++;
365     }
366
367     $dx = "\x{10f2}";
368     $_  = "\x{10f2}\x{10f2}";
369     s/($dx)($dx)/$1$2/;
370     {
371         use bytes;
372         print "not " unless $_ eq "$dx$dx";
373         print "ok $test\n";
374         $test++;
375     }
376 }