This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
re-implement OPpASSIGN_COMMON mechanism
[perl5.git] / t / op / split_unicode.t
CommitLineData
ab08a362
NC
1#!./perl
2
3BEGIN {
b5efbd1f 4 chdir 't' if -d 't';
ab08a362
NC
5 require './test.pl';
6 skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)");
a9c9e371 7 plan(tests => 145);
ab08a362
NC
8}
9
10{
11 # check the special casing of split /\s/ and unicode
12 use charnames qw(:full);
13 # below test data is extracted from
14 # PropList-5.0.0.txt
15 # Date: 2006-06-07, 23:22:52 GMT [MD]
16 #
17 # Unicode Character Database
18 # Copyright (c) 1991-2006 Unicode, Inc.
19 # For terms of use, see http://www.unicode.org/terms_of_use.html
20 # For documentation, see UCD.html
21 my @spaces=(
22 ord("\t"), # Cc <control-0009>
23 ord("\n"), # Cc <control-000A>
24 # not PerlSpace # Cc <control-000B>
25 ord("\f"), # Cc <control-000C>
26 ord("\r"), # Cc <control-000D>
27 ord(" "), # Zs SPACE
28 ord("\N{NEL}"), # Cc <control-0085>
29 ord("\N{NO-BREAK SPACE}"),
30 # Zs NO-BREAK SPACE
31 0x1680, # Zs OGHAM SPACE MARK
ab08a362
NC
32 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE
33 0x2028, # Zl LINE SEPARATOR
34 0x2029, # Zp PARAGRAPH SEPARATOR
35 0x202F, # Zs NARROW NO-BREAK SPACE
36 0x205F, # Zs MEDIUM MATHEMATICAL SPACE
37 0x3000 # Zs IDEOGRAPHIC SPACE
38 );
39 #diag "Have @{[0+@spaces]} to test\n";
40 foreach my $cp (@spaces) {
41 my $msg = sprintf "Space: U+%04x", $cp;
42 my $space = chr($cp);
43 my $str="A:$space:B\x{FFFD}";
44 chop $str;
45
46 my @res=split(/\s+/,$str);
47 my $cnt=split(/\s+/,$str);
48 ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/");
49 is($cnt, scalar(@res), "$msg - /\\s+/ (count)");
50
51 my $s2 = "$space$space:A:$space$space:B\x{FFFD}";
52 chop $s2;
53
54 my @r2 = split(' ',$s2);
55 my $c2 = split(' ',$s2);
56 ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '");
57 is($c2, scalar(@r2), "$msg - ' ' (count)");
58
59 my @r3 = split(/\s+/, $s2);
60 my $c3 = split(/\s+/, $s2);
61 ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2");
62 is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)");
63 }
7e0d5ad7
KW
64
65 { # RT #114808
66 warning_is(
67 sub {
68 $p=chr(0x100);
69 for (".","ab\x{101}def") {
70 @q = split /$p/
71 }
72 },
73 undef,
74 'no warnings when part of split cant match non-utf8'
75 );
76 }
77
ab08a362 78}