Commit | Line | Data |
---|---|---|
ab08a362 NC |
1 | #!./perl |
2 | ||
3 | BEGIN { | |
4 | require './test.pl'; | |
5 | skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); | |
a9c9e371 | 6 | plan(tests => 145); |
ab08a362 NC |
7 | } |
8 | ||
9 | { | |
10 | # check the special casing of split /\s/ and unicode | |
11 | use charnames qw(:full); | |
12 | # below test data is extracted from | |
13 | # PropList-5.0.0.txt | |
14 | # Date: 2006-06-07, 23:22:52 GMT [MD] | |
15 | # | |
16 | # Unicode Character Database | |
17 | # Copyright (c) 1991-2006 Unicode, Inc. | |
18 | # For terms of use, see http://www.unicode.org/terms_of_use.html | |
19 | # For documentation, see UCD.html | |
20 | my @spaces=( | |
21 | ord("\t"), # Cc <control-0009> | |
22 | ord("\n"), # Cc <control-000A> | |
23 | # not PerlSpace # Cc <control-000B> | |
24 | ord("\f"), # Cc <control-000C> | |
25 | ord("\r"), # Cc <control-000D> | |
26 | ord(" "), # Zs SPACE | |
27 | ord("\N{NEL}"), # Cc <control-0085> | |
28 | ord("\N{NO-BREAK SPACE}"), | |
29 | # Zs NO-BREAK SPACE | |
30 | 0x1680, # Zs OGHAM SPACE MARK | |
ab08a362 NC |
31 | 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE |
32 | 0x2028, # Zl LINE SEPARATOR | |
33 | 0x2029, # Zp PARAGRAPH SEPARATOR | |
34 | 0x202F, # Zs NARROW NO-BREAK SPACE | |
35 | 0x205F, # Zs MEDIUM MATHEMATICAL SPACE | |
36 | 0x3000 # Zs IDEOGRAPHIC SPACE | |
37 | ); | |
38 | #diag "Have @{[0+@spaces]} to test\n"; | |
39 | foreach my $cp (@spaces) { | |
40 | my $msg = sprintf "Space: U+%04x", $cp; | |
41 | my $space = chr($cp); | |
42 | my $str="A:$space:B\x{FFFD}"; | |
43 | chop $str; | |
44 | ||
45 | my @res=split(/\s+/,$str); | |
46 | my $cnt=split(/\s+/,$str); | |
47 | ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); | |
48 | is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); | |
49 | ||
50 | my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; | |
51 | chop $s2; | |
52 | ||
53 | my @r2 = split(' ',$s2); | |
54 | my $c2 = split(' ',$s2); | |
55 | ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); | |
56 | is($c2, scalar(@r2), "$msg - ' ' (count)"); | |
57 | ||
58 | my @r3 = split(/\s+/, $s2); | |
59 | my $c3 = split(/\s+/, $s2); | |
60 | ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); | |
61 | is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); | |
62 | } | |
7e0d5ad7 KW |
63 | |
64 | { # RT #114808 | |
65 | warning_is( | |
66 | sub { | |
67 | $p=chr(0x100); | |
68 | for (".","ab\x{101}def") { | |
69 | @q = split /$p/ | |
70 | } | |
71 | }, | |
72 | undef, | |
73 | 'no warnings when part of split cant match non-utf8' | |
74 | ); | |
75 | } | |
76 | ||
ab08a362 | 77 | } |