Commit | Line | Data |
---|---|---|
8d063cd8 LW |
1 | #!./perl |
2 | ||
a8a2fe91 JH |
3 | BEGIN { |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
3a2263fe | 6 | require './test.pl'; |
a8a2fe91 JH |
7 | } |
8 | ||
ede8ac17 | 9 | plan tests => 135; |
8d063cd8 LW |
10 | |
11 | $FS = ':'; | |
12 | ||
13 | $_ = 'a:b:c'; | |
14 | ||
15 | ($a,$b,$c) = split($FS,$_); | |
16 | ||
3a2263fe | 17 | is(join(';',$a,$b,$c), 'a;b;c'); |
8d063cd8 LW |
18 | |
19 | @ary = split(/:b:/); | |
3a2263fe | 20 | is(join("$_",@ary), 'aa:b:cc'); |
8d063cd8 LW |
21 | |
22 | $_ = "abc\n"; | |
4765795a | 23 | my @xyz = (@ary = split(//)); |
3a2263fe | 24 | is(join(".",@ary), "a.b.c.\n"); |
8d063cd8 LW |
25 | |
26 | $_ = "a:b:c::::"; | |
27 | @ary = split(/:/); | |
3a2263fe | 28 | is(join(".",@ary), "a.b.c"); |
2e1b3b7e | 29 | |
378cc40b | 30 | $_ = join(':',split(' '," a b\tc \t d ")); |
3a2263fe | 31 | is($_, 'a:b:c:d'); |
2e1b3b7e KK |
32 | |
33 | $_ = join(':',split(/ */,"foo bar bie\tdoll")); | |
3a2263fe | 34 | is($_ , "f:o:o:b:a:r:b:i:e:\t:d:o:l:l"); |
378cc40b LW |
35 | |
36 | $_ = join(':', 'foo', split(/ /,'a b c'), 'bar'); | |
3a2263fe | 37 | is($_, "foo:a:b::c:bar"); |
378cc40b | 38 | |
a687059c LW |
39 | # Can we say how many fields to split to? |
40 | $_ = join(':', split(' ','1 2 3 4 5 6', 3)); | |
3a2263fe | 41 | is($_, '1:2:3 4 5 6'); |
a687059c LW |
42 | |
43 | # Can we do it as a variable? | |
44 | $x = 4; | |
45 | $_ = join(':', split(' ','1 2 3 4 5 6', $x)); | |
3a2263fe | 46 | is($_, '1:2:3:4 5 6'); |
a687059c LW |
47 | |
48 | # Does the 999 suppress null field chopping? | |
49 | $_ = join(':', split(/:/,'1:2:3:4:5:6:::', 999)); | |
3a2263fe | 50 | is($_ , '1:2:3:4:5:6:::'); |
a687059c LW |
51 | |
52 | # Does assignment to a list imply split to one more field than that? | |
6cefa69e RU |
53 | $foo = runperl( switches => ['-Dt'], stderr => 1, prog => '($a,$b)=split;' ); |
54 | ok($foo =~ /DEBUGGING/ || $foo =~ /const\n?\Q(IV(3))\E/); | |
a687059c LW |
55 | |
56 | # Can we say how many fields to split to when assigning to a list? | |
57 | ($a,$b) = split(' ','1 2 3 4 5 6', 2); | |
58 | $_ = join(':',$a,$b); | |
3a2263fe | 59 | is($_, '1:2 3 4 5 6'); |
a687059c | 60 | |
084811a7 | 61 | # do subpatterns generate additional fields (without trailing nulls)? |
62 | $_ = join '|', split(/,|(-)/, "1-10,20,,,"); | |
3a2263fe | 63 | is($_, "1|-|10||20"); |
084811a7 | 64 | |
65 | # do subpatterns generate additional fields (with a limit)? | |
66 | $_ = join '|', split(/,|(-)/, "1-10,20,,,", 10); | |
3a2263fe | 67 | is($_, "1|-|10||20||||||"); |
e1fa4fd3 HS |
68 | |
69 | # is the 'two undefs' bug fixed? | |
70 | (undef, $a, undef, $b) = qw(1 2 3 4); | |
3a2263fe | 71 | is("$a|$b", "2|4"); |
e1fa4fd3 HS |
72 | |
73 | # .. even for locals? | |
74 | { | |
75 | local(undef, $a, undef, $b) = qw(1 2 3 4); | |
3a2263fe | 76 | is("$a|$b", "2|4"); |
e1fa4fd3 | 77 | } |
fb73857a | 78 | |
79 | # check splitting of null string | |
80 | $_ = join('|', split(/x/, '',-1), 'Z'); | |
3a2263fe | 81 | is($_, "Z"); |
fb73857a | 82 | |
83 | $_ = join('|', split(/x/, '', 1), 'Z'); | |
3a2263fe | 84 | is($_, "Z"); |
fb73857a | 85 | |
86 | $_ = join('|', split(/(p+)/,'',-1), 'Z'); | |
3a2263fe | 87 | is($_, "Z"); |
fb73857a | 88 | |
89 | $_ = join('|', split(/.?/, '',-1), 'Z'); | |
3a2263fe | 90 | is($_, "Z"); |
fb73857a | 91 | |
c277df42 IZ |
92 | |
93 | # Are /^/m patterns scanned? | |
94 | $_ = join '|', split(/^a/m, "a b a\na d a", 20); | |
3a2263fe | 95 | is($_, "| b a\n| d a"); |
c277df42 IZ |
96 | |
97 | # Are /$/m patterns scanned? | |
98 | $_ = join '|', split(/a$/m, "a b a\na d a", 20); | |
3a2263fe | 99 | is($_, "a b |\na d |"); |
c277df42 IZ |
100 | |
101 | # Are /^/m patterns scanned (multi-character literal)? | |
102 | $_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20); | |
3a2263fe | 103 | is($_, "| b aa\n| d aa"); |
c277df42 IZ |
104 | |
105 | # Are /$/m patterns scanned (multi-character literal)? | |
106 | $_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20); | |
3a2263fe | 107 | is($_, "aa b |\naa d |"); |
c277df42 IZ |
108 | |
109 | # Greediness: | |
110 | $_ = "a : b :c: d"; | |
111 | @ary = split(/\s*:\s*/); | |
3a2263fe | 112 | is(($res = join(".",@ary)), "a.b.c.d", $res); |
815d35b9 MG |
113 | |
114 | # use of match result as pattern (!) | |
3a2263fe | 115 | is('p:q:r:s', join ':', split('abc' =~ /b/, 'p1q1r1s')); |
1ec94568 MG |
116 | |
117 | # /^/ treated as /^/m | |
118 | $_ = join ':', split /^/, "ab\ncd\nef\n"; | |
3a2263fe | 119 | is($_, "ab\n:cd\n:ef\n"); |
b3f5893f GS |
120 | |
121 | # see if @a = @b = split(...) optimization works | |
122 | @list1 = @list2 = split ('p',"a p b c p"); | |
3a2263fe RGS |
123 | ok(@list1 == @list2 && |
124 | "@list1" eq "@list2" && | |
125 | @list1 == 2 && | |
126 | "@list1" eq "a b c "); | |
0156e0fd RB |
127 | |
128 | # zero-width assertion | |
129 | $_ = join ':', split /(?=\w)/, "rm b"; | |
3a2263fe | 130 | is($_, "r:m :b"); |
5a2d9fa2 JH |
131 | |
132 | # Unicode splitting | |
974f237a | 133 | |
5a2d9fa2 | 134 | @ary = map {ord} split //, v1.20.300.4000.50000.4000.300.20.1; |
3a2263fe | 135 | is("@ary", "1 20 300 4000 50000 4000 300 20 1"); |
974f237a JH |
136 | |
137 | @ary = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016 | |
3a2263fe RGS |
138 | ok(@ary == 2 && |
139 | $ary[0] eq "\xFF" && $ary[1] eq "\xFD" && | |
140 | $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}"); | |
974f237a JH |
141 | |
142 | @ary = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31 | |
3a2263fe RGS |
143 | ok(@ary == 3 && |
144 | $ary[0] eq "\xFF\xFF" && | |
145 | $ary[0] eq "\x{FF}\xFF" && | |
146 | $ary[0] eq "\x{FF}\x{FF}" && | |
147 | $ary[1] eq "\xFE\xFE" && | |
148 | $ary[1] eq "\x{FE}\xFE" && | |
149 | $ary[1] eq "\x{FE}\x{FE}" && | |
150 | $ary[2] eq "\xFD\xFD" && | |
151 | $ary[2] eq "\x{FD}\xFD" && | |
152 | $ary[2] eq "\x{FD}\x{FD}"); | |
4765795a JH |
153 | |
154 | { | |
155 | my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); | |
3a2263fe | 156 | is("@a", "1234 123 2345"); |
4765795a JH |
157 | } |
158 | ||
159 | { | |
31e261c7 JH |
160 | my $x = 'A'; |
161 | my @a = map ord, split(/$x/, join("", map chr, (1234, ord($x), 2345))); | |
3a2263fe | 162 | is("@a", "1234 2345"); |
4765795a JH |
163 | } |
164 | ||
165 | { | |
166 | # bug id 20000427.003 | |
167 | ||
168 | use warnings; | |
169 | use strict; | |
170 | ||
171 | my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; | |
172 | ||
173 | my @charlist = split //, $sushi; | |
174 | my $r = ''; | |
175 | foreach my $ch (@charlist) { | |
176 | $r = $r . " " . sprintf "U+%04X", ord($ch); | |
177 | } | |
178 | ||
3a2263fe | 179 | is($r, " U+B36C U+5A8C U+FF5B U+5079 U+505B"); |
4765795a JH |
180 | } |
181 | ||
182 | { | |
dd83d948 DD |
183 | my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; |
184 | ||
3a2263fe | 185 | SKIP: { |
31e261c7 | 186 | if (ord('A') == 193) { |
3a2263fe | 187 | skip("EBCDIC", 1); |
31e261c7 JH |
188 | } else { |
189 | # bug id 20000426.003 | |
4765795a | 190 | |
31e261c7 | 191 | my ($a, $b, $c) = split(/\x40/, $s); |
3a2263fe | 192 | ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a); |
31e261c7 | 193 | } |
3a2263fe | 194 | } |
4765795a JH |
195 | |
196 | my ($a, $b) = split(/\x{100}/, $s); | |
3a2263fe | 197 | ok($a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"); |
4765795a JH |
198 | |
199 | my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); | |
3a2263fe | 200 | ok($a eq "\x20\x40" && $b eq "\x40\x20"); |
4765795a | 201 | |
3a2263fe | 202 | SKIP: { |
31e261c7 | 203 | if (ord('A') == 193) { |
3a2263fe | 204 | skip("EBCDIC", 1); |
31e261c7 JH |
205 | } else { |
206 | my ($a, $b) = split(/\x40\x{80}/, $s); | |
3a2263fe | 207 | ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"); |
31e261c7 | 208 | } |
3a2263fe | 209 | } |
4765795a JH |
210 | |
211 | my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); | |
3a2263fe | 212 | ok($a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"); |
4765795a JH |
213 | } |
214 | ||
215 | { | |
216 | # 20001205.014 | |
217 | ||
218 | my $a = "ABC\x{263A}"; | |
219 | ||
220 | my @b = split( //, $a ); | |
221 | ||
3a2263fe | 222 | is(scalar @b, 4); |
4765795a | 223 | |
3a2263fe | 224 | ok(length($b[3]) == 1 && $b[3] eq "\x{263A}"); |
4765795a JH |
225 | |
226 | $a =~ s/^A/Z/; | |
3a2263fe | 227 | ok(length($a) == 4 && $a eq "ZBC\x{263A}"); |
4765795a JH |
228 | } |
229 | ||
230 | { | |
231 | my @a = split(/\xFE/, "\xFF\xFE\xFD"); | |
232 | ||
3a2263fe | 233 | ok(@a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"); |
4765795a JH |
234 | } |
235 | ||
16bdb4ac RG |
236 | { |
237 | # check that PMf_WHITE is cleared after \s+ is used | |
238 | # reported in <20010627113312.RWGY6087.viemta06@localhost> | |
239 | my $r; | |
240 | foreach my $pat ( qr/\s+/, qr/ll/ ) { | |
241 | $r = join ':' => split($pat, "hello cruel world"); | |
242 | } | |
3a2263fe | 243 | is($r, "he:o cruel world"); |
16bdb4ac | 244 | } |
6de67870 JP |
245 | |
246 | ||
247 | { | |
248 | # split /(A)|B/, "1B2" should return (1, undef, 2) | |
249 | my @x = split /(A)|B/, "1B2"; | |
3a2263fe | 250 | ok($x[0] eq '1' and (not defined $x[1]) and $x[2] eq '2'); |
6de67870 | 251 | } |
1d86a7f9 HS |
252 | |
253 | { | |
254 | # [perl #17064] | |
255 | my $warn; | |
256 | local $SIG{__WARN__} = sub { $warn = join '', @_; chomp $warn }; | |
257 | my $char = "\x{10f1ff}"; | |
258 | my @a = split /\r?\n/, "$char\n"; | |
3a2263fe RGS |
259 | ok(@a == 1 && $a[0] eq $char && !defined($warn)); |
260 | } | |
261 | ||
262 | { | |
263 | # [perl #18195] | |
e1c3fb40 RGS |
264 | for my $u (0, 1) { |
265 | for my $a (0, 1) { | |
266 | $_ = 'readin,database,readout'; | |
267 | utf8::upgrade $_ if $u; | |
268 | /(.+)/; | |
269 | my @d = split /[,]/,$1; | |
270 | is(join (':',@d), 'readin:database:readout', "[perl #18195]"); | |
3a2263fe | 271 | } |
1d86a7f9 HS |
272 | } |
273 | } | |
3b0d546b AE |
274 | |
275 | { | |
276 | $p="a,b"; | |
277 | utf8::upgrade $p; | |
7f18b612 | 278 | eval { @a=split(/[, ]+/,$p) }; |
3b0d546b AE |
279 | is ("$@-@a-", '-a b-', '#20912 - split() to array with /[]+/ and utf8'); |
280 | } | |
7f18b612 YST |
281 | |
282 | { | |
283 | is (\@a, \@{"a"}, '@a must be global for following test'); | |
284 | $p=""; | |
285 | $n = @a = split /,/,$p; | |
286 | is ($n, 0, '#21765 - pmreplroot hack used to return undef for 0 iters'); | |
287 | } | |
e3a8873f DM |
288 | |
289 | { | |
290 | # [perl #28938] | |
291 | # assigning off the end of the array after a split could leave garbage | |
292 | # in the inner elements | |
293 | ||
294 | my $x; | |
295 | @a = split /,/, ',,,,,'; | |
296 | $a[3]=1; | |
297 | $x = \$a[2]; | |
298 | is (ref $x, 'SCALAR', '#28938 - garbage after extend'); | |
299 | } | |
8727f688 YO |
300 | { |
301 | # check the special casing of split on whitespace (/\s+/ and ' ') with Unicode spaces | |
302 | use charnames qw(:full); | |
303 | # below test data is extracted from | |
304 | # PropList-5.0.0.txt | |
305 | # Date: 2006-06-07, 23:22:52 GMT [MD] | |
306 | # | |
307 | # Unicode Character Database | |
308 | # Copyright (c) 1991-2006 Unicode, Inc. | |
309 | # For terms of use, see http://www.unicode.org/terms_of_use.html | |
310 | # For documentation, see UCD.html | |
311 | my @spaces=( | |
613f191e TS |
312 | ord("\t"), # Cc <control-0009> |
313 | ord("\n"), # Cc <control-000A> | |
314 | # not PerlSpace # Cc <control-000B> | |
315 | ord("\f"), # Cc <control-000C> | |
316 | ord("\r"), # Cc <control-000D> | |
317 | ord(" "), # Zs SPACE | |
318 | ord("\N{NEL}"), # Cc <control-0085> | |
319 | ord("\N{NO-BREAK SPACE}"), | |
320 | # Zs NO-BREAK SPACE | |
8727f688 YO |
321 | 0x1680, # Zs OGHAM SPACE MARK |
322 | 0x180E, # Zs MONGOLIAN VOWEL SEPARATOR | |
323 | 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE | |
324 | 0x2028, # Zl LINE SEPARATOR | |
325 | 0x2029, # Zp PARAGRAPH SEPARATOR | |
326 | 0x202F, # Zs NARROW NO-BREAK SPACE | |
327 | 0x205F, # Zs MEDIUM MATHEMATICAL SPACE | |
328 | 0x3000 # Zs IDEOGRAPHIC SPACE | |
329 | ); | |
330 | #diag "Have @{[0+@spaces]} to test\n"; | |
331 | foreach my $cp (@spaces) { | |
613f191e | 332 | my $msg = sprintf "Space: U+%04x", $cp; |
8727f688 | 333 | my $space = chr($cp); |
613f191e | 334 | my $str="A:$space:B\x{FFFD}"; |
8727f688 | 335 | chop $str; |
613f191e | 336 | |
8727f688 | 337 | my @res=split(/\s+/,$str); |
613f191e TS |
338 | ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); |
339 | ||
340 | my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; | |
341 | chop $s2; | |
342 | ||
343 | my @r2 = split(' ',$s2); | |
344 | ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); | |
345 | ||
346 | my @r3 = split(/\s+/, $s2); | |
347 | ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); | |
8727f688 YO |
348 | } |
349 | } | |
ede8ac17 TS |
350 | |
351 | { | |
352 | my $src = "ABC \0 FOO \0 XYZ"; | |
353 | my @s = split(" \0 ", $src); | |
354 | my @r = split(/ \0 /, $src); | |
355 | is(scalar(@s), 3); | |
356 | is($s[0], "ABC"); | |
357 | is($s[1], "FOO"); | |
358 | is($s[2]," XYZ"); | |
359 | is(join(':',@s), join(':',@r)); | |
360 | } |