t/op/split.t

   1 #!./perl
   2
   3 BEGIN {
   4     chdir 't' if -d 't';
   5     @INC = '../lib';
   6     require './test.pl';
   7 }
   8
   9 plan tests => 250;
  10
  11 $FS = ':';
  12
  13 $_ = 'a:b:c';
  14
  15 ($a,$b,$c) = split($FS,$_);
  16
  17 is(join(';',$a,$b,$c), 'a;b;c');
  18
  19 @ary = split(/:b:/);
  20 $cnt = split(/:b:/);
  21 is(join("$_",@ary), 'aa:b:cc');
  22 is($cnt, scalar(@ary));
  23
  24 $_ = "abc\n";
  25 my @xyz = (@ary = split(//));
  26 $cnt = split(//);
  27 is(join(".",@ary), "a.b.c.\n");
  28 is($cnt, scalar(@ary));
  29
  30 $_ = "a:b:c::::";
  31 @ary = split(/:/);
  32 $cnt = split(/:/);
  33 is(join(".",@ary), "a.b.c");
  34 is($cnt, scalar(@ary));
  35
  36 $_ = join(':',split(' ',"    a b\tc \t d "));
  37 is($_, 'a:b:c:d');
  38 @ary = split(' ',"    a b\tc \t d ");
  39 $cnt = split(' ',"    a b\tc \t d ");
  40 is($cnt, scalar(@ary));
  41
  42 $_ = join(':',split(/ */,"foo  bar bie\tdoll"));
  43 is($_ , "f:o:o:b:a:r:b:i:e:\t:d:o:l:l");
  44 @ary = split(/ */,"foo  bar bie\tdoll");
  45 $cnt = split(/ */,"foo  bar bie\tdoll");
  46 is($cnt, scalar(@ary));
  47
  48 $_ = join(':', 'foo', split(/ /,'a b  c'), 'bar');
  49 is($_, "foo:a:b::c:bar");
  50 @ary = split(/ /,'a b  c');
  51 $cnt = split(/ /,'a b  c');
  52 is($cnt, scalar(@ary));
  53
  54 # Can we say how many fields to split to?
  55 $_ = join(':', split(' ','1 2 3 4 5 6', 3));
  56 is($_, '1:2:3 4 5 6');
  57 @ary = split(' ','1 2 3 4 5 6', 3);
  58 $cnt = split(' ','1 2 3 4 5 6', 3);
  59 is($cnt, scalar(@ary));
  60
  61 # Can we do it as a variable?
  62 $x = 4;
  63 $_ = join(':', split(' ','1 2 3 4 5 6', $x));
  64 is($_, '1:2:3:4 5 6');
  65 @ary = split(' ','1 2 3 4 5 6', $x);
  66 $cnt = split(' ','1 2 3 4 5 6', $x);
  67 is($cnt, scalar(@ary));
  68
  69 # Does the 999 suppress null field chopping?
  70 $_ = join(':', split(/:/,'1:2:3:4:5:6:::', 999));
  71 is($_ , '1:2:3:4:5:6:::');
  72 @ary = split(/:/,'1:2:3:4:5:6:::', 999);
  73 $cnt = split(/:/,'1:2:3:4:5:6:::', 999);
  74 is($cnt, scalar(@ary));
  75
  76 # Does assignment to a list imply split to one more field than that?
  77 $foo = runperl( switches => ['-Dt'], stderr => 1, prog => '($a,$b)=split;' );
  78 ok($foo =~ /DEBUGGING/ || $foo =~ /const\n?\Q(IV(3))\E/);
  79
  80 # Can we say how many fields to split to when assigning to a list?
  81 ($a,$b) = split(' ','1 2 3 4 5 6', 2);
  82 $_ = join(':',$a,$b);
  83 is($_, '1:2 3 4 5 6');
  84
  85 # do subpatterns generate additional fields (without trailing nulls)?
  86 $_ = join '|', split(/,|(-)/, "1-10,20,,,");
  87 is($_, "1|-|10||20");
  88 @ary = split(/,|(-)/, "1-10,20,,,");
  89 $cnt = split(/,|(-)/, "1-10,20,,,");
  90 is($cnt, scalar(@ary));
  91
  92 # do subpatterns generate additional fields (with a limit)?
  93 $_ = join '|', split(/,|(-)/, "1-10,20,,,", 10);
  94 is($_, "1|-|10||20||||||");
  95 @ary = split(/,|(-)/, "1-10,20,,,", 10);
  96 $cnt = split(/,|(-)/, "1-10,20,,,", 10);
  97 is($cnt, scalar(@ary));
  98
  99 # is the 'two undefs' bug fixed?
 100 (undef, $a, undef, $b) = qw(1 2 3 4);
 101 is("$a|$b", "2|4");
 102
 103 # .. even for locals?
 104 {
 105   local(undef, $a, undef, $b) = qw(1 2 3 4);
 106   is("$a|$b", "2|4");
 107 }
 108
 109 # check splitting of null string
 110 $_ = join('|', split(/x/,   '',-1), 'Z');
 111 is($_, "Z");
 112 @ary = split(/x/,   '',-1);
 113 $cnt = split(/x/,   '',-1);
 114 is($cnt, scalar(@ary));
 115
 116 $_ = join('|', split(/x/,   '', 1), 'Z');
 117 is($_, "Z");
 118 @ary = split(/x/,   '', 1);
 119 $cnt = split(/x/,   '', 1);
 120 is($cnt, scalar(@ary));
 121
 122 $_ = join('|', split(/(p+)/,'',-1), 'Z');
 123 is($_, "Z");
 124 @ary = split(/(p+)/,'',-1);
 125 $cnt = split(/(p+)/,'',-1);
 126 is($cnt, scalar(@ary));
 127
 128 $_ = join('|', split(/.?/,  '',-1), 'Z');
 129 is($_, "Z");
 130 @ary = split(/.?/,  '',-1);
 131 $cnt = split(/.?/,  '',-1);
 132 is($cnt, scalar(@ary));
 133
 134
 135 # Are /^/m patterns scanned?
 136 $_ = join '|', split(/^a/m, "a b a\na d a", 20);
 137 is($_, "| b a\n| d a");
 138 @ary = split(/^a/m, "a b a\na d a", 20);
 139 $cnt = split(/^a/m, "a b a\na d a", 20);
 140 is($cnt, scalar(@ary));
 141
 142 # Are /$/m patterns scanned?
 143 $_ = join '|', split(/a$/m, "a b a\na d a", 20);
 144 is($_, "a b |\na d |");
 145 @ary = split(/a$/m, "a b a\na d a", 20);
 146 $cnt = split(/a$/m, "a b a\na d a", 20);
 147 is($cnt, scalar(@ary));
 148
 149 # Are /^/m patterns scanned?
 150 $_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20);
 151 is($_, "| b aa\n| d aa");
 152 @ary = split(/^aa/m, "aa b aa\naa d aa", 20);
 153 $cnt = split(/^aa/m, "aa b aa\naa d aa", 20);
 154 is($cnt, scalar(@ary));
 155
 156 # Are /$/m patterns scanned?
 157 $_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20);
 158 is($_, "aa b |\naa d |");
 159 @ary = split(/aa$/m, "aa b aa\naa d aa", 20);
 160 $cnt = split(/aa$/m, "aa b aa\naa d aa", 20);
 161 is($cnt, scalar(@ary));
 162
 163 # Greedyness:
 164 $_ = "a : b :c: d";
 165 @ary = split(/\s*:\s*/);
 166 $cnt = split(/\s*:\s*/);
 167 is(($res = join(".",@ary)), "a.b.c.d", $res);
 168 is($cnt, scalar(@ary));
 169
 170 # use of match result as pattern (!)
 171 is('p:q:r:s', join ':', split('abc' =~ /b/, 'p1q1r1s'));
 172 @ary = split('abc' =~ /b/, 'p1q1r1s');
 173 $cnt = split('abc' =~ /b/, 'p1q1r1s');
 174 is($cnt, scalar(@ary));
 175
 176 # /^/ treated as /^/m
 177 $_ = join ':', split /^/, "ab\ncd\nef\n";
 178 is($_, "ab\n:cd\n:ef\n");
 179
 180 # see if @a = @b = split(...) optimization works
 181 @list1 = @list2 = split ('p',"a p b c p");
 182 ok(@list1 == @list2 &&
 183    "@list1" eq "@list2" &&
 184    @list1 == 2 &&
 185    "@list1" eq "a   b c ");
 186
 187 # zero-width assertion
 188 $_ = join ':', split /(?=\w)/, "rm b";
 189 is($_, "r:m :b");
 190 @ary = split /(?=\w)/, "rm b";
 191 $cnt = split /(?=\w)/, "rm b";
 192 is($cnt, scalar(@ary));
 193
 194 # unicode splittage
 195
 196 @ary = map {ord} split //, v1.20.300.4000.50000.4000.300.20.1;
 197 $cnt =           split //, v1.20.300.4000.50000.4000.300.20.1;
 198 is("@ary", "1 20 300 4000 50000 4000 300 20 1");
 199 is($cnt, scalar(@ary));
 200
 201 @ary = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016
 202 $cnt = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016
 203 ok(@ary == 2 &&
 204    $ary[0] eq "\xFF"   && $ary[1] eq "\xFD" &&
 205    $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}");
 206 is($cnt, scalar(@ary));
 207
 208 @ary = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31
 209 $cnt = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31
 210 ok(@ary == 3 &&
 211    $ary[0] eq "\xFF\xFF"     &&
 212    $ary[0] eq "\x{FF}\xFF"   &&
 213    $ary[0] eq "\x{FF}\x{FF}" &&
 214    $ary[1] eq "\xFE\xFE"     &&
 215    $ary[1] eq "\x{FE}\xFE"   &&
 216    $ary[1] eq "\x{FE}\x{FE}" &&
 217    $ary[2] eq "\xFD\xFD"     &&
 218    $ary[2] eq "\x{FD}\xFD"   &&
 219    $ary[2] eq "\x{FD}\x{FD}");
 220 is($cnt, scalar(@ary));
 221
 222 {
 223     my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
 224     my $c =          split(//, join("", map chr, (1234, 123, 2345)));
 225     is("@a", "1234 123 2345");
 226     is($c, scalar(@a));
 227 }
 228
 229 {
 230     my $x = 'A';
 231     my @a = map ord, split(/$x/, join("", map chr, (1234, ord($x), 2345)));
 232     my $c =          split(/$x/, join("", map chr, (1234, ord($x), 2345)));
 233     is("@a", "1234 2345");
 234     is($c, scalar(@a));
 235 }
 236
 237 {
 238     # bug id 20000427.003
 239
 240     use warnings;
 241     use strict;
 242
 243     my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
 244
 245     my @charlist = split //, $sushi;
 246     my $charnum  = split //, $sushi;
 247     is($charnum, scalar(@charlist));
 248     my $r = '';
 249     foreach my $ch (@charlist) {
 250         $r = $r . " " . sprintf "U+%04X", ord($ch);
 251     }
 252
 253     is($r, " U+B36C U+5A8C U+FF5B U+5079 U+505B");
 254 }
 255
 256 {
 257     my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
 258
 259   SKIP: {
 260     if (ord('A') == 193) {
 261         skip("EBCDIC", 1);
 262     } else {
 263         # bug id 20000426.003
 264
 265         my ($a, $b, $c) = split(/\x40/, $s);
 266         ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a);
 267     }
 268   }
 269
 270     my ($a, $b) = split(/\x{100}/, $s);
 271     ok($a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20");
 272
 273     my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
 274     ok($a eq "\x20\x40" && $b eq "\x40\x20");
 275
 276   SKIP: {
 277     if (ord('A') == 193) {
 278         skip("EBCDIC", 1);
 279     }  else {
 280         my ($a, $b) = split(/\x40\x{80}/, $s);
 281         ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20");
 282     }
 283   }
 284
 285     my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
 286     ok($a eq "\x20" && $b eq "\x{100}" && $c eq "\x20");
 287 }
 288
 289 {
 290     # 20001205.014
 291
 292     my $a = "ABC\x{263A}";
 293
 294     my @b = split( //, $a );
 295     my $c = split( //, $a );
 296     is($c, scalar(@b));
 297
 298     is(scalar @b, 4);
 299
 300     ok(length($b[3]) == 1 && $b[3] eq "\x{263A}");
 301
 302     $a =~ s/^A/Z/;
 303     ok(length($a) == 4 && $a eq "ZBC\x{263A}");
 304 }
 305
 306 {
 307     my @a = split(/\xFE/, "\xFF\xFE\xFD");
 308     my $b = split(/\xFE/, "\xFF\xFE\xFD");
 309
 310     ok(@a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD");
 311     is($b, scalar(@a));
 312 }
 313
 314 {
 315     # check that PMf_WHITE is cleared after \s+ is used
 316     # reported in <20010627113312.RWGY6087.viemta06@localhost>
 317     my $r;
 318     foreach my $pat ( qr/\s+/, qr/ll/ ) {
 319         $r = join ':' => split($pat, "hello cruel world");
 320     }
 321     is($r, "he:o cruel world");
 322 }
 323
 324
 325 {
 326     # split /(A)|B/, "1B2" should return (1, undef, 2)
 327     my @x = split /(A)|B/, "1B2";
 328     my $y = split /(A)|B/, "1B2";
 329     is($y, scalar(@x));
 330     ok($x[0] eq '1' and (not defined $x[1]) and $x[2] eq '2');
 331 }
 332
 333 {
 334     # [perl #17064]
 335     my $warn;
 336     local $SIG{__WARN__} = sub { $warn = join '', @_; chomp $warn };
 337     my $char = "\x{10f1ff}";
 338     my @a = split /\r?\n/, "$char\n";
 339     my $b = split /\r?\n/, "$char\n";
 340     is($b, scalar(@a));
 341     ok(@a == 1 && $a[0] eq $char && !defined($warn));
 342 }
 343
 344 {
 345     # [perl #18195]
 346     for my $u (0, 1) {
 347         for my $a (0, 1) {
 348             $_ = 'readin,database,readout';
 349             utf8::upgrade $_ if $u;
 350             /(.+)/;
 351             my @d = split /[,]/,$1;
 352             my $e = split /[,]/,$1;
 353             is($e, scalar(@d));
 354             is(join (':',@d), 'readin:database:readout', "[perl #18195]");
 355         }
 356     }
 357 }
 358
 359 {
 360     $p="a,b";
 361     utf8::upgrade $p;
 362     eval { @a=split(/[, ]+/,$p) };
 363     eval { $b=split(/[, ]+/,$p) };
 364     is($b, scalar(@a));
 365     is ("$@-@a-", '-a b-', '#20912 - split() to array with /[]+/ and utf8');
 366 }
 367
 368 {
 369     is (\@a, \@{"a"}, '@a must be global for following test');
 370     $p="";
 371     $n = @a = split /,/,$p;
 372     is ($n, 0, '#21765 - pmreplroot hack used to return undef for 0 iters');
 373 }
 374
 375 {
 376     # [perl #28938]
 377     # assigning off the end of the array after a split could leave garbage
 378     # in the inner elements
 379
 380     my $x;
 381     @a = split /,/, ',,,,,';
 382     $a[3]=1;
 383     $x = \$a[2];
 384     is (ref $x, 'SCALAR', '#28938 - garbage after extend');
 385 }
 386 {
 387     # check the special casing of split /\s/ and unicode
 388     use charnames qw(:full);
 389     # below test data is extracted from
 390     # PropList-5.0.0.txt
 391     # Date: 2006-06-07, 23:22:52 GMT [MD]
 392     #
 393     # Unicode Character Database
 394     # Copyright (c) 1991-2006 Unicode, Inc.
 395     # For terms of use, see http://www.unicode.org/terms_of_use.html
 396     # For documentation, see UCD.html
 397     my @spaces=(
 398         ord("\t"),      # Cc       <control-0009>
 399         ord("\n"),      # Cc       <control-000A>
 400         # not PerlSpace # Cc       <control-000B>
 401         ord("\f"),      # Cc       <control-000C>
 402         ord("\r"),      # Cc       <control-000D>
 403         ord(" "),       # Zs       SPACE
 404         ord("\N{NEL}"), # Cc       <control-0085>
 405         ord("\N{NO-BREAK SPACE}"),
 406                         # Zs       NO-BREAK SPACE
 407         0x1680,         # Zs       OGHAM SPACE MARK
 408         0x180E,         # Zs       MONGOLIAN VOWEL SEPARATOR
 409         0x2000..0x200A, # Zs  [11] EN QUAD..HAIR SPACE
 410         0x2028,         # Zl       LINE SEPARATOR
 411         0x2029,         # Zp       PARAGRAPH SEPARATOR
 412         0x202F,         # Zs       NARROW NO-BREAK SPACE
 413         0x205F,         # Zs       MEDIUM MATHEMATICAL SPACE
 414         0x3000          # Zs       IDEOGRAPHIC SPACE
 415     );
 416     #diag "Have @{[0+@spaces]} to test\n";
 417     foreach my $cp (@spaces) {
 418         my $msg = sprintf "Space: U+%04x", $cp;
 419         my $space = chr($cp);
 420         my $str="A:$space:B\x{FFFD}";
 421         chop $str;
 422
 423         my @res=split(/\s+/,$str);
 424         my $cnt=split(/\s+/,$str);
 425         ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/");
 426         is($cnt, scalar(@res), "$msg - /\\s+/ (count)");
 427
 428         my $s2 = "$space$space:A:$space$space:B\x{FFFD}";
 429         chop $s2;
 430
 431         my @r2 = split(' ',$s2);
 432         my $c2 = split(' ',$s2);
 433         ok(@r2 == 2 && join('-', @r2) eq ":A:-:B",  "$msg - ' '");
 434         is($c2, scalar(@r2), "$msg - ' ' (count)");
 435
 436         my @r3 = split(/\s+/, $s2);
 437         my $c3 = split(/\s+/, $s2);
 438         ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2");
 439         is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)");
 440     }
 441 }
 442
 443 {
 444     my $src = "ABC \0 FOO \0  XYZ";
 445     my @s = split(" \0 ", $src);
 446     my @r = split(/ \0 /, $src);
 447     my $cs = split(" \0 ", $src);
 448     my $cr = split(/ \0 /, $src);
 449     is(scalar(@s), 3);
 450     is($cs, 3);
 451     is($cr, 3);
 452     is($s[0], "ABC");
 453     is($s[1], "FOO");
 454     is($s[2]," XYZ");
 455     is(join(':',@s), join(':',@r));
 456 }
 457
 458 {
 459     use constant BANG => {};
 460     () = split m/,/, "", BANG;
 461     ok(1);
 462 }