| 1 | #!./perl -w |
| 2 | |
| 3 | BEGIN { |
| 4 | chdir 't' if -d 't'; |
| 5 | require './test.pl'; |
| 6 | set_up_inc('../lib'); |
| 7 | require Config; Config->import; |
| 8 | require constant; |
| 9 | constant->import(constcow => *Config::{NAME}); |
| 10 | require './charset_tools.pl'; |
| 11 | require './loc_tools.pl'; |
| 12 | } |
| 13 | |
| 14 | plan(tests => 281); |
| 15 | |
| 16 | $_ = 'david'; |
| 17 | $a = s/david/rules/r; |
| 18 | ok( $_ eq 'david' && $a eq 'rules', 'non-destructive substitute' ); |
| 19 | |
| 20 | $a = "david" =~ s/david/rules/r; |
| 21 | ok( $a eq 'rules', 's///r with constant' ); |
| 22 | |
| 23 | #[perl #127635] failed with -DPERL_NO_COW perl build (George smoker uses flag) |
| 24 | #Modification of a read-only value attempted at ../t/re/subst.t line 23. |
| 25 | $a = constcow =~ s/Config/David/r; |
| 26 | ok( $a eq 'David::', 's///r with COW constant' ); |
| 27 | |
| 28 | $a = "david" =~ s/david/"is"."great"/er; |
| 29 | ok( $a eq 'isgreat', 's///er' ); |
| 30 | |
| 31 | $a = "daviddavid" =~ s/david/cool/gr; |
| 32 | ok( $a eq 'coolcool', 's///gr' ); |
| 33 | |
| 34 | $a = 'david'; |
| 35 | $b = $a =~ s/david/sucks/r =~ s/sucks/rules/r; |
| 36 | ok( $a eq 'david' && $b eq 'rules', 'chained s///r' ); |
| 37 | |
| 38 | $a = 'david'; |
| 39 | $b = $a =~ s/xxx/sucks/r; |
| 40 | ok( $a eq 'david' && $b eq 'david', 'non matching s///r' ); |
| 41 | |
| 42 | $a = 'david'; |
| 43 | for (0..2) { |
| 44 | ok( 'david' =~ s/$a/rules/ro eq 'rules', 's///ro '.$_ ); |
| 45 | } |
| 46 | |
| 47 | $a = 'david'; |
| 48 | eval '$b = $a !~ s/david/is great/r'; |
| 49 | like( $@, qr{Using !~ with s///r doesn't make sense}, 's///r !~ operator gives error' ); |
| 50 | |
| 51 | { |
| 52 | no warnings 'uninitialized'; |
| 53 | $a = undef; |
| 54 | $b = $a =~ s/left/right/r; |
| 55 | ok ( !defined $a && !defined $b, 's///r with undef input' ); |
| 56 | |
| 57 | use warnings; |
| 58 | warning_like(sub { $b = $a =~ s/left/right/r }, |
| 59 | qr/^Use of uninitialized value/, |
| 60 | 's///r Uninitialized warning'); |
| 61 | |
| 62 | $a = 'david'; |
| 63 | warning_like(sub {eval 's/david/sucks/r; 1'}, |
| 64 | qr/^Useless use of non-destructive substitution/, |
| 65 | 's///r void context warning'); |
| 66 | } |
| 67 | |
| 68 | $a = ''; |
| 69 | $b = $a =~ s/david/rules/r; |
| 70 | ok( $a eq '' && $b eq '', 's///r on empty string' ); |
| 71 | |
| 72 | $_ = 'david'; |
| 73 | @b = s/david/rules/r; |
| 74 | ok( $_ eq 'david' && $b[0] eq 'rules', 's///r in list context' ); |
| 75 | |
| 76 | # Magic value and s///r |
| 77 | require Tie::Scalar; |
| 78 | tie $m, 'Tie::StdScalar'; # makes $a magical |
| 79 | $m = "david"; |
| 80 | $b = $m =~ s/david/rules/r; |
| 81 | ok( $m eq 'david' && $b eq 'rules', 's///r with magic input' ); |
| 82 | |
| 83 | $m = $b =~ s/rules/david/r; |
| 84 | ok( defined tied($m), 's///r magic isn\'t lost' ); |
| 85 | |
| 86 | $b = $m =~ s/xxx/yyy/r; |
| 87 | ok( ! defined tied($b), 's///r magic isn\'t contagious' ); |
| 88 | |
| 89 | my $ref = \("aaa" =~ s/aaa/bbb/r); |
| 90 | refcount_is $ref, 1, 's///r does not leak'; |
| 91 | $ref = \("aaa" =~ s/aaa/bbb/rg); |
| 92 | refcount_is $ref, 1, 's///rg does not leak'; |
| 93 | |
| 94 | $x = 'foo'; |
| 95 | $_ = "x"; |
| 96 | s/x/\$x/; |
| 97 | ok( $_ eq '$x', ":$_: eq :\$x:" ); |
| 98 | |
| 99 | $_ = "x"; |
| 100 | s/x/$x/; |
| 101 | ok( $_ eq 'foo', ":$_: eq :foo:" ); |
| 102 | |
| 103 | $_ = "x"; |
| 104 | s/x/\$x $x/; |
| 105 | ok( $_ eq '$x foo', ":$_: eq :\$x foo:" ); |
| 106 | |
| 107 | $b = 'cd'; |
| 108 | ($a = 'abcdef') =~ s<(b${b}e)>'\n$1'; |
| 109 | ok( $1 eq 'bcde' && $a eq 'a\n$1f', ":$1: eq :bcde: ; :$a: eq :a\\n\$1f:" ); |
| 110 | |
| 111 | $a = 'abacada'; |
| 112 | ok( ($a =~ s/a/x/g) == 4 && $a eq 'xbxcxdx' ); |
| 113 | |
| 114 | ok( ($a =~ s/a/y/g) == 0 && $a eq 'xbxcxdx' ); |
| 115 | |
| 116 | ok( ($a =~ s/b/y/g) == 1 && $a eq 'xyxcxdx' ); |
| 117 | |
| 118 | $_ = 'ABACADA'; |
| 119 | ok( /a/i && s///gi && $_ eq 'BCD' ); |
| 120 | |
| 121 | $_ = '\\' x 4; |
| 122 | ok( length($_) == 4 ); |
| 123 | $snum = s/\\/\\\\/g; |
| 124 | ok( $_ eq '\\' x 8 && $snum == 4 ); |
| 125 | |
| 126 | $_ = '\/' x 4; |
| 127 | ok( length($_) == 8 ); |
| 128 | $snum = s/\//\/\//g; |
| 129 | ok( $_ eq '\\//' x 4 && $snum == 4 ); |
| 130 | ok( length($_) == 12 ); |
| 131 | |
| 132 | $_ = 'aaaXXXXbbb'; |
| 133 | s/^a//; |
| 134 | ok( $_ eq 'aaXXXXbbb' ); |
| 135 | |
| 136 | $_ = 'aaaXXXXbbb'; |
| 137 | s/a//; |
| 138 | ok( $_ eq 'aaXXXXbbb' ); |
| 139 | |
| 140 | $_ = 'aaaXXXXbbb'; |
| 141 | s/^a/b/; |
| 142 | ok( $_ eq 'baaXXXXbbb' ); |
| 143 | |
| 144 | $_ = 'aaaXXXXbbb'; |
| 145 | s/a/b/; |
| 146 | ok( $_ eq 'baaXXXXbbb' ); |
| 147 | |
| 148 | $_ = 'aaaXXXXbbb'; |
| 149 | s/aa//; |
| 150 | ok( $_ eq 'aXXXXbbb' ); |
| 151 | |
| 152 | $_ = 'aaaXXXXbbb'; |
| 153 | s/aa/b/; |
| 154 | ok( $_ eq 'baXXXXbbb' ); |
| 155 | |
| 156 | $_ = 'aaaXXXXbbb'; |
| 157 | s/b$//; |
| 158 | ok( $_ eq 'aaaXXXXbb' ); |
| 159 | |
| 160 | $_ = 'aaaXXXXbbb'; |
| 161 | s/b//; |
| 162 | ok( $_ eq 'aaaXXXXbb' ); |
| 163 | |
| 164 | $_ = 'aaaXXXXbbb'; |
| 165 | s/bb//; |
| 166 | ok( $_ eq 'aaaXXXXb' ); |
| 167 | |
| 168 | $_ = 'aaaXXXXbbb'; |
| 169 | s/aX/y/; |
| 170 | ok( $_ eq 'aayXXXbbb' ); |
| 171 | |
| 172 | $_ = 'aaaXXXXbbb'; |
| 173 | s/Xb/z/; |
| 174 | ok( $_ eq 'aaaXXXzbb' ); |
| 175 | |
| 176 | $_ = 'aaaXXXXbbb'; |
| 177 | s/aaX.*Xbb//; |
| 178 | ok( $_ eq 'ab' ); |
| 179 | |
| 180 | $_ = 'aaaXXXXbbb'; |
| 181 | s/bb/x/; |
| 182 | ok( $_ eq 'aaaXXXXxb' ); |
| 183 | |
| 184 | # now for some unoptimized versions of the same. |
| 185 | |
| 186 | $_ = 'aaaXXXXbbb'; |
| 187 | $x ne $x || s/^a//; |
| 188 | ok( $_ eq 'aaXXXXbbb' ); |
| 189 | |
| 190 | $_ = 'aaaXXXXbbb'; |
| 191 | $x ne $x || s/a//; |
| 192 | ok( $_ eq 'aaXXXXbbb' ); |
| 193 | |
| 194 | $_ = 'aaaXXXXbbb'; |
| 195 | $x ne $x || s/^a/b/; |
| 196 | ok( $_ eq 'baaXXXXbbb' ); |
| 197 | |
| 198 | $_ = 'aaaXXXXbbb'; |
| 199 | $x ne $x || s/a/b/; |
| 200 | ok( $_ eq 'baaXXXXbbb' ); |
| 201 | |
| 202 | $_ = 'aaaXXXXbbb'; |
| 203 | $x ne $x || s/aa//; |
| 204 | ok( $_ eq 'aXXXXbbb' ); |
| 205 | |
| 206 | $_ = 'aaaXXXXbbb'; |
| 207 | $x ne $x || s/aa/b/; |
| 208 | ok( $_ eq 'baXXXXbbb' ); |
| 209 | |
| 210 | $_ = 'aaaXXXXbbb'; |
| 211 | $x ne $x || s/b$//; |
| 212 | ok( $_ eq 'aaaXXXXbb' ); |
| 213 | |
| 214 | $_ = 'aaaXXXXbbb'; |
| 215 | $x ne $x || s/b//; |
| 216 | ok( $_ eq 'aaaXXXXbb' ); |
| 217 | |
| 218 | $_ = 'aaaXXXXbbb'; |
| 219 | $x ne $x || s/bb//; |
| 220 | ok( $_ eq 'aaaXXXXb' ); |
| 221 | |
| 222 | $_ = 'aaaXXXXbbb'; |
| 223 | $x ne $x || s/aX/y/; |
| 224 | ok( $_ eq 'aayXXXbbb' ); |
| 225 | |
| 226 | $_ = 'aaaXXXXbbb'; |
| 227 | $x ne $x || s/Xb/z/; |
| 228 | ok( $_ eq 'aaaXXXzbb' ); |
| 229 | |
| 230 | $_ = 'aaaXXXXbbb'; |
| 231 | $x ne $x || s/aaX.*Xbb//; |
| 232 | ok( $_ eq 'ab' ); |
| 233 | |
| 234 | $_ = 'aaaXXXXbbb'; |
| 235 | $x ne $x || s/bb/x/; |
| 236 | ok( $_ eq 'aaaXXXXxb' ); |
| 237 | |
| 238 | $_ = 'abc123xyz'; |
| 239 | s/(\d+)/$1*2/e; # yields 'abc246xyz' |
| 240 | ok( $_ eq 'abc246xyz' ); |
| 241 | s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc 246xyz' |
| 242 | ok( $_ eq 'abc 246xyz' ); |
| 243 | s/(\w)/$1 x 2/eg; # yields 'aabbcc 224466xxyyzz' |
| 244 | ok( $_ eq 'aabbcc 224466xxyyzz' ); |
| 245 | |
| 246 | $_ = "aaaaa"; |
| 247 | ok( y/a/b/ == 5 ); |
| 248 | ok( y/a/b/ == 0 ); |
| 249 | ok( y/b// == 5 ); |
| 250 | ok( y/b/c/s == 5 ); |
| 251 | ok( y/c// == 1 ); |
| 252 | ok( y/c//d == 1 ); |
| 253 | ok( $_ eq "" ); |
| 254 | |
| 255 | $_ = "Now is the %#*! time for all good men..."; |
| 256 | ok( ($x=(y/a-zA-Z //cd)) == 7 ); |
| 257 | ok( y/ / /s == 8 ); |
| 258 | |
| 259 | $_ = 'abcdefghijklmnopqrstuvwxyz0123456789'; |
| 260 | tr/a-z/A-Z/; |
| 261 | |
| 262 | ok( $_ eq 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' ); |
| 263 | |
| 264 | # same as tr/A-Z/a-z/; |
| 265 | if (defined $Config{ebcdic} && $Config{ebcdic} eq 'define') { # EBCDIC. |
| 266 | no utf8; |
| 267 | y[\301-\351][\201-\251]; |
| 268 | } else { # Ye Olde ASCII. Or something like it. |
| 269 | y[\101-\132][\141-\172]; |
| 270 | } |
| 271 | |
| 272 | ok( $_ eq 'abcdefghijklmnopqrstuvwxyz0123456789' ); |
| 273 | |
| 274 | SKIP: { |
| 275 | skip("ASCII-centric test",1) unless (ord("+") == ord(",") - 1 |
| 276 | && ord(",") == ord("-") - 1 |
| 277 | && ord("a") == ord("b") - 1 |
| 278 | && ord("b") == ord("c") - 1); |
| 279 | $_ = '+,-'; |
| 280 | tr/+--/a-c/; |
| 281 | ok( $_ eq 'abc' ); |
| 282 | } |
| 283 | |
| 284 | $_ = '+,-'; |
| 285 | tr/+\--/a\/c/; |
| 286 | ok( $_ eq 'a,/' ); |
| 287 | |
| 288 | $_ = '+,-'; |
| 289 | tr/-+,/ab\-/; |
| 290 | ok( $_ eq 'b-a' ); |
| 291 | |
| 292 | |
| 293 | # test recursive substitutions |
| 294 | # code based on the recursive expansion of makefile variables |
| 295 | |
| 296 | my %MK = ( |
| 297 | AAAAA => '$(B)', B=>'$(C)', C => 'D', # long->short |
| 298 | E => '$(F)', F=>'p $(G) q', G => 'HHHHH', # short->long |
| 299 | DIR => '$(UNDEFINEDNAME)/xxx', |
| 300 | ); |
| 301 | sub var { |
| 302 | my($var,$level) = @_; |
| 303 | return "\$($var)" unless exists $MK{$var}; |
| 304 | return exp_vars($MK{$var}, $level+1); # can recurse |
| 305 | } |
| 306 | sub exp_vars { |
| 307 | my($str,$level) = @_; |
| 308 | $str =~ s/\$\((\w+)\)/var($1, $level+1)/ge; # can recurse |
| 309 | #warn "exp_vars $level = '$str'\n"; |
| 310 | $str; |
| 311 | } |
| 312 | |
| 313 | ok( exp_vars('$(AAAAA)',0) eq 'D' ); |
| 314 | ok( exp_vars('$(E)',0) eq 'p HHHHH q' ); |
| 315 | ok( exp_vars('$(DIR)',0) eq '$(UNDEFINEDNAME)/xxx' ); |
| 316 | ok( exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' ); |
| 317 | |
| 318 | $_ = "abcd"; |
| 319 | s/(..)/$x = $1, m#.#/eg; |
| 320 | ok( $x eq "cd", 'a match nested in the RHS of a substitution' ); |
| 321 | |
| 322 | # Subst and lookbehind |
| 323 | |
| 324 | $_="ccccc"; |
| 325 | $snum = s/(?<!x)c/x/g; |
| 326 | ok( $_ eq "xxxxx" && $snum == 5 ); |
| 327 | |
| 328 | $_="ccccc"; |
| 329 | $snum = s/(?<!x)(c)/x/g; |
| 330 | ok( $_ eq "xxxxx" && $snum == 5 ); |
| 331 | |
| 332 | $_="foobbarfoobbar"; |
| 333 | $snum = s/(?<!r)foobbar/foobar/g; |
| 334 | ok( $_ eq "foobarfoobbar" && $snum == 1 ); |
| 335 | |
| 336 | $_="foobbarfoobbar"; |
| 337 | $snum = s/(?<!ar)(foobbar)/foobar/g; |
| 338 | ok( $_ eq "foobarfoobbar" && $snum == 1 ); |
| 339 | |
| 340 | $_="foobbarfoobbar"; |
| 341 | $snum = s/(?<!ar)foobbar/foobar/g; |
| 342 | ok( $_ eq "foobarfoobbar" && $snum == 1 ); |
| 343 | |
| 344 | eval 's{foo} # this is a comment, not a delimiter |
| 345 | {bar};'; |
| 346 | ok( ! @?, 'parsing of split subst with comment' ); |
| 347 | |
| 348 | $snum = eval '$_="exactly"; s sxsys;m 3(yactl)3;$1'; |
| 349 | is( $snum, 'yactl', 'alpha delimiters are allowed' ); |
| 350 | |
| 351 | $_="baacbaa"; |
| 352 | $snum = tr/a/b/s; |
| 353 | ok( $_ eq "bbcbb" && $snum == 4, |
| 354 | 'check if squashing works at the end of string' ); |
| 355 | |
| 356 | $_ = "ab"; |
| 357 | ok( s/a/b/ == 1 ); |
| 358 | |
| 359 | $_ = <<'EOL'; |
| 360 | $url = new URI::URL "http://www/"; die if $url eq "xXx"; |
| 361 | EOL |
| 362 | $^R = 'junk'; |
| 363 | |
| 364 | $foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . |
| 365 | ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . |
| 366 | ' lowercase $@%#MiXeD$@%# '; |
| 367 | |
| 368 | $snum = |
| 369 | s{ \d+ \b [,.;]? (?{ 'digits' }) |
| 370 | | |
| 371 | [a-z]+ \b [,.;]? (?{ 'lowercase' }) |
| 372 | | |
| 373 | [A-Z]+ \b [,.;]? (?{ 'UPPERCASE' }) |
| 374 | | |
| 375 | [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' }) |
| 376 | | |
| 377 | [A-Za-z]+ \b [,.;]? (?{ 'MiXeD' }) |
| 378 | | |
| 379 | [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' }) |
| 380 | | |
| 381 | \s+ (?{ ' ' }) |
| 382 | | |
| 383 | [^A-Za-z0-9\s]+ (?{ '$@%#' }) |
| 384 | }{$^R}xg; |
| 385 | ok( $_ eq $foo ); |
| 386 | ok( $snum == 31 ); |
| 387 | |
| 388 | $_ = 'a' x 6; |
| 389 | $snum = s/a(?{})//g; |
| 390 | ok( $_ eq '' && $snum == 6 ); |
| 391 | |
| 392 | $_ = 'x' x 20; |
| 393 | $snum = s/(\d*|x)/<$1>/g; |
| 394 | $foo = '<>' . ('<x><>' x 20) ; |
| 395 | ok( $_ eq $foo && $snum == 41 ); |
| 396 | |
| 397 | $t = 'aaaaaaaaa'; |
| 398 | |
| 399 | $_ = $t; |
| 400 | pos = 6; |
| 401 | $snum = s/\Ga/xx/g; |
| 402 | ok( $_ eq 'aaaaaaxxxxxx' && $snum == 3 ); |
| 403 | |
| 404 | $_ = $t; |
| 405 | pos = 6; |
| 406 | $snum = s/\Ga/x/g; |
| 407 | ok( $_ eq 'aaaaaaxxx' && $snum == 3 ); |
| 408 | |
| 409 | $_ = $t; |
| 410 | pos = 6; |
| 411 | s/\Ga/xx/; |
| 412 | ok( $_ eq 'aaaaaaxxaa' ); |
| 413 | |
| 414 | $_ = $t; |
| 415 | pos = 6; |
| 416 | s/\Ga/x/; |
| 417 | ok( $_ eq 'aaaaaaxaa' ); |
| 418 | |
| 419 | $_ = $t; |
| 420 | $snum = s/\Ga/xx/g; |
| 421 | ok( $_ eq 'xxxxxxxxxxxxxxxxxx' && $snum == 9 ); |
| 422 | |
| 423 | $_ = $t; |
| 424 | $snum = s/\Ga/x/g; |
| 425 | ok( $_ eq 'xxxxxxxxx' && $snum == 9 ); |
| 426 | |
| 427 | $_ = $t; |
| 428 | s/\Ga/xx/; |
| 429 | ok( $_ eq 'xxaaaaaaaa' ); |
| 430 | |
| 431 | $_ = $t; |
| 432 | s/\Ga/x/; |
| 433 | ok( $_ eq 'xaaaaaaaa' ); |
| 434 | |
| 435 | $_ = 'aaaa'; |
| 436 | $snum = s/\ba/./g; |
| 437 | ok( $_ eq '.aaa' && $snum == 1 ); |
| 438 | |
| 439 | eval q% s/a/"b"}/e %; |
| 440 | ok( $@ =~ /Bad evalled substitution/ ); |
| 441 | eval q% ($_ = "x") =~ s/(.)/"$1 "/e %; |
| 442 | ok( $_ eq "x " and !length $@ ); |
| 443 | $x = $x = 'interp'; |
| 444 | eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %; |
| 445 | ok( $_ eq '' and !length $@ ); |
| 446 | |
| 447 | $_ = "C:/"; |
| 448 | ok( !s/^([a-z]:)/\u$1/ ); |
| 449 | |
| 450 | $_ = "Charles Bronson"; |
| 451 | $snum = s/\B\w//g; |
| 452 | ok( $_ eq "C B" && $snum == 12 ); |
| 453 | |
| 454 | { |
| 455 | use utf8; |
| 456 | my $s = "H\303\266he"; |
| 457 | my $l = my $r = $s; |
| 458 | $l =~ s/[^\w]//g; |
| 459 | $r =~ s/[^\w\.]//g; |
| 460 | is($l, $r, "use utf8 \\w"); |
| 461 | } |
| 462 | |
| 463 | my $pv1 = my $pv2 = "Andreas J. K\303\266nig"; |
| 464 | $pv1 =~ s/A/\x{100}/; |
| 465 | substr($pv2,0,1) = "\x{100}"; |
| 466 | is($pv1, $pv2); |
| 467 | |
| 468 | { |
| 469 | { |
| 470 | # Gregor Chrupala <gregor.chrupala@star-group.net> |
| 471 | use utf8; |
| 472 | $a = 'España'; |
| 473 | $a =~ s/ñ/ñ/; |
| 474 | like($a, qr/ñ/, "use utf8 RHS"); |
| 475 | } |
| 476 | |
| 477 | { |
| 478 | use utf8; |
| 479 | $a = 'España España'; |
| 480 | $a =~ s/ñ/ñ/; |
| 481 | like($a, qr/ñ/, "use utf8 LHS"); |
| 482 | } |
| 483 | |
| 484 | { |
| 485 | use utf8; |
| 486 | $a = 'España'; |
| 487 | $a =~ s/ñ/ñ/; |
| 488 | like($a, qr/ñ/, "use utf8 LHS and RHS"); |
| 489 | } |
| 490 | } |
| 491 | |
| 492 | { |
| 493 | # SADAHIRO Tomoyuki <bqw10602@nifty.com> |
| 494 | |
| 495 | $a = "\x{100}\x{101}"; |
| 496 | $a =~ s/\x{101}/\xFF/; |
| 497 | like($a, qr/\xFF/); |
| 498 | is(length($a), 2, "SADAHIRO utf8 s///"); |
| 499 | |
| 500 | $a = "\x{100}\x{101}"; |
| 501 | $a =~ s/\x{101}/"\xFF"/e; |
| 502 | like($a, qr/\xFF/); |
| 503 | is(length($a), 2); |
| 504 | |
| 505 | $a = "\x{100}\x{101}"; |
| 506 | $a =~ s/\x{101}/\xFF\xFF\xFF/; |
| 507 | like($a, qr/\xFF\xFF\xFF/); |
| 508 | is(length($a), 4); |
| 509 | |
| 510 | $a = "\x{100}\x{101}"; |
| 511 | $a =~ s/\x{101}/"\xFF\xFF\xFF"/e; |
| 512 | like($a, qr/\xFF\xFF\xFF/); |
| 513 | is(length($a), 4); |
| 514 | |
| 515 | $a = "\xFF\x{101}"; |
| 516 | $a =~ s/\xFF/\x{100}/; |
| 517 | like($a, qr/\x{100}/); |
| 518 | is(length($a), 2); |
| 519 | |
| 520 | $a = "\xFF\x{101}"; |
| 521 | $a =~ s/\xFF/"\x{100}"/e; |
| 522 | like($a, qr/\x{100}/); |
| 523 | is(length($a), 2); |
| 524 | |
| 525 | $a = "\xFF"; |
| 526 | $a =~ s/\xFF/\x{100}/; |
| 527 | like($a, qr/\x{100}/); |
| 528 | is(length($a), 1); |
| 529 | |
| 530 | $a = "\xFF"; |
| 531 | $a =~ s/\xFF/"\x{100}"/e; |
| 532 | like($a, qr/\x{100}/); |
| 533 | is(length($a), 1); |
| 534 | } |
| 535 | |
| 536 | { |
| 537 | # subst with mixed utf8/non-utf8 type |
| 538 | my($ua, $ub, $uc, $ud) = ("\x{101}", "\x{102}", "\x{103}", "\x{104}"); |
| 539 | my($na, $nb) = ("\x{ff}", "\x{fe}"); |
| 540 | my $a = "$ua--$ub"; |
| 541 | my $b; |
| 542 | ($b = $a) =~ s/--/$na/; |
| 543 | is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8"); |
| 544 | ($b = $a) =~ s/--/--$na--/; |
| 545 | is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8"); |
| 546 | ($b = $a) =~ s/--/$uc/; |
| 547 | is($b, "$ua$uc$ub", "s///: replace utf8 into utf8"); |
| 548 | ($b = $a) =~ s/--/--$uc--/; |
| 549 | is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8"); |
| 550 | $a = "$na--$nb"; |
| 551 | ($b = $a) =~ s/--/$ua/; |
| 552 | is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8"); |
| 553 | ($b = $a) =~ s/--/--$ua--/; |
| 554 | is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8"); |
| 555 | |
| 556 | # now with utf8 pattern |
| 557 | $a = "$ua--$ub"; |
| 558 | ($b = $a) =~ s/-($ud)?-/$na/; |
| 559 | is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8 (utf8 pattern)"); |
| 560 | ($b = $a) =~ s/-($ud)?-/--$na--/; |
| 561 | is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8 (utf8 pattern)"); |
| 562 | ($b = $a) =~ s/-($ud)?-/$uc/; |
| 563 | is($b, "$ua$uc$ub", "s///: replace utf8 into utf8 (utf8 pattern)"); |
| 564 | ($b = $a) =~ s/-($ud)?-/--$uc--/; |
| 565 | is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8 (utf8 pattern)"); |
| 566 | $a = "$na--$nb"; |
| 567 | ($b = $a) =~ s/-($ud)?-/$ua/; |
| 568 | is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8 (utf8 pattern)"); |
| 569 | ($b = $a) =~ s/-($ud)?-/--$ua--/; |
| 570 | is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8 (utf8 pattern)"); |
| 571 | ($b = $a) =~ s/-($ud)?-/$na/; |
| 572 | is($b, "$na$na$nb", "s///: replace non-utf8 into non-utf8 (utf8 pattern)"); |
| 573 | ($b = $a) =~ s/-($ud)?-/--$na--/; |
| 574 | is($b, "$na--$na--$nb", "s///: replace long non-utf8 into non-utf8 (utf8 pattern)"); |
| 575 | } |
| 576 | |
| 577 | $_ = 'aaaa'; |
| 578 | $r = 'x'; |
| 579 | $s = s/a(?{})/$r/g; |
| 580 | is("<$_> <$s>", "<xxxx> <4>", "[perl #7806]"); |
| 581 | |
| 582 | $_ = 'aaaa'; |
| 583 | $s = s/a(?{})//g; |
| 584 | is("<$_> <$s>", "<> <4>", "[perl #7806]"); |
| 585 | |
| 586 | # [perl #19048] Coredump in silly replacement |
| 587 | { |
| 588 | local $^W = 0; |
| 589 | $_="abcdef\n"; |
| 590 | s!.!!eg; |
| 591 | is($_, "\n", "[perl #19048]"); |
| 592 | } |
| 593 | |
| 594 | # [perl #17757] interaction between saw_ampersand and study |
| 595 | { |
| 596 | my $f = eval q{ $& }; |
| 597 | $f = "xx"; |
| 598 | study $f; |
| 599 | $f =~ s/x/y/g; |
| 600 | is($f, "yy", "[perl #17757]"); |
| 601 | } |
| 602 | |
| 603 | # [perl #20684] returned a zero count |
| 604 | $_ = "1111"; |
| 605 | is(s/(??{1})/2/eg, 4, '#20684 s/// with (??{..}) inside'); |
| 606 | |
| 607 | # [perl #20682] @- not visible in replacement |
| 608 | $_ = "123"; |
| 609 | /(2)/; # seed @- with something else |
| 610 | s/(1)(2)(3)/$#- (@-)/; |
| 611 | is($_, "3 (0 0 1 2)", '#20682 @- not visible in replacement'); |
| 612 | |
| 613 | # [perl #20682] $^N not visible in replacement |
| 614 | $_ = "abc"; |
| 615 | /(a)/; s/(b)|(c)/-$^N/g; |
| 616 | is($_,'a-b-c','#20682 $^N not visible in replacement'); |
| 617 | |
| 618 | # [perl #22351] perl bug with 'e' substitution modifier |
| 619 | my $name = "chris"; |
| 620 | { |
| 621 | no warnings 'uninitialized'; |
| 622 | $name =~ s/hr//e; |
| 623 | } |
| 624 | is($name, "cis", q[#22351 bug with 'e' substitution modifier]); |
| 625 | |
| 626 | |
| 627 | # [perl #34171] $1 didn't honour 'use bytes' in s//e |
| 628 | { |
| 629 | my $s="\x{100}"; |
| 630 | my $x; |
| 631 | { |
| 632 | use bytes; |
| 633 | $s=~ s/(..)/$x=$1/e |
| 634 | } |
| 635 | is(length($x), 2, '[perl #34171]'); |
| 636 | } |
| 637 | |
| 638 | |
| 639 | { # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not |
| 640 | my $c; |
| 641 | |
| 642 | ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g; |
| 643 | is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g"); |
| 644 | |
| 645 | ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g; |
| 646 | is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g"); |
| 647 | } |
| 648 | { |
| 649 | $_ = "xy"; |
| 650 | no warnings 'uninitialized'; |
| 651 | /(((((((((x)))))))))(z)/; # clear $10 |
| 652 | s/(((((((((x)))))))))(y)/${10}/; |
| 653 | is($_,"y","RT#6006: \$_ eq '$_'"); |
| 654 | $_ = "xr"; |
| 655 | s/(((((((((x)))))))))(r)/fooba${10}/; |
| 656 | is($_,"foobar","RT#6006: \$_ eq '$_'"); |
| 657 | } |
| 658 | { |
| 659 | my $want=("\n" x 11).("B\n" x 11)."B"; |
| 660 | $_="B"; |
| 661 | our $i; |
| 662 | for $i(1..11){ |
| 663 | s/^.*$/$&/gm; |
| 664 | $_="\n$_\n$&"; |
| 665 | } |
| 666 | is($want,$_,"RT#17542"); |
| 667 | } |
| 668 | |
| 669 | { |
| 670 | my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}"); |
| 671 | foreach (@tests) { |
| 672 | my $id = ord $_; |
| 673 | s/./pos/ge; |
| 674 | is($_, "012", "RT#52104: $id"); |
| 675 | } |
| 676 | } |
| 677 | |
| 678 | fresh_perl_is( '$_=q(foo);s/(.)\G//g;print' => 'foo', {}, |
| 679 | '[perl #69056] positive GPOS regex segfault' ); |
| 680 | fresh_perl_is( '$_="abcdef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'aXXdef', {}, |
| 681 | 'positive GPOS regex substitution failure (#69056, #114884)' ); |
| 682 | fresh_perl_is( '$_="abcdefg123456"; s/(?<=...\G)?(\d)/($1)/; print' => 'abcdefg(1)23456', {}, |
| 683 | 'positive GPOS lookbehind regex substitution failure #114884' ); |
| 684 | |
| 685 | # s/..\G//g should stop after the first iteration, rather than working its |
| 686 | # way backwards, or looping infinitely, or SEGVing (for example) |
| 687 | { |
| 688 | my ($s, $count); |
| 689 | |
| 690 | # use a function to disable constant folding |
| 691 | my $f = sub { substr("789", 0, $_[0]) }; |
| 692 | |
| 693 | $s = '123456'; |
| 694 | pos($s) = 4; |
| 695 | $count = $s =~ s/\d\d\G/7/g; |
| 696 | is($count, 1, "..\\G count (short)"); |
| 697 | is($s, "12756", "..\\G s (short)"); |
| 698 | |
| 699 | $s = '123456'; |
| 700 | pos($s) = 4; |
| 701 | $count = $s =~ s/\d\d\G/78/g; |
| 702 | is($count, 1, "..\\G count (equal)"); |
| 703 | is($s, "127856", "..\\G s (equal)"); |
| 704 | |
| 705 | $s = '123456'; |
| 706 | pos($s) = 4; |
| 707 | $count = $s =~ s/\d\d\G/789/g; |
| 708 | is($count, 1, "..\\G count (long)"); |
| 709 | is($s, "1278956", "..\\G s (long)"); |
| 710 | |
| 711 | |
| 712 | $s = '123456'; |
| 713 | pos($s) = 4; |
| 714 | $count = $s =~ s/\d\d\G/$f->(1)/eg; |
| 715 | is($count, 1, "..\\G count (short code)"); |
| 716 | is($s, "12756", "..\\G s (short code)"); |
| 717 | |
| 718 | $s = '123456'; |
| 719 | pos($s) = 4; |
| 720 | $count = $s =~ s/\d\d\G/$f->(2)/eg; |
| 721 | is($count, 1, "..\\G count (equal code)"); |
| 722 | is($s, "127856", "..\\G s (equal code)"); |
| 723 | |
| 724 | $s = '123456'; |
| 725 | pos($s) = 4; |
| 726 | $count = $s =~ s/\d\d\G/$f->(3)/eg; |
| 727 | is($count, 1, "..\\G count (long code)"); |
| 728 | is($s, "1278956", "..\\G s (long code)"); |
| 729 | |
| 730 | $s = '123456'; |
| 731 | pos($s) = 4; |
| 732 | $count = $s =~ s/\d\d(?=\d\G)/7/g; |
| 733 | is($count, 1, "..\\G count (lookahead short)"); |
| 734 | is($s, "17456", "..\\G s (lookahead short)"); |
| 735 | |
| 736 | $s = '123456'; |
| 737 | pos($s) = 4; |
| 738 | $count = $s =~ s/\d\d(?=\d\G)/78/g; |
| 739 | is($count, 1, "..\\G count (lookahead equal)"); |
| 740 | is($s, "178456", "..\\G s (lookahead equal)"); |
| 741 | |
| 742 | $s = '123456'; |
| 743 | pos($s) = 4; |
| 744 | $count = $s =~ s/\d\d(?=\d\G)/789/g; |
| 745 | is($count, 1, "..\\G count (lookahead long)"); |
| 746 | is($s, "1789456", "..\\G s (lookahead long)"); |
| 747 | |
| 748 | |
| 749 | $s = '123456'; |
| 750 | pos($s) = 4; |
| 751 | $count = $s =~ s/\d\d(?=\d\G)/$f->(1)/eg; |
| 752 | is($count, 1, "..\\G count (lookahead short code)"); |
| 753 | is($s, "17456", "..\\G s (lookahead short code)"); |
| 754 | |
| 755 | $s = '123456'; |
| 756 | pos($s) = 4; |
| 757 | $count = $s =~ s/\d\d(?=\d\G)/$f->(2)/eg; |
| 758 | is($count, 1, "..\\G count (lookahead equal code)"); |
| 759 | is($s, "178456", "..\\G s (lookahead equal code)"); |
| 760 | |
| 761 | $s = '123456'; |
| 762 | pos($s) = 4; |
| 763 | $count = $s =~ s/\d\d(?=\d\G)/$f->(3)/eg; |
| 764 | is($count, 1, "..\\G count (lookahead long code)"); |
| 765 | is($s, "1789456", "..\\G s (lookahead long code)"); |
| 766 | } |
| 767 | |
| 768 | |
| 769 | # [perl #71470] $var =~ s/$qr//e calling get-magic on $_ as well as $var |
| 770 | { |
| 771 | local *_; |
| 772 | my $scratch; |
| 773 | sub qrBug::TIESCALAR { bless[pop], 'qrBug' } |
| 774 | sub qrBug::FETCH { $scratch .= "[fetching $_[0][0]]"; 'prew' } |
| 775 | sub qrBug::STORE{} |
| 776 | tie my $kror, qrBug => '$kror'; |
| 777 | tie $_, qrBug => '$_'; |
| 778 | my $qr = qr/(?:)/; |
| 779 | $kror =~ s/$qr/""/e; |
| 780 | is( |
| 781 | $scratch, '[fetching $kror]', |
| 782 | 'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var', |
| 783 | ); |
| 784 | } |
| 785 | |
| 786 | { # Bug #41530; replacing non-utf8 with a utf8 causes problems |
| 787 | my $string = "a\x{a0}a"; |
| 788 | my $sub_string = $string; |
| 789 | ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8"); |
| 790 | $sub_string =~ s/a/\x{100}/g; |
| 791 | ok(utf8::is_utf8($sub_string), |
| 792 | 'Verify replace of non-utf8 with utf8 upgrades to utf8'); |
| 793 | is($sub_string, "\x{100}\x{A0}\x{100}", |
| 794 | 'Verify #41530 fixed: replace of non-utf8 with utf8'); |
| 795 | |
| 796 | my $non_sub_string = $string; |
| 797 | ok(! utf8::is_utf8($non_sub_string), |
| 798 | "Verify that string isn't initially utf8"); |
| 799 | $non_sub_string =~ s/b/\x{100}/g; |
| 800 | ok(! utf8::is_utf8($non_sub_string), |
| 801 | "Verify that failed substitute doesn't change string's utf8ness"); |
| 802 | is($non_sub_string, $string, |
| 803 | "Verify that failed substitute doesn't change string"); |
| 804 | } |
| 805 | |
| 806 | { # Verify largish octal in replacement pattern |
| 807 | |
| 808 | my $string = "a"; |
| 809 | $string =~ s/a/\400/; |
| 810 | is($string, chr 0x100, "Verify that handles s/foo/\\400/"); |
| 811 | $string =~ s/./\600/; |
| 812 | is($string, chr 0x180, "Verify that handles s/foo/\\600/"); |
| 813 | $string =~ s/./\777/; |
| 814 | is($string, chr 0x1FF, "Verify that handles s/foo/\\777/"); |
| 815 | } |
| 816 | |
| 817 | # Scoping of s//the RHS/ when there is no /e |
| 818 | # Tests based on [perl #19078] |
| 819 | { |
| 820 | local *_; |
| 821 | my $output = ''; my %a; |
| 822 | no warnings 'uninitialized'; |
| 823 | |
| 824 | $_="CCCGGG"; |
| 825 | s!.!<@a{$output .= ("$&"),/[$&]/g}>!g; |
| 826 | $output .= $_; |
| 827 | is( |
| 828 | $output, "CCCGGG< >< >< >< >< >< >", |
| 829 | 's/// sets PL_curpm for each iteration even when the RHS has set it' |
| 830 | ); |
| 831 | |
| 832 | s/C/$a{m\G\}/; |
| 833 | is( |
| 834 | "$&", G => |
| 835 | 'Match vars reflect the last match after s/pat/$a{m|pat|}/ without /e' |
| 836 | ); |
| 837 | } |
| 838 | |
| 839 | { |
| 840 | # a tied scalar that returned a plain string, got messed up |
| 841 | # when substituted with a UTF8 replacement string, due to |
| 842 | # magic getting called multiple times, and pointers now pointing |
| 843 | # to stale/freed strings |
| 844 | # The original fix for this caused infinite loops for non- or cow- |
| 845 | # strings, so we test those, too. |
| 846 | package FOO; |
| 847 | my $fc; |
| 848 | sub TIESCALAR { bless [ "abcdefgh" ] } |
| 849 | sub FETCH { $fc++; $_[0][0] } |
| 850 | sub STORE { $_[0][0] = $_[1] } |
| 851 | |
| 852 | my $s; |
| 853 | tie $s, 'FOO'; |
| 854 | $s =~ s/..../\x{101}/; |
| 855 | ::is($fc, 1, "tied UTF8 stuff FETCH count"); |
| 856 | ::is("$s", "\x{101}efgh", "tied UTF8 stuff"); |
| 857 | |
| 858 | ::watchdog(300); |
| 859 | $fc = 0; |
| 860 | $s = *foo; |
| 861 | $s =~ s/..../\x{101}/; |
| 862 | ::is($fc, 1, '$tied_glob =~ s/non-utf8/utf8/ fetch count'); |
| 863 | ::is("$s", "\x{101}::foo", '$tied_glob =~ s/non-utf8/utf8/ result'); |
| 864 | $fc = 0; |
| 865 | $s = *foo; |
| 866 | $s =~ s/(....)/\x{101}/g; |
| 867 | ::is($fc, 1, '$tied_glob =~ s/(non-utf8)/utf8/g fetch count'); |
| 868 | ::is("$s", "\x{101}\x{101}o", |
| 869 | '$tied_glob =~ s/(non-utf8)/utf8/g result'); |
| 870 | $fc = 0; |
| 871 | $s = "\xff\xff\xff\xff\xff"; |
| 872 | $s =~ s/..../\x{101}/; |
| 873 | ::is($fc, 1, '$tied_latin1 =~ s/non-utf8/utf8/ fetch count'); |
| 874 | ::is("$s", "\x{101}\xff", '$tied_latin1 =~ s/non-utf8/utf8/ result'); |
| 875 | $fc = 0; |
| 876 | { package package_name; tied($s)->[0] = __PACKAGE__ }; |
| 877 | $s =~ s/..../\x{101}/; |
| 878 | ::is($fc, 1, '$tied_cow =~ s/non-utf8/utf8/ fetch count'); |
| 879 | ::is("$s", "\x{101}age_name", '$tied_cow =~ s/non-utf8/utf8/ result'); |
| 880 | $fc = 0; |
| 881 | $s = \1; |
| 882 | $s =~ s/..../\x{101}/; |
| 883 | ::is($fc, 1, '$tied_ref =~ s/non-utf8/utf8/ fetch count'); |
| 884 | ::like("$s", qr/^\x{101}AR\(0x.*\)\z/, |
| 885 | '$tied_ref =~ s/non-utf8/utf8/ result'); |
| 886 | } |
| 887 | |
| 888 | # RT #97954 |
| 889 | { |
| 890 | my $count; |
| 891 | |
| 892 | sub bam::DESTROY { |
| 893 | --$count; |
| 894 | } |
| 895 | |
| 896 | my $z_zapp = bless [], 'bam'; |
| 897 | ++$count; |
| 898 | |
| 899 | is($count, 1, '1 object'); |
| 900 | is($z_zapp =~ s/.*/R/r, 'R', 'substitution happens'); |
| 901 | is(ref $z_zapp, 'bam', 'still 1 object'); |
| 902 | is($count, 1, 'still 1 object'); |
| 903 | undef $z_zapp; |
| 904 | is($count, 0, 'now 0 objects'); |
| 905 | |
| 906 | $z_zapp = bless [], 'bam'; |
| 907 | ++$count; |
| 908 | |
| 909 | is($count, 1, '1 object'); |
| 910 | like($z_zapp =~ s/./R/rg, qr/\AR{8,}\z/, 'substitution happens'); |
| 911 | is(ref $z_zapp, 'bam', 'still 1 object'); |
| 912 | is($count, 1, 'still 1 object'); |
| 913 | undef $z_zapp; |
| 914 | is($count, 0, 'now 0 objects'); |
| 915 | } |
| 916 | |
| 917 | is(*bam =~ s/\*//r, 'main::bam', 'Can s///r a tyepglob'); |
| 918 | is(*bam =~ s/\*//rg, 'main::bam', 'Can s///rg a tyepglob'); |
| 919 | |
| 920 | { |
| 921 | sub cowBug::TIESCALAR { bless[], 'cowBug' } |
| 922 | sub cowBug::FETCH { __PACKAGE__ } |
| 923 | sub cowBug::STORE{} |
| 924 | tie my $kror, cowBug =>; |
| 925 | $kror =~ s/(?:)/""/e; |
| 926 | } |
| 927 | pass("s/// on tied var returning a cow"); |
| 928 | |
| 929 | # a test for 6502e08109cd003b2cdf39bc94ef35e52203240b |
| 930 | # previously this would segfault |
| 931 | |
| 932 | { |
| 933 | my $s = "abc"; |
| 934 | eval { $s =~ s/(.)/die/e; }; |
| 935 | like($@, qr/Died at/, "s//die/e"); |
| 936 | } |
| 937 | |
| 938 | |
| 939 | # Test problems with constant replacement optimisation |
| 940 | # [perl #26986] logop in repl resulting in incorrect optimisation |
| 941 | "g" =~ /(.)/; |
| 942 | @l{'a'..'z'} = 'A'..':'; |
| 943 | $_ = "hello"; |
| 944 | { s/(.)/$l{my $a||$1}/g } |
| 945 | is $_, "HELLO", |
| 946 | 'logop in s/// repl does not result in "constant" repl optimisation'; |
| 947 | # Aliases to match vars |
| 948 | "g" =~ /(.)/; |
| 949 | $_ = "hello"; |
| 950 | { |
| 951 | local *a = *1; |
| 952 | s/(.)\1/$a/g; |
| 953 | } |
| 954 | is $_, 'helo', 's/pat/$alias_to_match_var/'; |
| 955 | "g" =~ /(.)/; |
| 956 | $_ = "hello"; |
| 957 | { |
| 958 | local *a = *1; |
| 959 | s/e(.)\1/a$a/g; |
| 960 | } |
| 961 | is $_, 'halo', 's/pat/foo$alias_to_match_var/'; |
| 962 | # Last-used pattern containing re-evals that modify "constant" rhs |
| 963 | { |
| 964 | local *a; |
| 965 | $x = "hello"; |
| 966 | $x =~ /(?{*a = \"a"})./; |
| 967 | undef *a; |
| 968 | $x =~ s//$a/g; |
| 969 | is $x, 'aaaaa', |
| 970 | 'last-used pattern disables constant repl optimisation'; |
| 971 | } |
| 972 | |
| 973 | |
| 974 | $_ = "\xc4\x80"; |
| 975 | $a = ""; |
| 976 | utf8::upgrade $a; |
| 977 | $_ =~ s/$/$a/; |
| 978 | is $_, "\xc4\x80", "empty utf8 repl does not result in mangled utf8"; |
| 979 | |
| 980 | $@ = "\x{30cb}eval 18"; |
| 981 | $@ =~ s/eval \d+/eval 11/; |
| 982 | is $@, "\x{30cb}eval 11", |
| 983 | 'loading utf8 tables does not interfere with matches against $@'; |
| 984 | |
| 985 | $reftobe = 3; |
| 986 | $reftobe =~ s/3/$reftobe=\ 3;4/e; |
| 987 | is $reftobe, '4', 'clobbering target with ref in s//.../e'; |
| 988 | $locker{key} = 3; |
| 989 | SKIP:{ |
| 990 | skip "no Hash::Util under miniperl", 2 if is_miniperl; |
| 991 | require Hash::Util; |
| 992 | eval { |
| 993 | $locker{key} =~ s/3/ |
| 994 | $locker{key} = 3; |
| 995 | &Hash::Util::lock_hash(\%locker);4 |
| 996 | /e; |
| 997 | }; |
| 998 | is $locker{key}, '3', 'locking target in $hash{key} =~ s//.../e'; |
| 999 | like $@, qr/^Modification of a read-only value/, 'err msg' . ($@ ? ": $@" : ""); |
| 1000 | } |
| 1001 | delete $::{does_not_exist}; # just in case |
| 1002 | eval { no warnings; $::{does_not_exist}=~s/(?:)/*{"does_not_exist"}; 4/e }; |
| 1003 | like $@, qr/^Modification of a read-only value/, |
| 1004 | 'vivifying stash elem in $that::{elem} =~ s//.../e'; |
| 1005 | |
| 1006 | # COWs should not be exempt from read-only checks. s/// croaks on read- |
| 1007 | # only values even when the pattern does not match, but it was not doing so |
| 1008 | # for COWs. |
| 1009 | eval { for (__PACKAGE__) { s/b/c/; } }; |
| 1010 | like $@, qr/^Modification of a read-only value/, |
| 1011 | 'read-only COW =~ s/does not match// should croak'; |
| 1012 | |
| 1013 | { |
| 1014 | my $a_acute = chr utf8::unicode_to_native(0xE1); # LATIN SMALL LETTER A WITH ACUTE |
| 1015 | my $egrave = chr utf8::unicode_to_native(0xE8); # LATIN SMALL LETTER E WITH GRAVE |
| 1016 | my $u_umlaut = chr utf8::unicode_to_native(0xFC); # LATIN SMALL LETTER U WITH DIAERESIS |
| 1017 | my $division = chr utf8::unicode_to_native(0xF7); # DIVISION SIGN |
| 1018 | |
| 1019 | is("ab.c" =~ s/\b/!/agr, "!ab!.!c!", '\\b matches ASCII before string, mid, and end, /a'); |
| 1020 | is("$a_acute$egrave.$u_umlaut" =~ s/\b/!/agr, "$a_acute$egrave.$u_umlaut", '\\b matches Latin1 before string, mid, and end, /a'); |
| 1021 | is("\x{100}\x{101}.\x{102}" =~ s/\b/!/agr, "\x{100}\x{101}.\x{102}", '\\b matches above-Latin1 before string, mid, and end, /a'); |
| 1022 | |
| 1023 | is("..." =~ s/\B/!/agr, "!.!.!.!", '\\B matches ASCII before string, mid, and end, /a'); |
| 1024 | is("$division$division$division" =~ s/\B/!/agr, "!$division!$division!$division!", '\\B matches Latin1 before string, mid, and end, /a'); |
| 1025 | is("\x{2028}\x{2028}\x{2028}" =~ s/\B/!/agr, "!\x{2028}!\x{2028}!\x{2028}!", '\\B matches above-Latin1 before string, mid, and end, /a'); |
| 1026 | |
| 1027 | is("ab.c" =~ s/\b/!/dgr, "!ab!.!c!", '\\b matches ASCII before string, mid, and end, /d'); |
| 1028 | { is("$a_acute$egrave.$u_umlaut" =~ s/\b/!/dgr, "$a_acute$egrave.$u_umlaut", '\\b matches Latin1 before string, mid, and end, /d'); } |
| 1029 | is("\x{100}\x{101}.\x{102}" =~ s/\b/!/dgr, "!\x{100}\x{101}!.!\x{102}!", '\\b matches above-Latin1 before string, mid, and end, /d'); |
| 1030 | |
| 1031 | is("..." =~ s/\B/!/dgr, "!.!.!.!", '\\B matches ASCII before string, mid, and end, /d'); |
| 1032 | is("$division$division$division" =~ s/\B/!/dgr, "!$division!$division!$division!", '\\B matches Latin1 before string, mid, and end, /d'); |
| 1033 | is("\x{2028}\x{2028}\x{2028}" =~ s/\B/!/dgr, "!\x{2028}!\x{2028}!\x{2028}!", '\\B matches above-Latin1 before string, mid, and end, /d'); |
| 1034 | |
| 1035 | is("ab.c" =~ s/\b/!/ugr, "!ab!.!c!", '\\b matches ASCII before string, mid, and end, /u'); |
| 1036 | is("$a_acute$egrave.$u_umlaut" =~ s/\b/!/ugr, "!$a_acute$egrave!.!$u_umlaut!", '\\b matches Latin1 before string, mid, and end, /u'); |
| 1037 | is("\x{100}\x{101}.\x{102}" =~ s/\b/!/ugr, "!\x{100}\x{101}!.!\x{102}!", '\\b matches above-Latin1 before string, mid, and end, /u'); |
| 1038 | |
| 1039 | is("..." =~ s/\B/!/ugr, "!.!.!.!", '\\B matches ASCII before string, mid, and end, /u'); |
| 1040 | is("$division$division$division" =~ s/\B/!/ugr, "!$division!$division!$division!", '\\B matches Latin1 before string, mid, and end, /u'); |
| 1041 | is("\x{2028}\x{2028}\x{2028}" =~ s/\B/!/ugr, "!\x{2028}!\x{2028}!\x{2028}!", '\\B matches above-Latin1 before string, mid, and end, /u'); |
| 1042 | |
| 1043 | fresh_perl_like( '$_=""; /\b{gcb}/; s///g', qr/^$/, {}, |
| 1044 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{gcb}' |
| 1045 | ); |
| 1046 | fresh_perl_like( '$_=""; /\B{gcb}/; s///g', qr/^$/, {}, |
| 1047 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{gcb}' |
| 1048 | ); |
| 1049 | fresh_perl_like( '$_=""; /\b{wb}/; s///g', qr/^$/, {}, |
| 1050 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{wb}' |
| 1051 | ); |
| 1052 | fresh_perl_like( '$_=""; /\B{wb}/; s///g', qr/^$/, {}, |
| 1053 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{wb}' |
| 1054 | ); |
| 1055 | fresh_perl_like( '$_=""; /\b{sb}/; s///g', qr/^$/, {}, |
| 1056 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{sb}' |
| 1057 | ); |
| 1058 | fresh_perl_like( '$_=""; /\B{sb}/; s///g', qr/^$/, {}, |
| 1059 | '[perl #126319: Segmentation fault in Perl_sv_catpvn_flags with \b{sb}' |
| 1060 | ); |
| 1061 | |
| 1062 | SKIP: { |
| 1063 | if (! locales_enabled('LC_ALL')) { |
| 1064 | skip "Can't test locale (maybe you are missing POSIX)", 6; |
| 1065 | } |
| 1066 | |
| 1067 | setlocale(&POSIX::LC_ALL, "C"); |
| 1068 | use locale; |
| 1069 | is("a.b" =~ s/\b/!/gr, "!a!.!b!", '\\b matches ASCII before string, mid, and end, /l'); |
| 1070 | is("$a_acute.$egrave" =~ s/\b/!/gr, "$a_acute.$egrave", '\\b matches Latin1 before string, mid, and end, /l'); |
| 1071 | is("\x{100}\x{101}.\x{102}" =~ s/\b/!/gr, "!\x{100}\x{101}!.!\x{102}!", '\\b matches above-Latin1 before string, mid, and end, /l'); |
| 1072 | |
| 1073 | is("..." =~ s/\B/!/gr, "!.!.!.!", '\\B matches ASCII before string, mid, and end, /l'); |
| 1074 | is("$division$division$division" =~ s/\B/!/gr, "!$division!$division!$division!", '\\B matches Latin1 before string, mid, and end, /l'); |
| 1075 | is("\x{2028}\x{2028}\x{2028}" =~ s/\B/!/gr, "!\x{2028}!\x{2028}!\x{2028}!", '\\B matches above-Latin1 before string, mid, and end, /l'); |
| 1076 | } |
| 1077 | |
| 1078 | } |
| 1079 | |
| 1080 | { |
| 1081 | # RT #123954 if the string getting matched against got converted during |
| 1082 | # s///e so that it was no longer SvPOK, an assertion would fail when |
| 1083 | # setting pos. |
| 1084 | my $s1 = 0; |
| 1085 | $s1 =~ s/.?/$s1++/ge; |
| 1086 | is($s1, "01","RT #123954 s1"); |
| 1087 | } |
| 1088 | { |
| 1089 | # RT #126602 double free if the value being modified is freed in the replacement |
| 1090 | fresh_perl_is('s//*_=0;s|0||;00.y0/e; print qq(ok\n)', "ok\n", { stderr => 1 }, |
| 1091 | "[perl #126602] s//*_=0;s|0||/e crashes"); |
| 1092 | } |
| 1093 | |
| 1094 | { |
| 1095 | #RT 126260 gofs is in chars, not bytes |
| 1096 | |
| 1097 | # in something like /..\G/, the engine should start matching two |
| 1098 | # chars before pos(). At one point it was matching two bytes before. |
| 1099 | |
| 1100 | my $s = "\x{121}\x{122}\x{123}"; |
| 1101 | pos($s) = 2; |
| 1102 | $s =~ s/..\G//g; |
| 1103 | is($s, "\x{123}", "#RT 126260 gofs"); |
| 1104 | } |
| 1105 | |
| 1106 | SKIP: { |
| 1107 | if (! locales_enabled('LC_CTYPE')) { |
| 1108 | skip "Can't test locale", 1; |
| 1109 | } |
| 1110 | |
| 1111 | # To cause breakeage, we need a locale in which \xff matches whatever |
| 1112 | # POSIX class is used in the pattern. Easiest is C, with \W. |
| 1113 | fresh_perl_is(' use POSIX qw(locale_h); |
| 1114 | setlocale(&POSIX::LC_CTYPE, "C"); |
| 1115 | my $s = "\xff"; |
| 1116 | $s =~ s/\W//l; |
| 1117 | print qq(ok$s\n)', |
| 1118 | "ok\n", |
| 1119 | {stderr => 1 }, |
| 1120 | '[perl #129038 ] s/\xff//l no longer crashes'); |
| 1121 | } |
| 1122 | |
| 1123 | SKIP: { |
| 1124 | skip("no Tie::Hash::NamedCapture under miniperl", 3) if is_miniperl; |
| 1125 | |
| 1126 | # RT #23624 scoping of @+/@- when used with tie() |
| 1127 | #! /usr/bin/perl -w |
| 1128 | |
| 1129 | package Tie::Prematch; |
| 1130 | sub TIEHASH { bless \my $dummy => __PACKAGE__ } |
| 1131 | sub FETCH { return substr $_[1], 0, $-[0] } |
| 1132 | |
| 1133 | package main; |
| 1134 | |
| 1135 | eval <<'__EOF__'; |
| 1136 | tie my %pre, 'Tie::Prematch'; |
| 1137 | my $foo = 'foobar'; |
| 1138 | $foo =~ s/.ob/$pre{ $foo }/; |
| 1139 | is($foo, 'ffar', 'RT #23624'); |
| 1140 | |
| 1141 | $foo = 'foobar'; |
| 1142 | $foo =~ s/.ob/tied(%pre)->FETCH($foo)/e; |
| 1143 | is($foo, 'ffar', 'RT #23624'); |
| 1144 | |
| 1145 | tie %-, 'Tie::Prematch'; |
| 1146 | $foo = 'foobar'; |
| 1147 | $foo =~ s/.ob/$-{$foo}/; |
| 1148 | is($foo, 'ffar', 'RT #23624'); |
| 1149 | |
| 1150 | undef *Tie::Prematch::TIEHASH; |
| 1151 | undef *Tie::Prematch::FETCH; |
| 1152 | __EOF__ |
| 1153 | } |
| 1154 | |
| 1155 | # [perl #130188] crash on return from substitution in subroutine |
| 1156 | # make sure returning from s///e doesn't SEGV |
| 1157 | { |
| 1158 | my $f = sub { |
| 1159 | my $x = 'a'; |
| 1160 | $x =~ s/./return;/e; |
| 1161 | }; |
| 1162 | my $x = $f->(); |
| 1163 | pass("RT #130188"); |
| 1164 | } |
| 1165 | |
| 1166 | # RT #131930 |
| 1167 | # a multi-line s/// wasn't resetting the cop_line correctly |
| 1168 | { |
| 1169 | my $l0 = __LINE__; |
| 1170 | my $s = "a"; |
| 1171 | $s =~ s[a] |
| 1172 | [b]; |
| 1173 | my $lines = __LINE__ - $l0; |
| 1174 | is $lines, 4, "RT #131930"; |
| 1175 | } |
| 1176 | |
| 1177 | { # [perl #133899], would panic |
| 1178 | |
| 1179 | fresh_perl_is('my $a = "ha"; $a =~ s!|0?h\x{300}(?{})!!gi', "", {}, |
| 1180 | "[perl #133899] s!|0?h\\x{300}(?{})!!gi panics"); |
| 1181 | } |
| 1182 | |
| 1183 | { |
| 1184 | fresh_perl_is("s//00000000000format \0 '0000000\\x{800}/;eval", "", {}, "RT #133882"); |
| 1185 | } |
| 1186 | |
| 1187 | { # GH Issue 20690 |
| 1188 | my @ret; |
| 1189 | my $str = "abc"; |
| 1190 | for my $upgrade (0,1) { |
| 1191 | my $copy = $str; |
| 1192 | utf8::upgrade($copy) if $upgrade; |
| 1193 | my $r= $copy=~s/b{0}//gr; |
| 1194 | push @ret, $r; |
| 1195 | } |
| 1196 | is( $ret[1], $ret[0], |
| 1197 | "Issue #20690 - s/b{0}//gr should work the same for utf8 and non-utf8 strings"); |
| 1198 | is( $ret[0], $str, |
| 1199 | "Issue #20690 - s/b{0}//gr on non-utf8 string should not remove anything"); |
| 1200 | is( $ret[1], $str, |
| 1201 | "Issue #20690 - s/b{0}//gr on utf8 string should not remove anything"); |
| 1202 | } |