6 require Config; import Config;
14 ok( $_ eq 'david' && $a eq 'rules', 'non-destructive substitute' );
16 $a = "david" =~ s/david/rules/r;
17 ok( $a eq 'rules', 's///r with constant' );
19 $a = "david" =~ s/david/"is"."great"/er;
20 ok( $a eq 'isgreat', 's///er' );
22 $a = "daviddavid" =~ s/david/cool/gr;
23 ok( $a eq 'coolcool', 's///gr' );
26 $b = $a =~ s/david/sucks/r =~ s/sucks/rules/r;
27 ok( $a eq 'david' && $b eq 'rules', 'chained s///r' );
30 $b = $a =~ s/xxx/sucks/r;
31 ok( $a eq 'david' && $b eq 'david', 'non matching s///r' );
35 ok( 'david' =~ s/$a/rules/ro eq 'rules', 's///ro '.$_ );
39 eval '$b = $a !~ s/david/is great/r';
40 like( $@, qr{Using !~ with s///r doesn't make sense}, 's///r !~ operator gives error' );
43 no warnings 'uninitialized';
45 $b = $a =~ s/left/right/r;
46 ok ( !defined $a && !defined $b, 's///r with undef input' );
49 warning_like(sub { $b = $a =~ s/left/right/r },
50 qr/^Use of uninitialized value/,
51 's///r Uninitialized warning');
54 warning_like(sub {eval 's/david/sucks/r; 1'},
55 qr/^Useless use of non-destructive substitution/,
56 's///r void context warning');
60 $b = $a =~ s/david/rules/r;
61 ok( $a eq '' && $b eq '', 's///r on empty string' );
65 ok( $_ eq 'david' && $b[0] eq 'rules', 's///r in list context' );
67 # Magic value and s///r
69 tie $m, 'Tie::StdScalar'; # makes $a magical
71 $b = $m =~ s/david/rules/r;
72 ok( $m eq 'david' && $b eq 'rules', 's///r with magic input' );
74 $m = $b =~ s/rules/david/r;
75 ok( defined tied($m), 's///r magic isn\'t lost' );
77 $b = $m =~ s/xxx/yyy/r;
78 ok( ! defined tied($b), 's///r magic isn\'t contagious' );
80 my $ref = \("aaa" =~ s/aaa/bbb/r);
81 is (Internals::SvREFCNT($$ref), 1, 's///r does not leak');
82 $ref = \("aaa" =~ s/aaa/bbb/rg);
83 is (Internals::SvREFCNT($$ref), 1, 's///rg does not leak');
88 ok( $_ eq '$x', ":$_: eq :\$x:" );
92 ok( $_ eq 'foo', ":$_: eq :foo:" );
96 ok( $_ eq '$x foo', ":$_: eq :\$x foo:" );
99 ($a = 'abcdef') =~ s<(b${b}e)>'\n$1';
100 ok( $1 eq 'bcde' && $a eq 'a\n$1f', ":$1: eq :bcde: ; :$a: eq :a\\n\$1f:" );
103 ok( ($a =~ s/a/x/g) == 4 && $a eq 'xbxcxdx' );
105 ok( ($a =~ s/a/y/g) == 0 && $a eq 'xbxcxdx' );
107 ok( ($a =~ s/b/y/g) == 1 && $a eq 'xyxcxdx' );
110 ok( /a/i && s///gi && $_ eq 'BCD' );
113 ok( length($_) == 4 );
115 ok( $_ eq '\\' x 8 && $snum == 4 );
118 ok( length($_) == 8 );
120 ok( $_ eq '\\//' x 4 && $snum == 4 );
121 ok( length($_) == 12 );
125 ok( $_ eq 'aaXXXXbbb' );
129 ok( $_ eq 'aaXXXXbbb' );
133 ok( $_ eq 'baaXXXXbbb' );
137 ok( $_ eq 'baaXXXXbbb' );
141 ok( $_ eq 'aXXXXbbb' );
145 ok( $_ eq 'baXXXXbbb' );
149 ok( $_ eq 'aaaXXXXbb' );
153 ok( $_ eq 'aaaXXXXbb' );
157 ok( $_ eq 'aaaXXXXb' );
161 ok( $_ eq 'aayXXXbbb' );
165 ok( $_ eq 'aaaXXXzbb' );
173 ok( $_ eq 'aaaXXXXxb' );
175 # now for some unoptimized versions of the same.
179 ok( $_ eq 'aaXXXXbbb' );
183 ok( $_ eq 'aaXXXXbbb' );
187 ok( $_ eq 'baaXXXXbbb' );
191 ok( $_ eq 'baaXXXXbbb' );
195 ok( $_ eq 'aXXXXbbb' );
199 ok( $_ eq 'baXXXXbbb' );
203 ok( $_ eq 'aaaXXXXbb' );
207 ok( $_ eq 'aaaXXXXbb' );
211 ok( $_ eq 'aaaXXXXb' );
215 ok( $_ eq 'aayXXXbbb' );
219 ok( $_ eq 'aaaXXXzbb' );
222 $x ne $x || s/aaX.*Xbb//;
227 ok( $_ eq 'aaaXXXXxb' );
230 s/(\d+)/$1*2/e; # yields 'abc246xyz'
231 ok( $_ eq 'abc246xyz' );
232 s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc 246xyz'
233 ok( $_ eq 'abc 246xyz' );
234 s/(\w)/$1 x 2/eg; # yields 'aabbcc 224466xxyyzz'
235 ok( $_ eq 'aabbcc 224466xxyyzz' );
246 $_ = "Now is the %#*! time for all good men...";
247 ok( ($x=(y/a-zA-Z //cd)) == 7 );
250 $_ = 'abcdefghijklmnopqrstuvwxyz0123456789';
253 ok( $_ eq 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' );
255 # same as tr/A-Z/a-z/;
256 if (defined $Config{ebcdic} && $Config{ebcdic} eq 'define') { # EBCDIC.
258 y[\301-\351][\201-\251];
259 } else { # Ye Olde ASCII. Or something like it.
260 y[\101-\132][\141-\172];
263 ok( $_ eq 'abcdefghijklmnopqrstuvwxyz0123456789' );
266 skip("not ASCII",1) unless (ord("+") == ord(",") - 1
267 && ord(",") == ord("-") - 1
268 && ord("a") == ord("b") - 1
269 && ord("b") == ord("c") - 1);
284 # test recursive substitutions
285 # code based on the recursive expansion of makefile variables
288 AAAAA => '$(B)', B=>'$(C)', C => 'D', # long->short
289 E => '$(F)', F=>'p $(G) q', G => 'HHHHH', # short->long
290 DIR => '$(UNDEFINEDNAME)/xxx',
293 my($var,$level) = @_;
294 return "\$($var)" unless exists $MK{$var};
295 return exp_vars($MK{$var}, $level+1); # can recurse
298 my($str,$level) = @_;
299 $str =~ s/\$\((\w+)\)/var($1, $level+1)/ge; # can recurse
300 #warn "exp_vars $level = '$str'\n";
304 ok( exp_vars('$(AAAAA)',0) eq 'D' );
305 ok( exp_vars('$(E)',0) eq 'p HHHHH q' );
306 ok( exp_vars('$(DIR)',0) eq '$(UNDEFINEDNAME)/xxx' );
307 ok( exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' );
310 s/(..)/$x = $1, m#.#/eg;
311 ok( $x eq "cd", 'a match nested in the RHS of a substitution' );
313 # Subst and lookbehind
316 $snum = s/(?<!x)c/x/g;
317 ok( $_ eq "xxxxx" && $snum == 5 );
320 $snum = s/(?<!x)(c)/x/g;
321 ok( $_ eq "xxxxx" && $snum == 5 );
324 $snum = s/(?<!r)foobbar/foobar/g;
325 ok( $_ eq "foobarfoobbar" && $snum == 1 );
328 $snum = s/(?<!ar)(foobbar)/foobar/g;
329 ok( $_ eq "foobarfoobbar" && $snum == 1 );
332 $snum = s/(?<!ar)foobbar/foobar/g;
333 ok( $_ eq "foobarfoobbar" && $snum == 1 );
335 eval 's{foo} # this is a comment, not a delimiter
337 ok( ! @?, 'parsing of split subst with comment' );
339 $snum = eval '$_="exactly"; s sxsys;m 3(yactl)3;$1';
340 is( $snum, 'yactl', 'alpha delimiters are allowed' );
344 ok( $_ eq "bbcbb" && $snum == 4,
345 'check if squashing works at the end of string' );
351 $url = new URI::URL "http://www/"; die if $url eq "xXx";
355 $foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
356 ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
357 ' lowercase $@%#MiXeD$@%# ';
360 s{ \d+ \b [,.;]? (?{ 'digits' })
362 [a-z]+ \b [,.;]? (?{ 'lowercase' })
364 [A-Z]+ \b [,.;]? (?{ 'UPPERCASE' })
366 [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' })
368 [A-Za-z]+ \b [,.;]? (?{ 'MiXeD' })
370 [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' })
374 [^A-Za-z0-9\s]+ (?{ '$@%#' })
381 ok( $_ eq '' && $snum == 6 );
384 $snum = s/(\d*|x)/<$1>/g;
385 $foo = '<>' . ('<x><>' x 20) ;
386 ok( $_ eq $foo && $snum == 41 );
393 ok( $_ eq 'aaaaaaxxxxxx' && $snum == 3 );
398 ok( $_ eq 'aaaaaaxxx' && $snum == 3 );
403 ok( $_ eq 'aaaaaaxxaa' );
408 ok( $_ eq 'aaaaaaxaa' );
412 ok( $_ eq 'xxxxxxxxxxxxxxxxxx' && $snum == 9 );
416 ok( $_ eq 'xxxxxxxxx' && $snum == 9 );
420 ok( $_ eq 'xxaaaaaaaa' );
424 ok( $_ eq 'xaaaaaaaa' );
428 ok( $_ eq '.aaa' && $snum == 1 );
430 eval q% s/a/"b"}/e %;
431 ok( $@ =~ /Bad evalled substitution/ );
432 eval q% ($_ = "x") =~ s/(.)/"$1 "/e %;
433 ok( $_ eq "x " and !length $@ );
435 eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %;
436 ok( $_ eq '' and !length $@ );
439 ok( !s/^([a-z]:)/\u$1/ );
441 $_ = "Charles Bronson";
443 ok( $_ eq "C B" && $snum == 12 );
447 my $s = "H\303\266he";
451 is($l, $r, "use utf8 \\w");
454 my $pv1 = my $pv2 = "Andreas J. K\303\266nig";
455 $pv1 =~ s/A/\x{100}/;
456 substr($pv2,0,1) = "\x{100}";
460 skip("EBCDIC", 3) if ord("A") == 193;
463 # Gregor Chrupala <gregor.chrupala@star-group.net>
465 $a = 'España';
466 $a =~ s/ñ/ñ/;
467 like($a, qr/ñ/, "use utf8 RHS");
472 $a = 'España España';
473 $a =~ s/ñ/ñ/;
474 like($a, qr/ñ/, "use utf8 LHS");
481 like($a, qr/ñ/, "use utf8 LHS and RHS");
486 # SADAHIRO Tomoyuki <bqw10602@nifty.com>
488 $a = "\x{100}\x{101}";
489 $a =~ s/\x{101}/\xFF/;
491 is(length($a), 2, "SADAHIRO utf8 s///");
493 $a = "\x{100}\x{101}";
494 $a =~ s/\x{101}/"\xFF"/e;
498 $a = "\x{100}\x{101}";
499 $a =~ s/\x{101}/\xFF\xFF\xFF/;
500 like($a, qr/\xFF\xFF\xFF/);
503 $a = "\x{100}\x{101}";
504 $a =~ s/\x{101}/"\xFF\xFF\xFF"/e;
505 like($a, qr/\xFF\xFF\xFF/);
509 $a =~ s/\xFF/\x{100}/;
510 like($a, qr/\x{100}/);
514 $a =~ s/\xFF/"\x{100}"/e;
515 like($a, qr/\x{100}/);
519 $a =~ s/\xFF/\x{100}/;
520 like($a, qr/\x{100}/);
524 $a =~ s/\xFF/"\x{100}"/e;
525 like($a, qr/\x{100}/);
530 # subst with mixed utf8/non-utf8 type
531 my($ua, $ub, $uc, $ud) = ("\x{101}", "\x{102}", "\x{103}", "\x{104}");
532 my($na, $nb) = ("\x{ff}", "\x{fe}");
535 ($b = $a) =~ s/--/$na/;
536 is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8");
537 ($b = $a) =~ s/--/--$na--/;
538 is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8");
539 ($b = $a) =~ s/--/$uc/;
540 is($b, "$ua$uc$ub", "s///: replace utf8 into utf8");
541 ($b = $a) =~ s/--/--$uc--/;
542 is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8");
544 ($b = $a) =~ s/--/$ua/;
545 is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8");
546 ($b = $a) =~ s/--/--$ua--/;
547 is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8");
549 # now with utf8 pattern
551 ($b = $a) =~ s/-($ud)?-/$na/;
552 is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8 (utf8 pattern)");
553 ($b = $a) =~ s/-($ud)?-/--$na--/;
554 is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8 (utf8 pattern)");
555 ($b = $a) =~ s/-($ud)?-/$uc/;
556 is($b, "$ua$uc$ub", "s///: replace utf8 into utf8 (utf8 pattern)");
557 ($b = $a) =~ s/-($ud)?-/--$uc--/;
558 is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8 (utf8 pattern)");
560 ($b = $a) =~ s/-($ud)?-/$ua/;
561 is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8 (utf8 pattern)");
562 ($b = $a) =~ s/-($ud)?-/--$ua--/;
563 is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8 (utf8 pattern)");
564 ($b = $a) =~ s/-($ud)?-/$na/;
565 is($b, "$na$na$nb", "s///: replace non-utf8 into non-utf8 (utf8 pattern)");
566 ($b = $a) =~ s/-($ud)?-/--$na--/;
567 is($b, "$na--$na--$nb", "s///: replace long non-utf8 into non-utf8 (utf8 pattern)");
573 is("<$_> <$s>", "<xxxx> <4>", "[perl #7806]");
577 is("<$_> <$s>", "<> <4>", "[perl #7806]");
579 # [perl #19048] Coredump in silly replacement
584 is($_, "\n", "[perl #19048]");
587 # [perl #17757] interaction between saw_ampersand and study
589 my $f = eval q{ $& };
593 is($f, "yy", "[perl #17757]");
596 # [perl #20684] returned a zero count
598 is(s/(??{1})/2/eg, 4, '#20684 s/// with (??{..}) inside');
600 # [perl #20682] @- not visible in replacement
602 /(2)/; # seed @- with something else
603 s/(1)(2)(3)/$#- (@-)/;
604 is($_, "3 (0 0 1 2)", '#20682 @- not visible in replacement');
606 # [perl #20682] $^N not visible in replacement
608 /(a)/; s/(b)|(c)/-$^N/g;
609 is($_,'a-b-c','#20682 $^N not visible in replacement');
611 # [perl #22351] perl bug with 'e' substitution modifier
614 no warnings 'uninitialized';
617 is($name, "cis", q[#22351 bug with 'e' substitution modifier]);
620 # [perl #34171] $1 didn't honour 'use bytes' in s//e
628 is(length($x), 2, '[perl #34171]');
632 { # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not
635 ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g;
636 is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g");
638 ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g;
639 is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g");
643 no warnings 'uninitialized';
644 /(((((((((x)))))))))(z)/; # clear $10
645 s/(((((((((x)))))))))(y)/${10}/;
646 is($_,"y","RT#6006: \$_ eq '$_'");
648 s/(((((((((x)))))))))(r)/fooba${10}/;
649 is($_,"foobar","RT#6006: \$_ eq '$_'");
652 my $want=("\n" x 11).("B\n" x 11)."B";
659 is($want,$_,"RT#17542");
663 my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}");
667 is($_, "012", "RT#52104: $id");
671 fresh_perl_is( '$_=q(foo);s/(.)\G//g;print' => 'foo', '[perl #69056] positive GPOS regex segfault' );
672 fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'aXX[c-e][e-f]f', 'positive GPOS regex substitution failure' );
674 # [perl #71470] $var =~ s/$qr//e calling get-magic on $_ as well as $var
678 sub qrBug::TIESCALAR { bless[pop], 'qrBug' }
679 sub qrBug::FETCH { $scratch .= "[fetching $_[0][0]]"; 'prew' }
681 tie my $kror, qrBug => '$kror';
682 tie $_, qrBug => '$_';
686 $scratch, '[fetching $kror]',
687 'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var',
691 { # Bug #41530; replacing non-utf8 with a utf8 causes problems
692 my $string = "a\x{a0}a";
693 my $sub_string = $string;
694 ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8");
695 $sub_string =~ s/a/\x{100}/g;
696 ok(utf8::is_utf8($sub_string),
697 'Verify replace of non-utf8 with utf8 upgrades to utf8');
698 is($sub_string, "\x{100}\x{A0}\x{100}",
699 'Verify #41530 fixed: replace of non-utf8 with utf8');
701 my $non_sub_string = $string;
702 ok(! utf8::is_utf8($non_sub_string),
703 "Verify that string isn't initially utf8");
704 $non_sub_string =~ s/b/\x{100}/g;
705 ok(! utf8::is_utf8($non_sub_string),
706 "Verify that failed substitute doesn't change string's utf8ness");
707 is($non_sub_string, $string,
708 "Verify that failed substitute doesn't change string");
711 { # Verify largish octal in replacement pattern
714 $string =~ s/a/\400/;
715 is($string, chr 0x100, "Verify that handles s/foo/\\400/");
716 $string =~ s/./\600/;
717 is($string, chr 0x180, "Verify that handles s/foo/\\600/");
718 $string =~ s/./\777/;
719 is($string, chr 0x1FF, "Verify that handles s/foo/\\777/");
722 # Scoping of s//the RHS/ when there is no /e
723 # Tests based on [perl #19078]
726 my $output = ''; my %a;
727 no warnings 'uninitialized';
730 s!.!<@a{$output .= ("$&"),/[$&]/g}>!g;
733 $output, "CCCGGG< >< >< >< >< >< >",
734 's/// sets PL_curpm for each iteration even when the RHS has set it'
740 'Match vars reflect the last match after s/pat/$a{m|pat|}/ without /e'
745 # a tied scalar that returned a plain string, got messed up
746 # when substituted with a UTF8 replacement string, due to
747 # magic getting called multiple times, and pointers now pointing
748 # to stale/freed strings
749 # The original fix for this caused infinite loops for non- or cow-
750 # strings, so we test those, too.
753 sub TIESCALAR { bless [ "abcdefgh" ] }
754 sub FETCH { $fc++; $_[0][0] }
755 sub STORE { $_[0][0] = $_[1] }
759 $s =~ s/..../\x{101}/;
760 ::is($fc, 1, "tied UTF8 stuff FETCH count");
761 ::is("$s", "\x{101}efgh", "tied UTF8 stuff");
766 $s =~ s/..../\x{101}/;
767 ::is($fc, 1, '$tied_glob =~ s/non-utf8/utf8/ fetch count');
768 ::is("$s", "\x{101}::foo", '$tied_glob =~ s/non-utf8/utf8/ result');
771 $s =~ s/(....)/\x{101}/g;
772 ::is($fc, 1, '$tied_glob =~ s/(non-utf8)/utf8/g fetch count');
773 ::is("$s", "\x{101}\x{101}o",
774 '$tied_glob =~ s/(non-utf8)/utf8/g result');
776 $s = "\xff\xff\xff\xff\xff";
777 $s =~ s/..../\x{101}/;
778 ::is($fc, 1, '$tied_latin1 =~ s/non-utf8/utf8/ fetch count');
779 ::is("$s", "\x{101}\xff", '$tied_latin1 =~ s/non-utf8/utf8/ result');
781 { package package_name; tied($s)->[0] = __PACKAGE__ };
782 $s =~ s/..../\x{101}/;
783 ::is($fc, 1, '$tied_cow =~ s/non-utf8/utf8/ fetch count');
784 ::is("$s", "\x{101}age_name", '$tied_cow =~ s/non-utf8/utf8/ result');
787 $s =~ s/..../\x{101}/;
788 ::is($fc, 1, '$tied_ref =~ s/non-utf8/utf8/ fetch count');
789 ::like("$s", qr/^\x{101}AR\(0x.*\)\z/,
790 '$tied_ref =~ s/non-utf8/utf8/ result');
801 my $z_zapp = bless [], 'bam';
804 is($count, 1, '1 object');
805 is($z_zapp =~ s/.*/R/r, 'R', 'substitution happens');
806 is(ref $z_zapp, 'bam', 'still 1 object');
807 is($count, 1, 'still 1 object');
809 is($count, 0, 'now 0 objects');
811 $z_zapp = bless [], 'bam';
814 is($count, 1, '1 object');
815 like($z_zapp =~ s/./R/rg, qr/\AR{8,}\z/, 'substitution happens');
816 is(ref $z_zapp, 'bam', 'still 1 object');
817 is($count, 1, 'still 1 object');
819 is($count, 0, 'now 0 objects');
822 is(*bam =~ s/\*//r, 'main::bam', 'Can s///r a tyepglob');
823 is(*bam =~ s/\*//rg, 'main::bam', 'Can s///rg a tyepglob');
826 sub cowBug::TIESCALAR { bless[], 'cowBug' }
827 sub cowBug::FETCH { __PACKAGE__ }
829 tie my $kror, cowBug =>;
830 $kror =~ s/(?:)/""/e;
832 pass("s/// on tied var returning a cow");
834 # a test for 6502e08109cd003b2cdf39bc94ef35e52203240b
835 # previously this would segfault
839 eval { $s =~ s/(.)/die/e; };
840 like($@, qr/Died at/, "s//die/e");
844 # Test problems with constant replacement optimisation
845 # [perl #26986] logop in repl resulting in incorrect optimisation
847 @l{'a'..'z'} = 'A'..':';
849 { s/(.)/$l{my $a||$1}/g }
851 'logop in s/// repl does not result in "constant" repl optimisation';
852 # Aliases to match vars
859 is $_, 'helo', 's/pat/$alias_to_match_var/';
866 is $_, 'halo', 's/pat/$alias_to_match_var/';
867 # Last-used pattern containing re-evals that modify "constant" rhs
871 $x =~ /(?{*a = \"a"})./;
875 'last-used pattern disables constant repl optimisation';
883 is $_, "\xc4\x80", "empty utf8 repl does not result in mangled utf8";
885 $@ = "\x{30cb}eval 18";
886 $@ =~ s/eval \d+/eval 11/;
887 is $@, "\x{30cb}eval 11",
888 'loading utf8 tables does not interfere with matches against $@';