[perl5.git] / lib / utf8.t

#!./perl 

BEGIN {
    # Run from the t/ directory when it exists, and load modules only
    # from ../lib.
    if (-d 't') {
        chdir 't';
    }
    @INC = ('../lib');
}

# NOTE!
#
# Think carefully before adding tests here.  In general this should be
# used only for about three categories of tests:
#
# (1) tests that absolutely require 'use utf8'; since that pragma should
#     not generally be needed (the utf8 pragma is being obsoleted), there
#     should be rather few of these.  If you want to test Unicode and
#     regexes, you probably want to go to op/regexp or op/pat; if you want
#     to test split, go to op/split; pack, op/pack; appending or joining,
#     op/append or op/join, and so forth
#
# (2) tests that have to do with Unicode tokenizing (though it's likely
#     that all the other Unicode tests sprinkled around the t/**/*.t are
#     going to catch that)
#
# (3) complicated tests that simultaneously stress so many Unicode features
#     that deciding into which other test script the tests should go to
#     is hard -- maybe consider breaking up the complicated test
#
#

use Test;
# 15 = 2 (bug 20001009.001) + 6 + 6 (bug 20000730.004) + 1 (eval/warning check)
plan tests => 15;

{
    # bug id 20001009.001
    #
    # A two-byte string built under 'use bytes' must not compare equal to
    # the one-character string "\xe4" built under 'use utf8', even though
    # the former is the UTF-8 encoding of the latter.  The comparison is
    # made both outside and inside the scope of 'use utf8'.

    # Deliberately not named $a/$b: lexicals with those names hide the
    # package variables that sort() relies on.
    my ($octets, $char);

    { use bytes; $octets = "\xc3\xa4" }
    { use utf8;  $char   = "\xe4"     }

    ok($octets ne $char);

    { use utf8; ok($octets ne $char) }
}


{
    # bug id 20000730.004
    #
    # U+263A built in several different ways must always count as one
    # character (by length(), by a /(.)/g match and by split //) while
    # occupying three bytes under 'use bytes'; two of them must count as
    # two characters and six bytes.

    my $smiley = "\x{263a}";

    # One smiley: literal, variable, and each concatenated with "".
    foreach my $str (
        "\x{263a}",
        $smiley,

        "" . $smiley,
        "" . "\x{263a}",

        $smiley    . "",
        "\x{263a}" . "",
    ) {
        my $n_chars = length $str;
        my $n_bytes = do { use bytes; length $str };
        my $n_regex = () = $str =~ m/(.)/g;
        # split is counted via an array: assigning it straight to an
        # empty list would impose an implicit LIMIT of 1.
        my @pieces  = split //, $str;
        my $n_split = scalar @pieces;
        ok(join('/', $n_chars, $n_regex, $n_split, $n_bytes) eq "1/1/1/3");
    }

    # Two smileys: concatenation, interpolation and repetition.
    foreach my $str (
        "\x{263a}" . "\x{263a}",
        $smiley    . $smiley,

        "\x{263a}\x{263a}",
        "$smiley$smiley",

        "\x{263a}" x 2,
        $smiley    x 2,
    ) {
        my $n_chars = length $str;
        my $n_bytes = do { use bytes; length $str };
        my $n_regex = () = $str =~ m/(.)/g;
        # Same reason as above for going through an array here.
        my @pieces  = split //, $str;
        my $n_split = scalar @pieces;
        ok(join('/', $n_chars, $n_regex, $n_split, $n_bytes) eq "2/2/2/6");
    }
}


{
    # A backslash in front of a wide character inside an eval'ed
    # double-quoted string must produce no warning and yield just that
    # character.
    my $warnings = 0;
    local $SIG{__WARN__} = sub {
        print "#($_[0])\n";    # echo the warning, prefixed with '#'
        $warnings++;
    };

    # The eval'ed source text is: double quote, backslash, U+0100,
    # double quote.
    my $source = q/"\\/ . "\x{100}" . q/"/;
    my $value  = eval $source;

    ok($warnings == 0 && $value eq "\x{100}");
}
Commit	Line	Data
f96ec2a2 GS	1	#!./perl
	2
	3	BEGIN {
	4	chdir 't' if -d 't';
20822f61	5	@INC = '../lib';
f96ec2a2 GS	6	}
f96ec2a2 GS	7
4765795a JH	8	# NOTE!
	9	#
	10	# Think carefully before adding tests here. In general this should be
	11	# used only for about three categories of tests:
	12	#
	13	# (1) tests that absolutely require 'use utf8', and since that in general
	14	# shouldn't be needed as the utf8 is being obsoleted, this should
	15	# have rather few tests. If you want to test Unicode and regexes,
	16	# you probably want to go to op/regexp or op/pat; if you want to test
	17	# split, go to op/split; pack, op/pack; appending or joining,
	18	# op/append or op/join, and so forth
	19	#
	20	# (2) tests that have to do with Unicode tokenizing (though it's likely
	21	# that all the other Unicode tests sprinkled around the t/*/.t are
	22	# going to catch that)
	23	#
	24	# (3) complicated tests that simultaneously stress so many Unicode features
	25	# that deciding into which other test script the tests should go to
	26	# is hard -- maybe consider breaking up the complicated test
	27	#
	28	#
	29
	30	use Test;
	31	plan tests => 15;
31067593	32
7bbb0251	33	{
da450f52 JH	34	# bug id 20001009.001
da450f52 JH	35
89491803 SC	36	my ($a, $b);
	37
	38	{ use bytes; $a = "\xc3\xa4" }
4765795a	39	{ use utf8; $b = "\xe4" }
89491803	40
4765795a	41	my $test = 68;
31067593	42
4765795a	43	ok($a ne $b);
da450f52	44
4765795a	45	{ use utf8; ok($a ne $b) }
da450f52 JH	46	}
da450f52 JH	47
60ff4832 JH	48
	49	{
	50	# bug id 20000730.004
	51
60ff4832 JH	52	my $smiley = "\x{263a}";
60ff4832 JH	53
4765795a JH	54	for my $s ("\x{263a}",
4765795a JH	55	$smiley,
60ff4832	56
4765795a JH	57	"" . $smiley,
4765795a JH	58	"" . "\x{263a}",
60ff4832	59
4765795a JH	60	$smiley . "",
4765795a JH	61	"\x{263a}" . "",
60ff4832 JH	62	) {
	63	my $length_chars = length($s);
	64	my $length_bytes;
	65	{ use bytes; $length_bytes = length($s) }
	66	my @regex_chars = $s =~ m/(.)/g;
	67	my $regex_chars = @regex_chars;
	68	my @split_chars = split //, $s;
	69	my $split_chars = @split_chars;
4765795a JH	70	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a JH	71	"1/1/1/3");
60ff4832 JH	72	}
60ff4832 JH	73
4765795a JH	74	for my $s ("\x{263a}" . "\x{263a}",
4765795a JH	75	$smiley . $smiley,
60ff4832	76
4765795a JH	77	"\x{263a}\x{263a}",
4765795a JH	78	"$smiley$smiley",
60ff4832	79
4765795a JH	80	"\x{263a}" x 2,
4765795a JH	81	$smiley x 2,
60ff4832 JH	82	) {
	83	my $length_chars = length($s);
	84	my $length_bytes;
	85	{ use bytes; $length_bytes = length($s) }
	86	my @regex_chars = $s =~ m/(.)/g;
	87	my $regex_chars = @regex_chars;
	88	my @split_chars = split //, $s;
	89	my $split_chars = @split_chars;
4765795a JH	90	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a JH	91	"2/2/2/6");
60ff4832 JH	92	}
60ff4832 JH	93	}
ffc61ed2	94
ffc61ed2 JH	95
ffc61ed2 JH	96	{
f9a63242 JH	97	my $w = 0;
	98	local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
	99	my $x = eval q/"\\/ . "\x{100}" . q/"/;;
	100
4765795a	101	ok($w == 0 && $x eq "\x{100}");
f9a63242 JH	102	}
f9a63242 JH	103