From 3de8ed06f96286478ecf8f3810596152fa21b27b Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Fri, 2 Nov 2001 15:19:35 +0000 Subject: [PATCH] More encoding testing. p4raw-id: //depot/perl@12813 --- lib/encoding.pm | 10 +++++----- lib/encoding.t | 23 +++++++++++++++++++++-- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/lib/encoding.pm b/lib/encoding.pm index 2f4b059..6f5970f 100644 --- a/lib/encoding.pm +++ b/lib/encoding.pm @@ -43,7 +43,7 @@ encoding - pragma to control the conversion of legacy data into Unicode print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; - # but pack/unpack C are not, in case you still + # but pack/unpack are not affected, in case you still # want back to your native encoding print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; @@ -58,13 +58,13 @@ The pragma is a per script, not a per block lexical. Only the last C matters, and it affects B. If no encoding is specified, the environment variable L -is consulted. If that fails, "latin1" (ISO 8859-1) is assumed. -If no encoding can be found, C error will be thrown. +is consulted. If that fails, "latin1" (ISO 8859-1) is assumed. If no +encoding can be found, C error will be thrown. =head1 KNOWN PROBLEMS -The C<\x..> and C<\0...> in regular expressions are not affected by -this pragma. They very probably should. +Literals in regular expressions are not affected by this pragma. +They very probably should be. =head1 SEE ALSO diff --git a/lib/encoding.t b/lib/encoding.t index 923baa7..0363441 100644 --- a/lib/encoding.t +++ b/lib/encoding.t @@ -1,4 +1,4 @@ -print "1..10\n"; +print "1..15\n"; use encoding "latin1"; # ignored (overwritten by the next line) use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 
@@ -44,8 +44,27 @@ print "ok 8\n"; print "not " unless unpack("C", chr(0xdf)) == 0xce; print "ok 9\n"; +print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; +print "ok 10\n"; + +print "not " unless unpack("U", chr(0xdf)) == 0x3af; +print "ok 11\n"; + # charnames must still work use charnames ':full'; print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; -print "ok 10\n"; +print "ok 12\n"; + +# combine + +$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); + +print "not " unless ord($c) == 0x3af; +print "ok 13\n"; + +print "not " unless ord(substr($c, 1, 1)) == 0xdf; +print "ok 14\n"; + +print "not " unless ord(substr($c, 2, 1)) == 0x3af; +print "ok 15\n"; -- 1.8.3.1