Upgrade to Encode-2.21
authorSteve Peters <steve@fisharerojo.org>
Sun, 13 May 2007 16:47:40 +0000 (16:47 +0000)
committerSteve Peters <steve@fisharerojo.org>
Sun, 13 May 2007 16:47:40 +0000 (16:47 +0000)
p4raw-id: //depot/perl@31212

MANIFEST
ext/Encode/Encode.pm
ext/Encode/Encode.xs
ext/Encode/Unicode/Unicode.pm
ext/Encode/encoding.pm
ext/Encode/lib/Encode/Encoding.pm
ext/Encode/lib/Encode/MIME/Name.pm [new file with mode: 0644]
ext/Encode/t/mime-name.t [new file with mode: 0644]

index cc8d430..f04de0b 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -483,6 +483,7 @@ ext/Encode/lib/Encode/JP/JIS7.pm    Encode extension
 ext/Encode/lib/Encode/KR/2022_KR.pm     Encode extension
 ext/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm       Encode extension
 ext/Encode/lib/Encode/MIME/Header.pm   Encode extension
+ext/Encode/lib/Encode/MIME/Name.pm     Encode extension
 ext/Encode/lib/Encode/PerlIO.pod       Documents for Encode & PerlIO
 ext/Encode/lib/Encode/Supported.pod    Documents for supported encodings
 ext/Encode/lib/Encode/Unicode/UTF7.pm  Encode extension
@@ -524,6 +525,7 @@ ext/Encode/t/ksc5601.enc    test data
 ext/Encode/t/ksc5601.utf       test data
 ext/Encode/t/mime_header_iso2022jp.t   test script
 ext/Encode/t/mime-header.t     test script
+ext/Encode/t/mime-name.t       test script
 ext/Encode/t/Mod_EUCJP.pm      module that t/enc_module.enc uses
 ext/Encode/t/perlio.t          test script
 ext/Encode/t/rt.pl             test script
index c52e7c4..74c7e3f 100644 (file)
@@ -1,10 +1,10 @@
 #
-# $Id: Encode.pm,v 2.20 2007/04/22 14:56:12 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.21 2007/05/12 06:42:19 dankogai Exp dankogai $
 #
 package Encode;
 use strict;
 use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.21 $ =~ /(\d+)/g;
 sub DEBUG () { 0 }
 use XSLoader ();
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -565,6 +565,22 @@ exported via C<use Encode qw(resolve_alias)>.
 
 See L<Encode::Alias> for details.
 
+=head2 Finding IANA Character Set Registry names
+
+The canonical name of a given encoding does not necessarily agree with
+IANA IANA Character Set Registry, commonly seen as C<< Content-Type:
+text/plain; charset=I<whatever> >>.  For most cases canonical names
+work but sometimes it does not (notably 'utf-8-strict').
+
+Therefore as of Encode version 2.21, a new method C<mime_name()> is added.
+
+  use Encode;
+  my $enc = find_encoding('UTF-8');
+  warn $enc->name;      # utf-8-strict
+  warn $enc->mime_name; # UTF-8
+
+See also:  L<Encode::Encoding>
+
 =head1 Encoding via PerlIO
 
 If your perl supports I<PerlIO> (which is the default), you can use a
index fe645b6..9de0a64 100644 (file)
@@ -1,5 +1,5 @@
 /*
- $Id: Encode.xs,v 2.11 2007/04/06 12:53:41 dankogai Exp $
+ $Id: Encode.xs,v 2.12 2007/05/12 06:42:19 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -650,6 +650,35 @@ CODE:
     XSRETURN(1);
 }
 
+void
+Method_mime_name(obj)
+SV *   obj
+CODE:
+{
+    encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+    SV *retval;
+    eval_pv("require Encode::MIME::Name", 0);
+
+    if (SvTRUE(get_sv("@", 0))) {
+       ST(0) = &PL_sv_undef;
+    }else{
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(sp);
+       XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
+       PUTBACK;
+       call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
+       SPAGAIN;
+       retval = newSVsv(POPs);
+       PUTBACK;
+       FREETMPS;
+       LEAVE;
+       /* enc->name[0] */
+       ST(0) = retval;
+    }
+    XSRETURN(1);
+}
+
 MODULE = Encode         PACKAGE = Encode
 
 PROTOTYPES: ENABLE
index 9a11d81..6e10941 100644 (file)
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 use XSLoader;
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -158,7 +158,7 @@ and as of this writing Encode suite just leave it as is (\x{FeFF}).
               16         32 bits/char
   -------------------------
   BE      0xFeFF 0x0000FeFF
-  LE      0xFFeF 0xFFFe0000
+  LE      0xFFFe 0xFFFe0000
   -------------------------
 
 =back
index 7b8eee4..5b9bdf8 100644 (file)
@@ -1,4 +1,4 @@
-# $Id: encoding.pm,v 2.6 2007/04/22 14:56:12 dankogai Exp dankogai $
+# $Id: encoding.pm,v 2.6 2007/04/22 14:56:12 dankogai Exp $
 package encoding;
 our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
index 9ebf95b..768d6d1 100644 (file)
@@ -3,7 +3,7 @@ package Encode::Encoding;
 # Base class for classes which implement encodings
 use strict;
 use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 require Encode;
 
@@ -20,6 +20,11 @@ sub Define {
 
 sub name { return shift->{'Name'} }
 
+sub mime_name{
+    require Encode::MIME::Name;
+    return Encode::MIME::Name::get_mime_name(shift->name);
+}
+
 # sub renew { return $_[0] }
 
 sub renew {
@@ -178,6 +183,17 @@ Predefined As:
 
 MUST return the string representing the canonical name of the encoding.
 
+=item -E<gt>mime_name
+
+Predefined As:
+
+  sub mime_name{
+    require Encode::MIME::Name;
+    return Encode::MIME::Name::get_mime_name(shift->name);
+  }
+
+MUST return the string representing the IANA charset name of the encoding.
+
 =item -E<gt>renew
 
 Predefined As:
diff --git a/ext/Encode/lib/Encode/MIME/Name.pm b/ext/Encode/lib/Encode/MIME/Name.pm
new file mode 100644 (file)
index 0000000..10d86a7
--- /dev/null
@@ -0,0 +1,94 @@
+package Encode::MIME::Name;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 1.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+our %MIME_NAME_OF = (
+    'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
+    'AdobeSymbol'           => 'Adobe-Symbol-Encoding',
+    'ascii'                 => 'US-ASCII',
+    'big5-hkscs'            => 'Big5-HKSCS',
+    'cp1026'                => 'IBM1026',
+    'cp1047'                => 'IBM1047',
+    'cp1250'                => 'windows-1250',
+    'cp1251'                => 'windows-1251',
+    'cp1252'                => 'windows-1252',
+    'cp1253'                => 'windows-1253',
+    'cp1254'                => 'windows-1254',
+    'cp1255'                => 'windows-1255',
+    'cp1256'                => 'windows-1256',
+    'cp1257'                => 'windows-1257',
+    'cp1258'                => 'windows-1258',
+    'cp37'                  => 'IBM037',
+    'cp424'                 => 'IBM424',
+    'cp437'                 => 'IBM437',
+    'cp500'                 => 'IBM500',
+    'cp775'                 => 'IBM775',
+    'cp850'                 => 'IBM850',
+    'cp852'                 => 'IBM852',
+    'cp855'                 => 'IBM855',
+    'cp857'                 => 'IBM857',
+    'cp860'                 => 'IBM860',
+    'cp861'                 => 'IBM861',
+    'cp862'                 => 'IBM862',
+    'cp863'                 => 'IBM863',
+    'cp864'                 => 'IBM864',
+    'cp865'                 => 'IBM865',
+    'cp866'                 => 'IBM866',
+    'cp869'                 => 'IBM869',
+    'cp936'                 => 'GBK',
+    'euc-jp'                => 'EUC-JP',
+    'euc-kr'                => 'EUC-KR',
+    #'gb2312-raw'            => 'GB2312', # no, you're wrong, I18N::Charset
+    'hp-roman8'             => 'hp-roman8',
+    'hz'                    => 'HZ-GB-2312',
+    'iso-2022-jp'           => 'ISO-2022-JP',
+    'iso-2022-jp-1'         => 'ISO-2022-JP',
+    'iso-2022-kr'           => 'ISO-2022-KR',
+    'iso-8859-1'            => 'ISO-8859-1',
+    'iso-8859-10'           => 'ISO-8859-10',
+    'iso-8859-13'           => 'ISO-8859-13',
+    'iso-8859-14'           => 'ISO-8859-14',
+    'iso-8859-15'           => 'ISO-8859-15',
+    'iso-8859-16'           => 'ISO-8859-16',
+    'iso-8859-2'            => 'ISO-8859-2',
+    'iso-8859-3'            => 'ISO-8859-3',
+    'iso-8859-4'            => 'ISO-8859-4',
+    'iso-8859-5'            => 'ISO-8859-5',
+    'iso-8859-6'            => 'ISO-8859-6',
+    'iso-8859-7'            => 'ISO-8859-7',
+    'iso-8859-8'            => 'ISO-8859-8',
+    'iso-8859-9'            => 'ISO-8859-9',
+    #'jis0201-raw'           => 'JIS_X0201',
+    #'jis0208-raw'           => 'JIS_C6226-1983',
+    #'jis0212-raw'           => 'JIS_X0212-1990',
+    'koi8-r'                => 'KOI8-R',
+    'koi8-u'                => 'KOI8-U',
+    #'ksc5601-raw'           => 'KS_C_5601-1987',
+    'shiftjis'              => 'Shift_JIS',
+    'UTF-16'                => 'UTF-16',
+    'UTF-16BE'              => 'UTF-16BE',
+    'UTF-16LE'              => 'UTF-16LE',
+    'UTF-32'                => 'UTF-32',
+    'UTF-32BE'              => 'UTF-32BE',
+    'UTF-32LE'              => 'UTF-32LE',
+    'UTF-7'                 => 'UTF-7',
+    'utf8'                  => 'UTF-8',
+    'utf-8-strict'          => 'UTF-8',
+    'viscii'                => 'VISCII',
+);
+
+sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::MIME::NAME -- internally used by Encode
+
+=head1 SEE ALSO
+
+L<I18N::Charset>
+
+=cut
diff --git a/ext/Encode/t/mime-name.t b/ext/Encode/t/mime-name.t
new file mode 100644 (file)
index 0000000..5346497
--- /dev/null
@@ -0,0 +1,36 @@
+#
+# $Id: mime-name.t,v 1.1 2007/05/12 06:42:19 dankogai Exp dankogai $
+# This script is written in utf8
+#
+BEGIN {
+    if ($ENV{'PERL_CORE'}){
+        chdir 't';
+        unshift @INC, '../lib';
+    }
+    require Config; import Config;
+    if ($Config{'extensions'} !~ /\bEncode\b/) {
+      print "1..0 # Skip: Encode was not built\n";
+      exit 0;
+    }
+    if (ord("A") == 193) {
+    print "1..0 # Skip: EBCDIC\n";
+    exit 0;
+    }
+    $| = 1;
+}
+
+use strict;
+use warnings;
+use Encode;
+#use Test::More qw(no_plan);
+use Test::More tests => 68;
+
+use_ok("Encode::MIME::Name");
+for my $canon ( sort keys %Encode::MIME::Name::MIME_NAME_OF ) {
+    my $enc       = find_encoding($canon);
+    my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
+    is $enc->mime_name, $mime_name,
+      qq(\$enc->mime_name("$canon") eq $mime_name);
+}
+
+__END__;