This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
bytes::length TIMTOWTDI
[perl5.git] / ext / Encode / Encode.pm
CommitLineData
package Encode;
use strict;

# The version string is built from the digit groups of the RCS
# "Revision" keyword below.
our $VERSION = do { my @r = (q$Revision: 1.60 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

# Set to a true value to have encodings() announce each module it loads.
our $DEBUG = 0;

use XSLoader ();
XSLoader::load 'Encode';

require Exporter;
our @ISA = qw(Exporter);

# Public, encouraged API is exported by default
our @EXPORT = qw(
    decode
    decode_utf8
    encode
    encode_utf8
    encodings
    find_encoding
);

# Individual fallback flag names, and the named FB_* fallback modes.
our @FB_FLAGS = qw(
    DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
    PERLQQ HTMLCREF XMLCREF
);
our @FB_CONSTS = qw(
    FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
    FB_PERLQQ FB_HTMLCREF FB_XMLCREF
);

# Everything below is exported on request only.
our @EXPORT_OK = (
    qw(
        _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
        is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
    ),
    @FB_FLAGS,
    @FB_CONSTS,
);

our %EXPORT_TAGS = (
    all          => [ @EXPORT, @EXPORT_OK ],
    fallbacks    => [ @FB_CONSTS ],
    fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);

# Documentation moved after __END__ for speed - NI-S

use Carp;

# True when ord("A") is 193, i.e. on an EBCDIC platform.
our $ON_EBCDIC = (ord("A") == 193);

use Encode::Alias;

# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
our %ExtModule;
require Encode::Config;

# A local configuration module is optional; failing to load it is ignored.
eval { require Encode::ConfigLocal };
5129552c 52
#
# encodings(CLASS [, ":all" | MODULE, ...])
#
# Returns the names currently registered in %Encoding, sorted
# case-insensitively, leaving out the entries named "Internal" and
# "Unicode".
#
# The first argument is discarded (the sub is meant to be called as
# Encode->encodings).  Any further arguments name modules to load first:
#   ":all"      - load every module listed in the values of %ExtModule
#   "Foo::Bar"  - load Foo/Bar.pm
#   "Bar"       - no "::" in the name, so Encode/Bar.pm is loaded
# A module that fails to load is silently skipped.
#
sub encodings
{
    my $class   = shift;
    my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
    for my $mod (@modules) {
        # Turn the package name into a path; a name without "::" is
        # taken to live under Encode/.
        $mod =~ s,::,/,g or $mod = "Encode/$mod";
        $mod .= '.pm';
        $DEBUG and warn "about to require $mod;";
        eval { require $mod; };    # best effort, errors are ignored
    }
    return
        sort { lc $a cmp lc $b }
        grep { !/^(?:Internal|Unicode)$/ } keys %Encoding;
}
68
#
# perlio_ok(ENCODING)
#
# ENCODING is either an encoding object (any reference is taken to be
# one) or a name that is looked up with find_encoding().  Returns
# whatever the object's own perlio_ok method returns, or 0 when the name
# does not resolve to an encoding or the object has no such method.
#
sub perlio_ok {
    my $obj = ref($_[0]) ? $_[0] : find_encoding($_[0]);
    # An unknown name yields no object; report "not OK" instead of dying
    # on a method call against undef.
    return 0 unless defined $obj;
    $obj->can("perlio_ok") and return $obj->perlio_ok();
    return 0;    # safety net
}
74
#
# define_encoding(OBJECT, NAME [, ALIAS, ...])
#
# Stores OBJECT in %Encoding under NAME.  If NAME is not already all
# lower case, its lower-cased form is registered as an alias, and so is
# every additional ALIAS argument.  Returns OBJECT.
#
sub define_encoding
{
    my ($obj, $name, @aliases) = @_;
    $Encoding{$name} = $obj;

    my $lower = lc($name);
    if ($lower ne $name) {
        define_alias($lower => $obj);
    }

    for my $alias (@aliases) {
        define_alias($alias, $obj);
    }
    return $obj;
}
89
#
# CLASS->getEncoding(NAME [, SKIP_EXTERNAL])
#
# Resolves NAME to an encoding object.  The lookup order is:
#   1. NAME is already a reference whose class provides new_sequence:
#      it is returned unchanged;
#   2. %Encoding, by NAME and then by lower-cased NAME;
#   3. CLASS->find_alias, by NAME and then by lower-cased NAME;
#   4. unless SKIP_EXTERNAL is true, the module listed in %ExtModule for
#      NAME (or its lower-cased form) is loaded and %Encoding is
#      consulted again, by NAME and then by lower-cased NAME.
# Returns nothing (undef in scalar context) when no encoding is found.
#
sub getEncoding
{
    my ($class, $name, $skip_external) = @_;

    return $name if ref($name) && $name->can('new_sequence');

    my $lc = lc $name;
    for my $key ($name, $lc) {
        return $Encoding{$key} if exists $Encoding{$key};
    }

    my $oc = $class->find_alias($name);
    return $oc if defined $oc;

    $oc = $class->find_alias($lc) if $lc ne $name;
    return $oc if defined $oc;

    unless ($skip_external) {
        if (my $mod = $ExtModule{$name} || $ExtModule{$lc}) {
            $mod =~ s,::,/,g;
            $mod .= '.pm';
            eval { require $mod; };    # a load failure just means "not found"
            # Re-check both spellings, mirroring the initial lookup, so a
            # module located through the lower-cased key is not missed.
            for my $key ($name, $lc) {
                return $Encoding{$key} if exists $Encoding{$key};
            }
        }
    }
    return;
}
124
#
# find_encoding(NAME [, SKIP_EXTERNAL])
#
# Function-style front end to getEncoding(), invoked on this package.
# Both arguments are passed through unchanged.
#
sub find_encoding
{
    my $name          = shift;
    my $skip_external = shift;
    return __PACKAGE__->getEncoding($name, $skip_external);
}
130
#
# resolve_alias(NAME)
#
# Looks NAME up with find_encoding() and returns the result of calling
# ->name on the encoding found.  Returns nothing when NAME does not
# resolve.
#
sub resolve_alias {
    my $obj = find_encoding(shift);
    return unless defined $obj;
    return $obj->name;
}
136
#
# encode(ENCODING, STRING [, CHECK])
#
# Looks ENCODING up with find_encoding() and returns the result of the
# encoding object's encode method applied to a copy of STRING.  CHECK
# defaults to 0.  Croaks when ENCODING is unknown.
#
# When CHECK is true and the local copy of STRING is non-empty after the
# call, undef is returned.
# NOTE(review): unlike decode(), nothing is written back to the caller's
# STRING here - confirm whether that asymmetry is intended.
#
sub encode($$;$)
{
    my $name   = shift;
    my $string = shift;
    my $check  = shift || 0;

    my $enc = find_encoding($name);
    defined $enc or croak("Unknown encoding '$name'");

    my $octets = $enc->encode($string, $check);
    return undef if $check and length $string;
    return $octets;
}
147
#
# decode(ENCODING, OCTETS [, CHECK])
#
# Looks ENCODING up with find_encoding() and returns the result of the
# encoding object's decode method applied to a copy of OCTETS.  CHECK
# defaults to 0.  Croaks when ENCODING is unknown.
#
# When CHECK is true, the caller's OCTETS argument is overwritten with
# the local copy as it stands after the decode call.
#
sub decode($$;$)
{
    my ($name, $octets) = @_;
    my $check = $_[2] || 0;

    my $enc = find_encoding($name);
    defined $enc or croak("Unknown encoding '$name'");

    my $string = $enc->decode($octets, $check);
    if ($check) {
        $_[1] = $octets;    # $_[1] aliases the caller's variable
    }
    return $string;
}
158
#
# from_to(STRING, FROM_ENCODING, TO_ENCODING [, CHECK])
#
# Decodes STRING with FROM_ENCODING, encodes the result with
# TO_ENCODING and stores it back into the caller's STRING.  Returns the
# length of the converted data, or undef if the stored result is
# undefined.  CHECK defaults to 0.  Croaks when either encoding is
# unknown.
#
# When CHECK is true and either step leaves input behind, undef is
# returned and the caller's STRING is left untouched.
#
sub from_to($$$;$)
{
    my ($string, $from, $to) = @_;
    my $check = $_[3] || 0;

    my $f = find_encoding($from);
    defined $f or croak("Unknown encoding '$from'");
    my $t = find_encoding($to);
    defined $t or croak("Unknown encoding '$to'");

    my $uni = $f->decode($string, $check);
    return undef if $check and length $string;

    $string = $t->encode($uni, $check);
    return undef if $check and length $uni;

    # $_[0] aliases the caller's variable: this is the in-place update.
    return defined($_[0] = $string) ? length($string) : undef;
}
173
#
# encode_utf8(STRING)
#
# Returns a copy of STRING after utf8::encode() has been applied to it.
# The caller's variable is not modified.
#
sub encode_utf8($)
{
    my $octets = $_[0];
    utf8::encode($octets);
    return $octets;
}
180
#
# decode_utf8(OCTETS [, CHECK])
#
# Returns a copy of OCTETS after utf8::decode() has been applied to it,
# or undef when utf8::decode() reports failure.  The caller's variable
# is not modified.
#
# CHECK is accepted so that the documented two-argument call form
# compiles (the prototype used to be ($), which rejected it).  It is not
# consulted yet: failure always yields undef regardless of CHECK.
#
sub decode_utf8($;$)
{
    my ($str, $check) = @_;
    return undef unless utf8::decode($str);
    return $str;
}
187
# Install the encodings implemented in this file at load time.
predefine_encodings();

#
# predefine_encodings()
#
# Defines the methods of the encoding classes implemented in this file
# and registers one instance of each in %Encode::Encoding:
#   - under the key "Unicode": Encode::UTF_EBCDIC when $ON_EBCDIC is
#     true, Encode::Internal otherwise;
#   - under the key "utf8": Encode::utf8.
# It can be called again to restore those %Encoding entries if really
# needed.
#
sub predefine_encodings
{
    if ($ON_EBCDIC) {
        # was in Encode::UTF_EBCDIC
        package Encode::UTF_EBCDIC;

        *name         = sub { $_[0]->{'Name'} };
        *new_sequence = sub { $_[0] };
        *needs_lines  = sub { 0 };
        # PerlIO support is available if PerlIO::encoding can be loaded.
        *perlio_ok    = sub {
            eval { require PerlIO::encoding };
            $@ ? 0 : 1;
        };

        # Maps every character through utf8::unicode_to_native.
        *decode = sub {
            my ($obj, $str, $chk) = @_;
            my $res = join '',
                map { chr(utf8::unicode_to_native(ord($_))) }
                split //, $str;
            $_[1] = '' if $chk;    # source is reported as fully consumed
            return $res;
        };

        # Maps every character through utf8::native_to_unicode.
        *encode = sub {
            my ($obj, $str, $chk) = @_;
            my $res = join '',
                map { chr(utf8::native_to_unicode(ord($_))) }
                split //, $str;
            $_[1] = '' if $chk;    # source is reported as fully consumed
            return $res;
        };

        $Encode::Encoding{Unicode} =
            bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
    }
    else {
        # Non-EBCDIC platforms: the "Unicode" entry is Encode::Internal.
        package Encode::Internal;

        *name         = sub { $_[0]->{'Name'} };
        *new_sequence = sub { $_[0] };
        *needs_lines  = sub { 0 };
        # PerlIO support is available if PerlIO::encoding can be loaded.
        *perlio_ok    = sub {
            eval { require PerlIO::encoding };
            $@ ? 0 : 1;
        };

        # Returns an upgraded copy of the string.
        *decode = sub {
            my ($obj, $str, $chk) = @_;
            utf8::upgrade($str);
            $_[1] = '' if $chk;    # source is reported as fully consumed
            return $str;
        };
        # Encoding is the very same operation.
        *encode = \&decode;

        $Encode::Encoding{Unicode} =
            bless { Name => "Internal" } => "Encode::Internal";
    }

    {
        # was in Encode::utf8
        package Encode::utf8;

        *name         = sub { $_[0]->{'Name'} };
        *new_sequence = sub { $_[0] };
        *needs_lines  = sub { 0 };
        # PerlIO support is available if PerlIO::encoding can be loaded.
        *perlio_ok    = sub {
            eval { require PerlIO::encoding };
            $@ ? 0 : 1;
        };

        # Delegates to Encode::decode_utf8; undef signals failure and
        # leaves the source untouched.
        *decode = sub {
            my ($obj, $octets, $chk) = @_;
            my $str = Encode::decode_utf8($octets);
            return undef unless defined $str;
            $_[1] = '' if $chk;    # source is reported as fully consumed
            return $str;
        };

        # Delegates to Encode::encode_utf8.
        *encode = sub {
            my ($obj, $string, $chk) = @_;
            my $octets = Encode::encode_utf8($string);
            $_[1] = '' if $chk;    # source is reported as fully consumed
            return $octets;
        };

        $Encode::Encoding{utf8} =
            bless { Name => "utf8" } => "Encode::utf8";
    }
}
276
656753f8
NIS
2771;
278
2a936312
NIS
279__END__
280
4411f3b6
NIS
281=head1 NAME
282
283Encode - character encodings
284
285=head1 SYNOPSIS
286
287 use Encode;
288
67d7b5ef
JH
289=head2 Table of Contents
290
0ab8f81e 291Encode consists of a collection of modules whose details are too big
67d7b5ef 292to fit in one document. This POD itself explains the top-level APIs
6d1c0808 293and general topics at a glance. For other topics and more details,
0ab8f81e 294see the PODs below:
67d7b5ef
JH
295
296 Name Description
297 --------------------------------------------------------
6d1c0808 298 Encode::Alias Alias definitions to encodings
67d7b5ef
JH
299 Encode::Encoding Encode Implementation Base Class
300 Encode::Supported List of Supported Encodings
301 Encode::CN Simplified Chinese Encodings
302 Encode::JP Japanese Encodings
303 Encode::KR Korean Encodings
304 Encode::TW Traditional Chinese Encodings
305 --------------------------------------------------------
306
4411f3b6
NIS
307=head1 DESCRIPTION
308
47bfe92f 309The C<Encode> module provides the interfaces between Perl's strings
67d7b5ef
JH
310and the rest of the system. Perl strings are sequences of
311B<characters>.
312
The repertoire of characters that Perl can represent is at least that
defined by the Unicode Consortium. On most platforms the ordinal
value of a character (as returned by C<ord(ch)>) is the "Unicode
codepoint" for that character (the exceptions are those platforms where
the legacy encoding is some variant of EBCDIC rather than a super-set
of ASCII - see L<perlebcdic>).
319
0ab8f81e 320Traditionally, computer data has been moved around in 8-bit chunks
67d7b5ef
JH
321often called "bytes". These chunks are also known as "octets" in
322networking standards. Perl is widely used to manipulate data of many
323types - not only strings of characters representing human or computer
0ab8f81e 324languages but also "binary" data being the machine's representation of
67d7b5ef
JH
325numbers, pixels in an image - or just about anything.
326
0ab8f81e 327When Perl is processing "binary data", the programmer wants Perl to
67d7b5ef 328process "sequences of bytes". This is not a problem for Perl - as a
0ab8f81e 329byte has 256 possible values, it easily fits in Perl's much larger
67d7b5ef
JH
330"logical character".
331
332=head2 TERMINOLOGY
4411f3b6 333
67d7b5ef 334=over 4
21938dfa 335
67d7b5ef
JH
336=item *
337
338I<character>: a character in the range 0..(2**32-1) (or more).
339(What Perl's strings are made of.)
340
341=item *
342
343I<byte>: a character in the range 0..255
344(A special case of a Perl character.)
345
346=item *
347
348I<octet>: 8 bits of data, with ordinal values 0..255
0ab8f81e 349(Term for bytes passed to or from a non-Perl context, e.g. a disk file.)
67d7b5ef
JH
350
351=back
4411f3b6 352
67d7b5ef
JH
353The marker [INTERNAL] marks Internal Implementation Details, in
354general meant only for those who think they know what they are doing,
355and such details may change in future releases.
356
357=head1 PERL ENCODING API
4411f3b6
NIS
358
359=over 4
360
f2a2953c 361=item $octets = encode(ENCODING, $string[, CHECK])
4411f3b6 362
0ab8f81e 363Encodes a string from Perl's internal form into I<ENCODING> and returns
67d7b5ef 364a sequence of octets. ENCODING can be either a canonical name or
0ab8f81e
JH
365an alias. For encoding names and aliases, see L</"Defining Aliases">.
366For CHECK, see L</"Handling Malformed Data">.
4411f3b6 367
For example, to convert an (internally UTF-8 encoded) Unicode string to
iso-8859-1 (also known as Latin1):
681a7c68 370
67d7b5ef 371 $octets = encode("iso-8859-1", $unicode);
681a7c68 372
f2a2953c 373=item $string = decode(ENCODING, $octets[, CHECK])
4411f3b6 374
0ab8f81e
JH
375Decodes a sequence of octets assumed to be in I<ENCODING> into Perl's
376internal form and returns the resulting string. As in encode(),
377ENCODING can be either a canonical name or an alias. For encoding names
378and aliases, see L</"Defining Aliases">. For CHECK, see
47bfe92f
JH
379L</"Handling Malformed Data">.
380
0ab8f81e 381For example, to convert ISO-8859-1 data to UTF-8:
681a7c68 382
67d7b5ef 383 $utf8 = decode("iso-8859-1", $latin1);
681a7c68 384
f2a2953c 385=item [$length =] from_to($string, FROM_ENCODING, TO_ENCODING [,CHECK])
47bfe92f 386
0ab8f81e
JH
387Converts B<in-place> data between two encodings.
388For example, to convert ISO-8859-1 data to UTF-8:
2b106fbe
JH
389
390 from_to($data, "iso-8859-1", "utf-8");
391
392and to convert it back:
393
394 from_to($data, "utf-8", "iso-8859-1");
4411f3b6 395
ab97ca19 396Note that because the conversion happens in place, the data to be
0ab8f81e 397converted cannot be a string constant; it must be a scalar variable.
ab97ca19 398
0ab8f81e 399from_to() returns the length of the converted string on success, undef
3ef515df
JH
400otherwise.
401
4411f3b6
NIS
402=back
403
f2a2953c
JH
404=head2 UTF-8 / utf8
405
0ab8f81e
JH
406The Unicode Consortium defines the UTF-8 transformation format as a
407way of encoding the entire Unicode repertoire as sequences of octets.
408This encoding is expected to become very widespread. Perl can use this
409form internally to represent strings, so conversions to and from this
410form are particularly efficient (as octets in memory do not have to
411change, just the meta-data that tells Perl how to treat them).
f2a2953c
JH
412
413=over 4
414
415=item $octets = encode_utf8($string);
416
0ab8f81e
JH
417The characters that comprise $string are encoded in Perl's superset of
418UTF-8 and the resulting octets are returned as a sequence of bytes. All
419possible characters have a UTF-8 representation so this function cannot
420fail.
f2a2953c
JH
421
422=item $string = decode_utf8($octets [, CHECK]);
423
424The sequence of octets represented by $octets is decoded from UTF-8
425into a sequence of logical characters. Not all sequences of octets
426form valid UTF-8 encodings, so it is possible for this call to fail.
0ab8f81e 427For CHECK, see L</"Handling Malformed Data">.
f2a2953c
JH
428
429=back
430
51ef4e11
NIS
431=head2 Listing available encodings
432
5129552c
JH
433 use Encode;
434 @list = Encode->encodings();
435
436Returns a list of the canonical names of the available encodings that
437are loaded. To get a list of all available encodings including the
438ones that are not loaded yet, say
439
440 @all_encodings = Encode->encodings(":all");
441
0ab8f81e 442Or you can give the name of a specific module.
5129552c 443
c731e18e
JH
444 @with_jp = Encode->encodings("Encode::JP");
445
446When "::" is not in the name, "Encode::" is assumed.
51ef4e11 447
c731e18e 448 @ebcdic = Encode->encodings("EBCDIC");
5d030b67 449
0ab8f81e 450To find out in detail which encodings are supported by this package,
5d030b67 451see L<Encode::Supported>.
51ef4e11
NIS
452
453=head2 Defining Aliases
454
0ab8f81e 455To add a new alias to a given encoding, use:
67d7b5ef 456
5129552c
JH
457 use Encode;
458 use Encode::Alias;
a63c962f 459 define_alias(newName => ENCODING);
51ef4e11 460
3ef515df 461After that, newName can be used as an alias for ENCODING.
f2a2953c
JH
462ENCODING may be either the name of an encoding or an
463I<encoding object>
51ef4e11 464
fcb875d4
JH
But before you do so, make sure the alias is nonexistent with
C<resolve_alias()>, which returns the canonical name thereof.
For example:
468
469 Encode::resolve_alias("latin1") eq "iso-8859-1" # true
470 Encode::resolve_alias("iso-8859-12") # false; nonexistent
471 Encode::resolve_alias($name) eq $name # true if $name is canonical
472
0ab8f81e
JH
473resolve_alias() does not need C<use Encode::Alias>; it can be
474exported via C<use Encode qw(resolve_alias)>.
fcb875d4 475
0ab8f81e 476See L<Encode::Alias> for details.
51ef4e11 477
85982a32 478=head1 Encoding via PerlIO
4411f3b6 479
0ab8f81e
JH
480If your perl supports I<PerlIO>, you can use a PerlIO layer to decode
481and encode directly via a filehandle. The following two examples
482are totally identical in their functionality.
4411f3b6 483
85982a32
JH
484 # via PerlIO
485 open my $in, "<:encoding(shiftjis)", $infile or die;
486 open my $out, ">:encoding(euc-jp)", $outfile or die;
  while(<$in>){ print $out $_; }
8e86646e 488
85982a32 489 # via from_to
0ab8f81e
JH
490 open my $in, "<", $infile or die;
491 open my $out, ">", $outfile or die;
  while(<$in>){
    from_to($_, "shiftjis", "euc-jp", 1);
    print $out $_;
  }
4411f3b6 495
0ab8f81e
JH
Unfortunately, there may be encodings that are not PerlIO-savvy. You can
check if your encoding is supported by PerlIO by calling the C<perlio_ok>
method.
499
500 Encode::perlio_ok("hz"); # False
501 find_encoding("euc-cn")->perlio_ok; # True where PerlIO is available
502
503 use Encode qw(perlio_ok); # exported upon request
504 perlio_ok("euc-jp")
4411f3b6 505
0ab8f81e
JH
506Fortunately, all encodings that come with Encode core are PerlIO-savvy
507except for hz and ISO-2022-kr. See L<Encode::Encoding> for details.
4411f3b6 508
0ab8f81e 509For gory details, see L<Encode::PerlIO>.
4411f3b6 510
85982a32 511=head1 Handling Malformed Data
4411f3b6 512
85982a32 513=over 4
47bfe92f 514
0ab8f81e
JH
515The I<CHECK> argument is used as follows. When you omit it,
516the behaviour is the same as if you had passed a value of 0 for
517I<CHECK>.
47bfe92f 518
85982a32 519=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
47bfe92f 520
0ab8f81e
JH
521If I<CHECK> is 0, (en|de)code will put a I<substitution character>
522in place of a malformed character. For UCM-based encodings,
523E<lt>subcharE<gt> will be used. For Unicode, "\x{FFFD}" is used.
524If the data is supposed to be UTF-8, an optional lexical warning
525(category utf8) is given.
e9692b5b 526
=item I<CHECK> = Encode::FB_CROAK ( == 1)
e9692b5b 528
0ab8f81e
JH
529If I<CHECK> is 1, methods will die immediately with an error
530message. Therefore, when I<CHECK> is set to 1, you should trap the
531fatal error with eval{} unless you really want to let it die on error.
47bfe92f 532
85982a32 533=item I<CHECK> = Encode::FB_QUIET
47bfe92f 534
85982a32 535If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
0ab8f81e
JH
536return the portion of the data that has been processed so far when
537an error occurs. The data argument will be overwritten with
538everything after that point (that is, the unprocessed part of data).
539This is handy when you have to call decode repeatedly in the case
540where your source data may contain partial multi-byte character
541sequences, for example because you are reading with a fixed-width
542buffer. Here is some sample code that does exactly this:
4411f3b6 543
85982a32
JH
  my $data = '';
  while(read $fh, $buffer, 256){
    # buffer may end in a partial character so we append
    $data .= $buffer;
    $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
    # $data now contains the unprocessed partial character
  }
1768d7eb 551
85982a32 552=item I<CHECK> = Encode::FB_WARN
67d7b5ef 553
0ab8f81e
JH
554This is the same as above, except that it warns on error. Handy when
555you are debugging the mode above.
85982a32
JH
556
557=item perlqq mode (I<CHECK> = Encode::FB_PERLQQ)
558
af1f55d9
JH
559=item HTML charref mode (I<CHECK> = Encode::FB_HTMLCREF)
560
561=item XML charref mode (I<CHECK> = Encode::FB_XMLCREF)
562
85982a32
JH
563For encodings that are implemented by Encode::XS, CHECK ==
564Encode::FB_PERLQQ turns (en|de)code into C<perlqq> fallback mode.
565
0ab8f81e
JH
566When you decode, '\xI<XX>' will be inserted for a malformed character,
567where I<XX> is the hex representation of the octet that could not be
568decoded to utf8. And when you encode, '\x{I<xxxx>}' will be inserted,
569where I<xxxx> is the Unicode ID of the character that cannot be found
570in the character repertoire of the encoding.
85982a32 571
af1f55d9
JH
HTML/XML character reference modes are about the same. In place of
\x{I<xxxx>}, HTML uses &#I<1234>; where I<1234> is a decimal number, and
XML uses &#xI<abcd>; where I<abcd> is a hexadecimal number.
575
85982a32
JH
576=item The bitmask
577
0ab8f81e
JH
578These modes are actually set via a bitmask. Here is how the FB_XX
579constants are laid out. You can import the FB_XX constants via
580C<use Encode qw(:fallbacks)>; you can import the generic bitmask
581constants via C<use Encode qw(:fallback_all)>.
85982a32 582
b0b300a3
JH
                      FB_DEFAULT FB_CROAK FB_QUIET FB_WARN  FB_PERLQQ
 DIE_ON_ERR    0x0001             X
 WARN_ON_ERR   0x0002                               X
 RETURN_ON_ERR 0x0004                      X        X
 LEAVE_SRC     0x0008
 PERLQQ        0x0100                                        X
 HTMLCREF      0x0200
 XMLCREF       0x0400
67d7b5ef 591
=back

=head2 Unimplemented fallback schemes
67d7b5ef 593
0ab8f81e 594In the future, you will be able to use a code reference to a callback
f2a2953c 595function for the value of I<CHECK> but its API is still undecided.
67d7b5ef
JH
596
597=head1 Defining Encodings
598
599To define a new encoding, use:
600
  use Encode qw(define_encoding);
602 define_encoding($object, 'canonicalName' [, alias...]);
603
604I<canonicalName> will be associated with I<$object>. The object
0ab8f81e 605should provide the interface described in L<Encode::Encoding>.
67d7b5ef 606If more than two arguments are provided then additional
0ab8f81e 607arguments are taken as aliases for I<$object>, as for C<define_alias>.
67d7b5ef 608
f2a2953c
JH
609See L<Encode::Encoding> for more details.
610
4411f3b6
NIS
611=head1 Messing with Perl's Internals
612
47bfe92f 613The following API uses parts of Perl's internals in the current
0ab8f81e 614implementation. As such, they are efficient but may change.
4411f3b6
NIS
615
616=over 4
617
a63c962f 618=item is_utf8(STRING [, CHECK])
4411f3b6 619
0ab8f81e 620[INTERNAL] Tests whether the UTF-8 flag is turned on in the STRING.
47bfe92f
JH
621If CHECK is true, also checks the data in STRING for being well-formed
622UTF-8. Returns true if successful, false otherwise.
4411f3b6 623
a63c962f 624=item _utf8_on(STRING)
4411f3b6 625
0ab8f81e 626[INTERNAL] Turns on the UTF-8 flag in STRING. The data in STRING is
4411f3b6
NIS
627B<not> checked for being well-formed UTF-8. Do not use unless you
628B<know> that the STRING is well-formed UTF-8. Returns the previous
0ab8f81e
JH
629state of the UTF-8 flag (so please don't treat the return value as
630indicating success or failure), or C<undef> if STRING is not a string.
4411f3b6 631
a63c962f 632=item _utf8_off(STRING)
4411f3b6 633
0ab8f81e
JH
634[INTERNAL] Turns off the UTF-8 flag in STRING. Do not use frivolously.
635Returns the previous state of the UTF-8 flag (so please don't treat the
636return value as indicating success or failure), or C<undef> if STRING is
4411f3b6
NIS
637not a string.
638
639=back
640
641=head1 SEE ALSO
642
5d030b67
JH
643L<Encode::Encoding>,
644L<Encode::Supported>,
6d1c0808 645L<Encode::PerlIO>,
5d030b67 646L<encoding>,
6d1c0808
JH
647L<perlebcdic>,
648L<perlfunc/open>,
649L<perlunicode>,
650L<utf8>,
5d030b67 651the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
4411f3b6 652
85982a32 653=head1 MAINTAINER
aae85ceb
DK
654
655This project was originated by Nick Ing-Simmons and later maintained
0ab8f81e 656by Dan Kogai E<lt>dankogai@dan.co.jpE<gt>. See AUTHORS for a full list
aae85ceb
DK
657of people involved. For any questions, use
658E<lt>perl-unicode@perl.orgE<gt> so others can share.
659
4411f3b6 660=cut