use strict; # quote strings, declare variables
use warnings; # on by default
use warnings qw(FATAL utf8); # fatalize encoding glitches
- use open qw(:std :utf8); # undeclared streams in UTF-8
+ use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8
use charnames qw(:full :short); # unneeded in v5.16
This I<does> make even Unix programmers C<binmode> your binary streams,
or open them with C<:raw>, but that's the only way to get at them
portably anyway.
-B<WARNING>: C<use autoload> and C<use open> do not get along with each other.
+B<WARNING>: C<use autodie> (pre 2.26) and C<use open> do not get along with each
+other.
=head2 ℞ 1: Generic Unicode-savvy filter
# cpan -i Unicode::Unihan
use Unicode::Unihan;
my $str = "東京";
- my $unhan = new Unicode::Unihan;
+ my $unhan = Unicode::Unihan->new;
for my $lang (qw(Mandarin Cantonese Korean JapaneseOn JapaneseKun)) {
printf "CJK $str in %-12s is ", $lang;
say $unhan->$lang($str);
# cpan -i Lingua::JA::Romanize::Japanese
use Lingua::JA::Romanize::Japanese;
- my $k2r = new Lingua::JA::Romanize::Japanese;
+ my $k2r = Lingua::JA::Romanize::Japanese->new;
my $str = "東京";
say "Japanese for $str is ", $k2r->chars($str);
or
$ export PERL_UNICODE=A
or
- use Encode qw(decode_utf8);
- @ARGV = map { decode_utf8($_, 1) } @ARGV;
+ use Encode qw(decode);
+ @ARGV = map { decode('UTF-8', $_, 1) } @ARGV;
=head2 ℞ 14: Decode program arguments as locale encoding
or
$ export PERL_UNICODE=S
or
- use open qw(:std :utf8);
+ use open qw(:std :encoding(UTF-8));
or
- binmode(STDIN, ":utf8");
+ binmode(STDIN, ":encoding(UTF-8)");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
or
$ export PERL_UNICODE=D
or
- use open qw(:utf8);
+ use open qw(:encoding(UTF-8));
=head2 ℞ 18: Make all I/O and args default to utf8
or
$ export PERL_UNICODE=SDA
or
- use open qw(:std :utf8);
- use Encode qw(decode_utf8);
- @ARGV = map { decode_utf8($_, 1) } @ARGV;
+ use open qw(:std :encoding(UTF-8));
+ use Encode qw(decode);
+ @ARGV = map { decode('UTF-8', $_, 1) } @ARGV;
=head2 ℞ 19: Open file with specific encoding
\p{Sk}, \p{Ps}, \p{Lt}
\p{alpha}, \p{upper}, \p{lower}
\p{Latin}, \p{Greek}
- \p{script=Latin}, \p{script=Greek}
+ \p{script_extensions=Latin}, \p{scx=Greek}
\p{East_Asian_Width=Wide}, \p{EA=W}
\p{Line_Break=Hyphen}, \p{LB=HY}
\p{Numeric_Value=4}, \p{NV=4}
use Unicode::UCD qw(num);
my $str = "got Ⅻ and ४५६७ and ⅞ and here";
my @nums = ();
- while (/$str =~ (\d+|\N)/g) { # not just ASCII!
+ while ($str =~ /(\d+|\N)/g) { # not just ASCII!
push @nums, num($1);
}
say "@nums"; # 12 4567 0.875
use charnames qw(:full);
my $para = "This is a super\N{HYPHEN}long string. " x 20;
- my $fmt = new Unicode::LineBreak;
+ my $fmt = Unicode::LineBreak->new;
print $fmt->break($para), "\n";
=head2 ℞ 42: Unicode text in DBM hashes, the tedious way
# assume $uni_key holds a normal Perl string (abstract Unicode)
my $enc_key = encode("UTF-8", $uni_key, 1);
my $enc_value = $dbhash{$enc_key};
- my $uni_value = decode("UTF-8", $enc_key, 1);
+ my $uni_value = decode("UTF-8", $enc_value, 1);
=head2 ℞ 43: Unicode text in DBM hashes, the easy way
use strict;
use warnings;
use warnings qw(FATAL utf8); # fatalize encoding faults
- use open qw(:std :utf8); # undeclared streams in UTF-8
+ use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8
use charnames qw(:full :short); # unneeded in v5.16
# std modules
# So the Asian stuff comes out in an order that someone
# who reads those scripts won't freak out over; the
# CJK stuff will be in JIS X 0208 order that way.
- my $coll = new Unicode::Collate::Locale locale => "ja";
+ my $coll = Unicode::Collate::Locale->new(locale => "ja");
for my $item ($coll->sort(keys %price)) {
print pad(entitle($item), $width, ".");
=head1 REVISION HISTORY
v1.0.0 – first public release, 2012-02-27
-