$utf8::hint_bits = 0x00800000;
-our $VERSION = '1.14';
+our $VERSION = '1.15';
sub import {
$^H |= $utf8::hint_bits;
=item * C<$unicode = utf8::native_to_unicode($code_point)>
+(Since Perl v5.8.0)
This takes an unsigned integer (which represents the ordinal number of a
character (or a code point) on the platform the program is being run on) and
returns its Unicode equivalent value. Since ASCII platforms natively use the
A meaningless value will currently be returned if the input is not an unsigned
integer.
+Since Perl v5.22.0, calls to this function are optimized out on ASCII
+platforms, so there is no performance hit in using it there.
+
=item * C<$native = utf8::unicode_to_native($code_point)>
+(Since Perl v5.8.0)
This is the inverse of C<utf8::native_to_unicode()>, converting the other
direction. Again, on ASCII platforms, this returns its input, but on EBCDIC
platforms it will find the native platform code point, given any Unicode one.
A meaningless value will currently be returned if the input is not an unsigned
integer.
+Since Perl v5.22.0, calls to this function are optimized out on ASCII
+platforms, so there is no performance hit in using it there.
+
=item * C<$flag = utf8::is_utf8($string)>
(Since Perl 5.8.1) Test whether I<$string> is marked internally as encoded in
use strict;
my $s = "hlagh";
my $r = \$s;
- %s($r);
+ my $dummy = %s($r);
$$r;
], $func;
my $ret = eval $code or my $error = $@;
is($s, "A$utf8_bytes","(pos $pos) str after U; utf8::encode");
}
+SKIP: { # On ASCII platforms both conversion calls should compile to nothing
+ skip("Test only valid on ASCII platform", 1) unless $::IS_ASCII;
+ require Config;
+ skip("Test needs a B module, which is lacking in this Perl", 1)
+ if $Config::Config{'extensions'} !~ /\bB\b/;
+
+ my $out = runperl ( switches => ["-XMO=Concise"], # dump the compiled op tree
+ prog => 'utf8::unicode_to_native(0x41);
+ utf8::native_to_unicode(0x42)',
+ stderr => 1 );
+ unlike($out, qr/entersub/, # an entersub op would mean a call survived
+ "utf8::unicode_to_native() and native_to_unicode() optimized out");
+}
+
+
# [perl #119043] utf8::upgrade should not croak on read-only COWs
for(__PACKAGE__) {
eval { utf8::upgrade($_) };
=item *
-XXX
+The functions
+C<utf8::native_to_unicode()> and
+C<utf8::unicode_to_native()> (see L<utf8>)
+are now optimized out on ASCII platforms.
+Code that uses them to stay portable between ASCII and EBCDIC platforms
+therefore no longer incurs even a minimal performance hit on ASCII platforms.
=back
{"re::regexp_pattern", XS_re_regexp_pattern, "$"},
};
+STATIC OP*
+optimize_out_native_convert_function(pTHX_ OP* entersubop,
+ GV* namegv,
+ SV* protosv)
+{
+ /* Call checker that optimizes out a call to an identity function, i.e.,
+ * one that just returns its argument. The passed in function is assumed
+ * to be an identity function, with no checking. This is designed to be
+ * used for utf8::unicode_to_native() and utf8::native_to_unicode() on
+ * ASCII platforms, as they just return their arguments, but it could work
+ * on any such function.
+ *
+ * entersubop is the OP_ENTERSUB op for the call being compiled; namegv is
+ * passed through to ck_entersub_args_proto(); protosv is ignored, as a
+ * fixed "$" prototype is applied instead.
+ *
+ * Returns the lone argument op, after freeing the rest of the call, when
+ * the op tree has the expected shape; otherwise returns the entersub op
+ * (as processed by ck_entersub_args_proto()) so the call is made normally.
+ *
+ * The code is mostly just cargo-culted from Memoize::Lift */
+
+ OP *pushop, *argop;
+ SV* prototype = newSVpvs("$"); /* i.e. exactly one scalar argument */
+
+ PERL_UNUSED_ARG(protosv);
+
+ assert(entersubop->op_type == OP_ENTERSUB);
+
+ entersubop = ck_entersub_args_proto(entersubop, namegv, prototype);
+
+ SvREFCNT_dec(prototype); /* only needed for the check just above */
+
+ pushop = cUNOPx(entersubop)->op_first;
+ if (! pushop->op_sibling) { /* lone child: look one level further down */
+ pushop = cUNOPx(pushop)->op_first;
+ }
+ argop = pushop->op_sibling; /* the op right after pushop is the argument */
+
+ /* Carry on without doing the optimization if the tree is not the shape
+ * we're expecting (exactly two ops after pushop: the argument and one
+ * more, presumably the op that supplies the sub being called), so that
+ * the call continues to work */
+ if ( ! argop
+ || ! argop->op_sibling
+ || argop->op_sibling->op_sibling
+ ) {
+ return entersubop;
+ }
+
+ pushop->op_sibling = argop->op_sibling; /* splice argop out of the chain */
+ argop->op_sibling = NULL;
+ argop->op_lastsib = 1; /* argop now has no siblings after it */
+
+ op_free(entersubop); /* argop is detached, so it is not freed here */
+ return argop;
+}
+
void
Perl_boot_core_UNIVERSAL(pTHX)
{
newXS_flags(xsub->name, xsub->xsub, file, xsub->proto, 0);
} while (++xsub < end);
+#ifndef EBCDIC
+ { /* On ASCII platforms these functions just return their argument, so can
+ be optimized away */
+
+ CV* to_native_cv = get_cv("utf8::unicode_to_native", 0);
+ CV* to_unicode_cv = get_cv("utf8::native_to_unicode", 0);
+
+ cv_set_call_checker(to_native_cv,
+ optimize_out_native_convert_function,
+ (SV*) to_native_cv);
+ cv_set_call_checker(to_unicode_cv,
+ optimize_out_native_convert_function,
+ (SV*) to_unicode_cv);
+ }
+#endif
+
/* Providing a Regexp::DESTROY fixes #21347. See test in t/op/ref.t */
{
CV * const cv =