X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b75c8c73cd7f3c92a16e03fb046f4e2a99363bc7..677a29545230804411babf7eefdbced18ffdd71d:/lib/bytes.pm diff --git a/lib/bytes.pm b/lib/bytes.pm index 3b0268e..918c8ee 100644 --- a/lib/bytes.pm +++ b/lib/bytes.pm @@ -1,6 +1,6 @@ package bytes; -our $VERSION = '1.00'; +our $VERSION = '1.04'; $bytes::hint_bits = 0x00000008; @@ -14,10 +14,17 @@ sub unimport { sub AUTOLOAD { require "bytes_heavy.pl"; - goto &$AUTOLOAD; + goto &$AUTOLOAD if defined &$AUTOLOAD; + require Carp; + Carp::croak("Undefined subroutine $AUTOLOAD called"); } -sub length ($); +sub length (_); +sub chr (_); +sub ord (_); +sub substr ($$;$$); +sub index ($$;$); +sub rindex ($$;$); 1; __END__ @@ -26,15 +33,31 @@ __END__ bytes - Perl pragma to force byte semantics rather than character semantics +=head1 DEPRECATED + +This pragma reflects early attempts to incorporate Unicode into perl and +has since been superseded. It breaks encapsulation (i.e. it exposes the +innards of how the perl executable currently happens to store a string), +and use of this module for anything other than debugging purposes is +strongly discouraged. If you feel that the functions here within might be +useful for your application, this possibly indicates a mismatch between +your mental model of Perl Unicode and the current reality. In that case, +you may wish to peruse some of the perl Unicode documentation: +L<perluniintro>, L<perlunitut>, L<perlunifaq> and L<perlunicode>. + =head1 SYNOPSIS use bytes; + ... chr(...); # or bytes::chr + ... index(...); # or bytes::index + ... length(...); # or bytes::length + ... ord(...); # or bytes::ord + ... rindex(...); # or bytes::rindex + ... substr(...); # or bytes::substr no bytes; -=head1 DESCRIPTION -WARNING: The implementation of Unicode support in Perl is incomplete. -See L<perlunicode> for the exact details. +=head1 DESCRIPTION The C<use bytes> pragma disables character semantics for the rest of the lexical scope in which it appears.
C<no bytes> can be used to reverse @@ -47,7 +70,7 @@ effect, the encoding is temporarily ignored, and each string is treated as a series of bytes. As an example, when Perl sees C<$x = chr(400)>, it encodes the character -in UTF8 and stores it in $x. Then it is marked as character data, so, +in UTF-8 and stores it in $x. Then it is marked as character data, so, for instance, C<length $x> returns C<1>. However, in the scope of the C<bytes> pragma, $x is treated as a series of bytes - the bytes that make up the UTF8 encoding - and C<length $x> returns C<2>: @@ -56,16 +79,22 @@ up the UTF8 encoding - and C<length $x> returns C<2>: print "Length is ", length $x, "\n"; # "Length is 1" printf "Contents are %vd\n", $x; # "Contents are 400" { - use bytes; + use bytes; # or "require bytes; bytes::length()" print "Length is ", length $x, "\n"; # "Length is 2" printf "Contents are %vd\n", $x; # "Contents are 198.144" } +chr(), ord(), substr(), index() and rindex() behave similarly. + For more on the implications and differences between character -semantics and byte semantics, see L<perlunicode>. +semantics and byte semantics, see L<perluniintro> and L<perlunicode>. + +=head1 LIMITATIONS + +bytes::substr() does not work as an lvalue(). =head1 SEE ALSO -L<perlunicode>, L<utf8> +L<perluniintro>, L<perlunicode>, L<utf8> =cut