perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	package bytes;

	# The "bytes" pragma: import()/unimport() toggle one bit in $^H, and the
	# bytes::* functions declared below are loaded on first use via AUTOLOAD.
	#
	# NOTE: "use strict" is deliberately not enabled in this file, because
	# AUTOLOAD dispatches through a symbolic code reference (&$AUTOLOAD).

	our $VERSION = '1.01';

	# Bit mask within $^H that import() sets and unimport() clears.
	$bytes::hint_bits = 0x00000008;

	# import() - called for "use bytes"; turns the bytes hint bit on in $^H.
	sub import {
	    $^H |= $bytes::hint_bits;
	}

	# unimport() - called for "no bytes"; turns the bytes hint bit off in $^H.
	sub unimport {
	    $^H &= ~$bytes::hint_bits;
	}

	# Set by perl to the fully qualified name of the sub being autoloaded.
	our $AUTOLOAD;

	# AUTOLOAD() - loads bytes_heavy.pl on the first call to any bytes::*
	# function that has no body yet, then re-dispatches to the requested sub
	# with the caller's @_ intact.
	#
	# Dies (via Carp::croak) if the requested name is still undefined after
	# bytes_heavy.pl has been loaded.  Without that guard, "goto" to an
	# undefined sub would re-enter AUTOLOAD and recurse forever.
	sub AUTOLOAD {
	    require "bytes_heavy.pl";
	    goto &$AUTOLOAD if defined &$AUTOLOAD;
	    require Carp;
	    Carp::croak("Undefined subroutine $AUTOLOAD called");
	}

	# Forward declarations: the prototypes are made known at compile time,
	# while the bodies are expected to come from bytes_heavy.pl (see AUTOLOAD).
	sub length ($);
	sub chr ($);
	sub ord ($);
	sub substr ($$;$$);
	sub index ($$;$);
	sub rindex ($$;$);

	1;
	28	__END__
	29
	30	=head1 NAME
	31
	32	bytes - Perl pragma to force byte semantics rather than character semantics
	33
	34	=head1 SYNOPSIS
	35
	36	    use bytes;
	37	    ... chr(...); # or bytes::chr
	38	    ... index(...); # or bytes::index
	39	    ... length(...); # or bytes::length
	40	    ... ord(...); # or bytes::ord
	41	    ... rindex(...); # or bytes::rindex
	42	    ... substr(...); # or bytes::substr
	43	    no bytes;
	44
	45
	46	=head1 DESCRIPTION
	47
	48	The C<use bytes> pragma disables character semantics for the rest of the
	49	lexical scope in which it appears. C<no bytes> can be used to reverse
	50	the effect of C<use bytes> within the current lexical scope.
	51
	52	Perl normally assumes character semantics in the presence of character
	53	data (i.e. data that has come from a source that has been marked as
	54	being of a particular character encoding). When C<use bytes> is in
	55	effect, the encoding is temporarily ignored, and each string is treated
	56	as a series of bytes.
	57
	58	As an example, when Perl sees C<$x = chr(400)>, it encodes the character
	59	in UTF-8 and stores it in C<$x>. Then it is marked as character data, so,
	60	for instance, C<length $x> returns C<1>. However, in the scope of the
	61	C<bytes> pragma, C<$x> is treated as a series of bytes - the bytes that make
	62	up the UTF-8 encoding - and C<length $x> returns C<2>:
	63
	64	    $x = chr(400);
	65	    print "Length is ", length $x, "\n"; # "Length is 1"
	66	    printf "Contents are %vd\n", $x; # "Contents are 400"
	67	    {
	68	        use bytes; # or "require bytes; bytes::length()"
	69	        print "Length is ", length $x, "\n"; # "Length is 2"
	70	        printf "Contents are %vd\n", $x; # "Contents are 198.144"
	71	    }
	72
	73	chr(), ord(), substr(), index() and rindex() behave similarly.
	74
	75	For more on the implications and differences between character
	76	semantics and byte semantics, see L<perluniintro> and L<perlunicode>.
	77
	78	=head1 LIMITATIONS
	79
	80	C<bytes::substr()> does not work as an lvalue.
	81
	82	=head1 SEE ALSO
	83
	84	L<perluniintro>, L<perlunicode>, L<utf8>
	85
	86	=cut