perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package charnames; # implements the C<\N{named}> string-literal escape (see POD below)
	2
	3	our $VERSION = '1.00';
	4
	5	use bytes (); # for $bytes::hint_bits
	6	use warnings(); # for warnings::enabled() and warnings::warn() used by import()
	7	$charnames::hint_bits = 0x20000; # bit that import() ORs into $^H
	8
	9	my $txt; # cached return value of "unicore/Name.pl"; filled on first use by charnames()/import()
	10
	11	# This is not optimized in any way yet
	# charnames($name)
	#
	# Translator that import() installs in $^H{charnames}: maps the text found
	# inside a \N{...} escape to the character it names.
	#
	# The name is looked up in the table returned by "unicore/Name.pl" (loaded
	# once and cached in $txt).  Lookups are tried in this order, and the first
	# hit wins:
	#   1. the full name, when $^H{charnames_full} is set;
	#   2. the "SCRIPT:CNAME" form, when $^H{charnames_short} is set;
	#   3. a letter name within each script listed in $^H{charnames_scripts}.
	#
	# Returns the character for the code point that was found.
	# Dies with "Unknown charname '...'" when no lookup succeeds, and with
	# "Character 0x... is above 0xFF" when "use bytes" is in effect and the
	# code point is greater than 255.
	sub charnames {
	    my $name = shift;    # lexical, so the name never leaks into a package global
	    $txt = do "unicore/Name.pl" unless $txt;

	    # (start, end) offsets of the matched "\t\tNAME" span inside $txt.
	    my @off;

	    # 1. Full name.  \Q...\E stops regex metacharacters in the requested
	    #    name from being treated as pattern syntax.
	    if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
	        @off = ($-[0], $+[0]);
	    }

	    # 2. SCRIPT:CNAME.  Both parts may be longer than one character
	    #    (e.g. "greek:Sigma"), hence the non-empty quantified groups.
	    if (!@off and $^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
	        my ($script, $cname) = ($1, $2);
	        # Any upper-case character in CNAME selects the CAPITAL variant.
	        my $case      = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
	        my $script_re = quotemeta uc $script;
	        my $cname_re  = quotemeta uc $cname;
	        if ($txt =~ m/\t\t$script_re (?:$case )?LETTER $cname_re$/m) {
	            @off = ($-[0], $+[0]);
	        }
	    }

	    # 3. Bare letter name, tried against each imported script in turn.
	    unless (@off) {
	        my $case      = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
	        my $letter_re = quotemeta uc $name;
	        for my $script (@{ $^H{charnames_scripts} }) {
	            my $script_re = quotemeta $script;
	            if ($txt =~ m/\t\t$script_re (?:$case )?LETTER $letter_re$/m) {
	                @off = ($-[0], $+[0]);
	                last;
	            }
	        }
	    }
	    die "Unknown charname '$name'" unless @off;

	    # The hex code point sits immediately before the "\t\t" that matched.
	    # Start from four digits and widen while the preceding character is
	    # still a hex digit.  Either letter case is accepted because the case
	    # used by the table is not visible from this file.
	    my $hexlen = 4; # Unicode guarantees 4-, 5-, or 6-digit format
	    while ($hexlen < 6) {
	        my $probe = $off[0] - $hexlen - 1;
	        last if $probe < 0;    # a negative offset would wrap to the end of $txt
	        last unless substr($txt, $probe, 1) =~ /[0-9A-Fa-f]/;
	        $hexlen++;
	    }
	    my $ord = hex substr $txt, $off[0] - $hexlen, $hexlen;

	    if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
	        use bytes;
	        return chr $ord if $ord <= 255;
	        my $hex   = sprintf '%X=0%o', $ord, $ord;
	        # Skip the two leading tabs to recover the matched table name.
	        my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
	        die "Character 0x$hex with name '$fname' is above 0xFF";
	    }
	    return chr $ord;
	}
	50
	# import(CLASS, LIST)
	#
	# Called for "use charnames LIST".  Sets $charnames::hint_bits in $^H,
	# installs charnames() as the translator in $^H{charnames}, and records
	# the requested lookup modes in %^H:
	#   charnames_full     true when ':full' was given
	#   charnames_short    true when ':short' was given
	#   charnames_scripts  array ref of the remaining arguments, upper-cased,
	#                      de-duplicated and kept in the order they were given
	#
	# Dies when LIST is empty.  When the 'utf8' warnings category is enabled,
	# warns "No such script: '...'" for every script that has no LETTER entry
	# in the table returned by "unicore/Name.pl".
	sub import {
	    shift;
	    die "`use charnames' needs explicit imports list" unless @_;
	    $^H |= $charnames::hint_bits;
	    $^H{charnames} = \&charnames ;

	    my %h;
	    @h{@_} = (1) x @_;
	    $^H{charnames_full}  = delete $h{':full'};
	    $^H{charnames_short} = delete $h{':short'};

	    # Walk @_ instead of keys %h: charnames() tries the scripts in the
	    # order given, and hash key order is arbitrary.
	    my (%seen, @scripts);
	    for my $arg (@_) {
	        next unless exists $h{$arg};    # ':full' / ':short' were deleted above
	        my $script = uc $arg;
	        push @scripts, $script unless $seen{$script}++;
	    }
	    $^H{charnames_scripts} = \@scripts;

	    if (warnings::enabled('utf8') && @scripts) {
	        $txt = do "unicore/Name.pl" unless $txt;
	        for my $script (@scripts) {
	            # \Q...\E keeps metacharacters in the script name literal.
	            warnings::warn('utf8', "No such script: '$script'")
	                unless $txt =~ m/\t\t\Q$script\E (?:CAPITAL |SMALL )?LETTER /;
	        }
	    }
	}
	69
	70
	71	1;
	72	__END__
	73
	74	=head1 NAME
	75
	76	charnames - define character names for C<\N{named}> string literal escape.
	77
	78	=head1 SYNOPSIS
	79
	80	  use charnames ':full';
	81	  print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
	82
	83	  use charnames ':short';
	84	  print "\N{greek:Sigma} is an upper-case sigma.\n";
	85
	86	  use charnames qw(cyrillic greek);
	87	  print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
	88
	89	=head1 DESCRIPTION
	90
	91	Pragma C<use charnames> supports arguments C<:full>, C<:short> and
	92	script names. If C<:full> is present, for expansion of
	93	C<\N{CHARNAME}>, string C<CHARNAME> is first looked up in the list of
	94	standard Unicode names of chars. If C<:short> is present, and
	95	C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
	96	as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
	97	with script name arguments, then for C<\N{CHARNAME}> the name
	98	C<CHARNAME> is looked up as a letter in the given scripts (in the
	99	specified order).
	100
	101	For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
	102	this pragma looks for the names
	103
	104	  SCRIPTNAME CAPITAL LETTER CHARNAME
	105	  SCRIPTNAME SMALL LETTER CHARNAME
	106	  SCRIPTNAME LETTER CHARNAME
	107
	108	in the table of standard Unicode names. If C<CHARNAME> is lowercase,
	109	then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
	110	ignored.
	111
	112	=head1 CUSTOM TRANSLATORS
	113
	114	The mechanism of translation of C<\N{...}> escapes is general and not
	115	hardwired into F<charnames.pm>. A module can install custom
	116	translations (inside the scope which C<use>s the module) with the
	117	following magic incantation:
	118
	119	    use charnames (); # for $charnames::hint_bits
	120	    sub import {
	121	      shift;
	122	      $^H |= $charnames::hint_bits;
	123	      $^H{charnames} = \&translator;
	124	    }
	125
	126	Here translator() is a subroutine which takes C<CHARNAME> as an
	127	argument, and returns text to insert into the string instead of the
	128	C<\N{CHARNAME}> escape. Since the text to insert should be different
	129	in C<bytes> mode and out of it, the function should check the current
	130	state of C<bytes>-flag as in:
	131
	132	    use bytes (); # for $bytes::hint_bits
	133	    sub translator {
	134	      if ($^H & $bytes::hint_bits) {
	135	        return bytes_translator(@_);
	136	      }
	137	      else {
	138	        return utf8_translator(@_);
	139	      }
	140	    }
	141
	142	=head1 BUGS
	143
	144	Since evaluation of the translation function happens in the middle of
	145	compilation (of a string literal), the translation function should not
	146	do any C<eval>s or C<require>s. This restriction should be lifted in
	147	a future version of Perl.
	148
	149	=cut