## It could be a user-defined property.
##
- my $caller1 = caller(1);
+ my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1);
if (defined $caller1 && $type =~ /^(?:\w+)$/) {
my $prop = $caller1 . "::" . ( $wasIs ? "Is" : "" ) . $type;
if (defined $caller0 && $type =~ /^To(?:\w+)$/) {
my $map = $caller0 . "::" . $type;
+
if (exists &{$map}) {
no strict 'refs';
my $char = $1;
my $name = $2;
print STDERR "$1 => $2\n" if DEBUG;
- if ($char =~ /[-+!]/) {
+ if ($char =~ /[-+!&]/) {
my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really
my $subobj;
if ($c eq 'utf8') {
- $subobj = $c->SWASHNEW($t, "", 0, 0, 0);
+ $subobj = utf8->SWASHNEW($t, "", 0, 0, 0);
+ }
+ elsif (exists &$name) {
+ $subobj = utf8->SWASHNEW($name, "", 0, 0, 0);
}
elsif ($c =~ /^([0-9a-fA-F]+)/) {
$subobj = utf8->SWASHNEW("", $c, 0, 0, 0);
}
for my $x ($self->{EXTRAS}) {
pos $x = 0;
- while ($x =~ /^([-+!])(.*)/mg) {
+ while ($x =~ /^([-+!&])(.*)/mg) {
my $char = $1;
my $name = $2;
print STDERR "INDIRECT $1 $2\n" if DEBUG;
}
}
}
+ elsif ($char eq '&') {
+ if ($bits == 1 and $otherbits == 1) {
+ $swatch &= $other;
+ }
+ else {
+ for ($key = 0; $key < $len; $key++) {
+ if (!vec($other, $key, $otherbits)) {
+ vec($swatch, $key, $bits) = 0;
+ }
+ }
+ }
+ }
}
}
if (DEBUG) {
=head2 User-Defined Character Properties
You can define your own character properties by defining subroutines
-whose names begin with "In" or "Is". The subroutines must be defined
-in the C<main> package. The user-defined properties can be used in the
-regular expression C<\p> and C<\P> constructs. Note that the effect
-is compile-time and immutable once defined.
+whose names begin with "In" or "Is". The subroutines can be defined in
+any package. The user-defined properties can be used in the regular
+expression C<\p> and C<\P> constructs; if you are using a user-defined
+property from a package other than the one you are in, you must specify
+its package in the C<\p> or C<\P> construct.
+
+ # assuming property IsForeign defined in Lang::
+ package main; # property package name required
+ if ($txt =~ /\p{Lang::IsForeign}+/) { ... }
+
+ package Lang; # property package name not required
+ if ($txt =~ /\p{IsForeign}+/) { ... }
+
+
+Note that the effect is compile-time and immutable once defined.
The subroutines must return a specially-formatted string, with one
or more newline-separated lines. Each line must be one of the following:
=item *
Something to include, prefixed by "+": a built-in character
-property (prefixed by "utf8::"), to represent all the characters in that
-property; two hexadecimal code points for a range; or a single
-hexadecimal code point.
+property (prefixed by "utf8::") or a user-defined character property,
+to represent all the characters in that property; two hexadecimal code
+points for a range; or a single hexadecimal code point.
=item *
Something to exclude, prefixed by "-": an existing character
-property (prefixed by "utf8::"), for all the characters in that
-property; two hexadecimal code points for a range; or a single
-hexadecimal code point.
+property (prefixed by "utf8::") or a user-defined character property,
+to represent all the characters in that property; two hexadecimal code
+points for a range; or a single hexadecimal code point.
=item *
Something to negate, prefixed "!": an existing character
-property (prefixed by "utf8::") for all the characters except the
-characters in the property; two hexadecimal code points for a range;
-or a single hexadecimal code point.
+property (prefixed by "utf8::") or a user-defined character property,
+to represent all the characters in that property; two hexadecimal code
+points for a range; or a single hexadecimal code point.
+
+=item *
+
+Something to intersect with, prefixed by "&": an existing character
+property (prefixed by "utf8::") or a user-defined character property,
+for all the characters except the characters in the property; two
+hexadecimal code points for a range; or a single hexadecimal code point.
=back
END
}
+Intersection is useful for getting the common characters matched by
+two (or more) classes.
+
+ sub InFooAndBar {
+ return <<'END';
+ +main::Foo
+ &main::Bar
+ END
+ }
+
+It's important to remember not to use "&" for the first set -- that
+would be intersecting with nothing (resulting in an empty set).
+
You can also define your own mappings to be used in the lc(),
lcfirst(), uc(), and ucfirst() (or their string-inlined versions).
The principle is the same: define subroutines in the C<main> package
--- /dev/null
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = qw(../lib .);
+ require "test.pl";
+}
+
+plan tests => 4;
+
+sub MyUniClass {
+ <<END;
+0030 004F
+END
+}
+
+sub Other::Class {
+ <<END;
+0040 005F
+END
+}
+
+sub A::B::Intersection {
+ <<END;
++main::MyUniClass
+&Other::Class
+END
+}
+
+
+my $str = join "", map chr($_), 0x20 .. 0x6F;
+
+# make sure it finds built-in class
+is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
+
+# make sure it finds user-defined class
+is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
+
+# make sure it finds class in other package
+is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
+
+# make sure it finds class in other OTHER package
+is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');