Fix /\p{pkg::User-defined}/i
authorKarl Williamson <khw@cpan.org>
Mon, 15 Feb 2016 18:25:35 +0000 (11:25 -0700)
committerKarl Williamson <khw@cpan.org>
Tue, 1 Mar 2016 04:42:04 +0000 (21:42 -0700)
Prior to this commit, the parsing code got confused if a user-defined
property whose definition was not known at pattern compilation time, was
specified with an explicit package name, under /i.

pod/perldelta.pod
regcomp.c
t/re/regexp_unicode_prop.t

index 40e2a42..c0f27dd 100644 (file)
@@ -345,7 +345,12 @@ files in F<ext/> and F<lib/> are best summarized in L</Modules and Pragmata>.
 
 =item *
 
-XXX
+It now works properly to specify a user-defined property, such as
+
+ qr/\p{mypkg1::IsMyProperty}/i
+
+with C</i> caseless matching, an explicit package name, and
+I<IsMYProperty> not defined at the time of the pattern compilation.
 
 =back
 
index b71c05c..cbde013 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -15819,6 +15819,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     SV* invlist;
                     char* name;
                     char* base_name;    /* name after any packages are stripped */
+                    char* lookup_name = NULL;
                     const char * const colon_colon = "::";
 
                     /* Try to get the definition of the property into
@@ -15826,23 +15827,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * will have its name be <__NAME_i>.  The design is
                      * discussed in commit
                      * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    name = savepv(Perl_form(aTHX_
-                                          "%s%.*s%s\n",
-                                          (FOLD) ? "__" : "",
-                                          (int)n,
-                                          RExC_parse,
-                                          (FOLD) ? "_i" : ""
-                                ));
+                    name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+                    if (FOLD) {
+                        lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+                    }
 
                     /* Look up the property name, and get its swash and
                      * inversion list, if the property is found  */
                     SvREFCNT_dec(swash); /* Free any left-overs */
-                    swash = _core_swash_init("utf8", name, &PL_sv_undef,
+                    swash = _core_swash_init("utf8",
+                                             (lookup_name)
+                                              ? lookup_name
+                                              : name,
+                                             &PL_sv_undef,
                                              1, /* binary */
                                              0, /* not tr/// */
                                              NULL, /* No inversion list */
                                              &swash_init_flags
                                             );
+                    if (lookup_name) {
+                        Safefree(lookup_name);
+                    }
                     if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                         HV* curpkg = (IN_PERL_COMPILETIME)
                                       ? PL_curstash
@@ -15909,9 +15914,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                 name = savepvn(full_name, n);
                             }
                         }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
                                         (value == 'p' ? '+' : '!'),
-                                        UTF8fARG(UTF, n, name));
+                                        (FOLD) ? "__" : "",
+                                        UTF8fARG(UTF, n, name),
+                                        (FOLD) ? "_i" : "");
                         has_user_defined_property = TRUE;
                         optimizable = FALSE;    /* Will have to leave this an
                                                    ANYOF node */
index 25bb781..b807469 100644 (file)
@@ -174,6 +174,7 @@ for (my $i = 0; $i < @CLASSES; $i += 2) {
 $count += 4 * @ILLEGAL_PROPERTIES;
 $count += 4 * grep {length $_ == 1} @ILLEGAL_PROPERTIES;
 $count += 8 * @USER_CASELESS_PROPERTIES;
+$count += 1;    # Test for pkg:IsMyLower
 
 plan(tests => $count);
 
@@ -350,6 +351,26 @@ sub IsMyUpper {
            . "\n&utf8::ASCII";
 }
 
+{   # This has to be done here and not like the others, because we have to
+    # make sure that the property is not known until after the regex is
+    # compiled.  It was previously getting confused about the pkg and /i
+    # combination
+
+    my $mylower = qr/\p{pkg::IsMyLower}/i;
+
+    sub pkg::IsMyLower {
+        my $caseless = shift;
+        return "+utf8::"
+            . (($caseless)
+                ? 'Alphabetic'
+                : 'Lowercase')
+            . "\n&utf8::ASCII";
+    }
+
+    like("A", $mylower, "Not available until runtime user-defined property with pkg:: and /i works");
+
+}
+
 # Verify that can use user-defined properties inside another one
 sub IsSyriac1KanaMark {<<'--'}
 +main::IsSyriac1