This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't allow /\N{}/ under 're strict'
author Karl Williamson <khw@cpan.org>
Sat, 13 Feb 2016 20:49:00 +0000 (13:49 -0700)
committer Karl Williamson <khw@cpan.org>
Fri, 19 Feb 2016 03:26:49 +0000 (20:26 -0700)
This is the one remaining empty {} that was accepted under the
experimental 'use re "strict"'.

embed.fnc
embed.h
pod/perldelta.pod
pod/perldiag.pod
proto.h
regcomp.c
t/re/reg_mesg.t

index 23e1e52..a0c7285 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2174,6 +2174,7 @@ Es        |bool   |grok_bslash_N  |NN RExC_state_t *pRExC_state               \
                                |NULLOK UV *code_point_p                    \
                                |NULLOK int* cp_count                       \
                                |NN I32 *flagp                              \
+                               |const bool strict                          \
                                |const U32 depth
 Es     |void   |reginsert      |NN RExC_state_t *pRExC_state \
                                |U8 op|NN regnode *opnd|U32 depth
diff --git a/embed.h b/embed.h
index a1368ea..a12a3e6 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define edit_distance          S_edit_distance
 #define get_ANYOF_cp_list_for_ssc(a,b) S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
 #define get_invlist_iter_addr  S_get_invlist_iter_addr
-#define grok_bslash_N(a,b,c,d,e,f)     S_grok_bslash_N(aTHX_ a,b,c,d,e,f)
+#define grok_bslash_N(a,b,c,d,e,f,g)   S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
 #define handle_named_backref(a,b,c,d)  S_handle_named_backref(aTHX_ a,b,c,d)
 #define handle_possible_posix(a,b,c,d) S_handle_possible_posix(aTHX_ a,b,c,d)
 #define handle_regex_sets(a,b,c,d,e)   S_handle_regex_sets(aTHX_ a,b,c,d,e)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 5607b2e..55db093 100644 (file)
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -52,6 +52,12 @@ XXX For a release on a stable branch, this section aspires to be:
 
 [ List each incompatible change as a =head2 entry ]
 
+=head2 C<qr/\N{}/> now disallowed under C<use re "strict">
+
+An empty C<\N{}> makes no sense, but for backwards compatibility it is
+silently accepted and does nothing.  It is now, however, a fatal error
+under the experimental feature L<re/'strict' mode>.
+
 =head1 Deprecations
 
 XXX Any deprecated features, syntax, modules etc. should be listed here.
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index cc27016..1e4760d 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -7203,9 +7203,10 @@ Something Very Wrong.
 
 (F) Named Unicode character escapes (C<\N{...}>) may return a zero-length
 sequence.  Such an escape was used in an extended character class, i.e.
-C<(?[...])>, which is not permitted.  Check that the correct escape has
-been used, and the correct charnames handler is in scope.  The S<<-- HERE>
-shows whereabouts in the regular expression the problem was discovered.
+C<(?[...])>, or under C<use re 'strict'>, which is not permitted.  Check
+that the correct escape has been used, and the correct charnames handler
+is in scope.  The S<<-- HERE> shows whereabouts in the regular
+expression the problem was discovered.
 
 =back
 
diff --git a/proto.h b/proto.h
index 4f2d687..c3adf2d 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4747,7 +4747,7 @@ PERL_STATIC_INLINE STRLEN*        S_get_invlist_iter_addr(SV* invlist)
 #define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR \
        assert(invlist)
 
-STATIC bool    S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** nodep, UV *code_point_p, int* cp_count, I32 *flagp, const U32 depth);
+STATIC bool    S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
 #define PERL_ARGS_ASSERT_GROK_BSLASH_N \
        assert(pRExC_state); assert(flagp)
 PERL_STATIC_INLINE regnode*    S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
index f3b185c..cb06174 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -11394,6 +11394,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                 UV * code_point_p,
                 int * cp_count,
                 I32 * flagp,
+                const bool strict,
                 const U32 depth
     )
 {
@@ -11543,6 +11544,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                                         semantics */
 
     if (endbrace == RExC_parse) {   /* empty: \N{} */
+        if (strict) {
+            RExC_parse++;   /* Position after the "}" */
+            vFAIL("Zero length \\N{}");
+        }
         if (cp_count) {
             *cp_count = 0;
         }
@@ -12422,6 +12427,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                               NULL,     /* Don't need a count of how many code
                                            points */
                               flagp,
+                              RExC_strict,
                               depth)
             ) {
                 break;
@@ -12748,6 +12754,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                             NULL,   /* Don't need a count of
                                                        how many code points */
                                             flagp,
+                                            RExC_strict,
                                             depth)
                         ) {
                             if (*flagp & NEED_UTF8)
@@ -15529,6 +15536,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                         &value,    /* Yes single value */
                                         &cp_count, /* Multiple code pt count */
                                         flagp,
+                                        strict,
                                         depth)
                     ) {
 
@@ -15541,11 +15549,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             vFAIL("\\N in a character class must be a named character: \\N{...}");
                         }
                         else if (cp_count == 0) {
-                            if (strict) {
-                                RExC_parse++;   /* Position after the "}" */
-                                vFAIL("Zero length \\N{}");
-                            }
-                            else if (PASS2) {
+                            if (PASS2) {
                                 ckWARNreg(RExC_parse,
                                         "Ignoring zero length \\N{} in character class");
                             }
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index 6ec5d94..d05922e 100644 (file)
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -295,7 +295,9 @@ my @death_only_under_strict = (
     'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored',
                        => 'Non-hex character {#} m/[\x{ABCDEFG{#}}]/',
     'm/[\N{}]/' => 'Ignoring zero length \\N{} in character class {#} m/[\\N{}{#}]/',
-                => 'Zero length \\N{} {#} m/[\\N{}]{#}/',
+                => 'Zero length \\N{} {#} m/[\\N{}{#}]/',
+    'm/\N{}/' => "",
+                => 'Zero length \\N{} {#} m/\\N{}{#}/',
     "m'[\\y]\\x{100}'" => 'Unrecognized escape \y in character class passed through {#} m/[\y{#}]\x{100}/',
                        => 'Unrecognized escape \y in character class {#} m/[\y{#}]\x{100}/',
     'm/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',