This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Introduce validate_proto / stop stripping spaces
authorPeter Martini <PeterCMartini@GMail.com>
Mon, 1 Jul 2013 22:13:42 +0000 (18:13 -0400)
committerTony Cook <tony@develop-help.com>
Fri, 5 Jul 2013 07:04:37 +0000 (17:04 +1000)
The code to do warnings on invalid prototypes was a chunk
of 70 or so lines inside the core lexer.  It also had the
side effect of removing spaces from the prototype as part
of its check for warnings.

This validation code is now just a validation, printing
out warnings if and only if warnings are enabled,
and leaving the source SV untouched.

embed.fnc
embed.h
proto.h
t/comp/proto.t
toke.c

index 5c27b27..a6c17ee 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2253,6 +2253,7 @@ s |int    |tokereport     |I32 rv|NN const YYSTYPE* lvalp
 s      |void   |printbuf       |NN const char *const fmt|NN const char *const s
 #  endif
 #endif
+EXMp   |bool   |validate_proto |NN SV *name|NULLOK SV *proto|bool warn
 
 #if defined(PERL_IN_UNIVERSAL_C)
 s      |bool|isa_lookup        |NN HV *stash|NN const char * const name \
diff --git a/embed.h b/embed.h
index 0666e6f..6f3ac5a 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define reg_temp_copy(a,b)     Perl_reg_temp_copy(aTHX_ a,b)
 #define regprop(a,b,c)         Perl_regprop(aTHX_ a,b,c)
 #define report_uninit(a)       Perl_report_uninit(aTHX_ a)
+#define validate_proto(a,b,c)  Perl_validate_proto(aTHX_ a,b,c)
 #define vivify_defelem(a)      Perl_vivify_defelem(aTHX_ a)
 #define yylex()                        Perl_yylex(aTHX)
 #  if defined(DEBUGGING)
diff --git a/proto.h b/proto.h
index 9a06590..2389ed8 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -4738,6 +4738,11 @@ PERL_CALLCONV UV Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
 #define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI   \
        assert(s)
 
+PERL_CALLCONV bool     Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALIDATE_PROTO        \
+       assert(name)
+
 PERL_CALLCONV int      Perl_vcmp(pTHX_ SV *lhv, SV *rhv)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
index 7cbe174..d472cd3 100644 (file)
@@ -680,7 +680,7 @@ for my $p ( "", qw{ () ($) ($@) ($%) ($;$) (&) (&\@) (&@) (%) (\%) (\@) } ) {
   print "ok ", $i++, "\n";
   
   eval 'sub badproto4 (@ $b ar) { 1; }';
-  print "not " unless $warn =~ /Illegal character in prototype for main::badproto4 : \@\$bar/;
+  print "not " unless $warn =~ /Illegal character in prototype for main::badproto4 : \@ \$b ar/;
   print "ok ", $i++, "\n";
 }
 
diff --git a/toke.c b/toke.c
index f1c695e..878b084 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1578,6 +1578,104 @@ Perl_lex_read_space(pTHX_ U32 flags)
 }
 
 /*
+
+=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+
+This function performs syntax checking on a prototype, C<proto>.
+If C<warn> is true, any illegal characters or mismatched brackets
+will trigger illegalproto warnings, declaring that they were
+detected in the prototype for C<name>.
+
+The return value is C<true> if this is a valid prototype, and
+C<false> if it is not, regardless of whether C<warn> was C<true> or
+C<false>.
+
+Note that C<NULL> is a valid C<proto> and will always return C<true>.
+
+=cut
+
+ */
+
+bool
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+{
+    STRLEN len, origlen;
+    char *p = proto ? SvPV(proto, len) : NULL;
+    bool bad_proto = FALSE;
+    bool in_brackets = FALSE;
+    bool after_slash = FALSE;
+    char greedy_proto = ' ';
+    bool proto_after_greedy_proto = FALSE;
+    bool must_be_last = FALSE;
+    bool underscore = FALSE;
+    bool seen_underscore = FALSE;
+
+    PERL_ARGS_ASSERT_VALIDATE_PROTO;
+
+    if (!proto)
+       return TRUE;
+
+    origlen = len;
+    for (; len--; p++) {
+       if (!isSPACE(*p)) {
+           if (must_be_last)
+               proto_after_greedy_proto = TRUE;
+           if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
+               bad_proto = TRUE;
+           }
+           else {
+               if (underscore) {
+                   if(!strchr(";@%", *p))
+                       bad_proto = TRUE;
+                   underscore = FALSE;
+               }
+
+               if (*p == '[')
+                   in_brackets = TRUE;
+               else if (*p == ']')
+                   in_brackets = FALSE;
+               else if ((*p == '@' || *p == '%') &&
+                   !after_slash &&
+                   !in_brackets ) {
+                   must_be_last = TRUE;
+                   greedy_proto = *p;
+               }
+               else if (*p == '_')
+                   underscore = seen_underscore = TRUE;
+           }
+           if (*p == '\\')
+               after_slash = TRUE;
+           else
+               after_slash = FALSE;
+       }
+    }
+
+    if (warn) {
+       p -= origlen;
+       if (proto_after_greedy_proto)
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Prototype after '%c' for %"SVf" : %s",
+                       greedy_proto, SVfARG(name), p);
+       if (bad_proto) {
+           SV *dsv = newSVpvs_flags("", SVs_TEMP);
+           Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+                       "Illegal character %sin prototype for %"SVf" : %s",
+                       seen_underscore ? "after '_' " : "",
+                       SVfARG(PL_subname),
+                       SvUTF8(PL_lex_stuff)
+                           ? sv_uni_display(dsv,
+                               newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+                               origlen,
+                               UNI_DISPLAY_ISPRINT)
+                           : pv_pretty(dsv, p, origlen, 60, NULL, NULL,
+                               PERL_PV_ESCAPE_NONASCII));
+       }
+    }
+
+    return (! (proto_after_greedy_proto || bad_proto) );
+}
+
+/*
  * S_incline
  * This subroutine has nothing to do with tilting, whether at windmills
  * or pinball tables.  Its name is short for "increment line".  It
@@ -8602,78 +8700,10 @@ Perl_yylex(pTHX)
 
                /* Look for a prototype */
                if (*s == '(') {
-                   char *p;
-                   bool bad_proto = FALSE;
-                   bool in_brackets = FALSE;
-                   char greedy_proto = ' ';
-                   bool proto_after_greedy_proto = FALSE;
-                   bool must_be_last = FALSE;
-                   bool underscore = FALSE;
-                   bool seen_underscore = FALSE;
-                   const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
-                    STRLEN tmplen;
-
                    s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
                    if (!s)
                        Perl_croak(aTHX_ "Prototype not terminated");
-                   /* strip spaces and check for bad characters */
-                   d = SvPV(PL_lex_stuff, tmplen);
-                   tmp = 0;
-                   for (p = d; tmplen; tmplen--, ++p) {
-                       if (!isSPACE(*p)) {
-                            d[tmp++] = *p;
-
-                           if (warnillegalproto) {
-                               if (must_be_last)
-                                   proto_after_greedy_proto = TRUE;
-                               if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
-                                   bad_proto = TRUE;
-                               }
-                               else {
-                                   if ( underscore ) {
-                                       if ( !strchr(";@%", *p) )
-                                           bad_proto = TRUE;
-                                       underscore = FALSE;
-                                   }
-                                   if ( *p == '[' ) {
-                                       in_brackets = TRUE;
-                                   }
-                                   else if ( *p == ']' ) {
-                                       in_brackets = FALSE;
-                                   }
-                                   else if ( (*p == '@' || *p == '%') &&
-                                        ( tmp < 2 || d[tmp-2] != '\\' ) &&
-                                        !in_brackets ) {
-                                       must_be_last = TRUE;
-                                       greedy_proto = *p;
-                                   }
-                                   else if ( *p == '_' ) {
-                                       underscore = seen_underscore = TRUE;
-                                   }
-                               }
-                           }
-                       }
-                   }
-                    d[tmp] = '\0';
-                   if (proto_after_greedy_proto)
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Prototype after '%c' for %"SVf" : %s",
-                                   greedy_proto, SVfARG(PL_subname), d);
-                   if (bad_proto) {
-                        SV *dsv = newSVpvs_flags("", SVs_TEMP);
-                       Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
-                                   "Illegal character %sin prototype for %"SVf" : %s",
-                                   seen_underscore ? "after '_' " : "",
-                                   SVfARG(PL_subname),
-                                    SvUTF8(PL_lex_stuff)
-                                        ? sv_uni_display(dsv,
-                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
-                                            tmp,
-                                            UNI_DISPLAY_ISPRINT)
-                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
-                                            PERL_PV_ESCAPE_NONASCII));
-                    }
-                    SvCUR_set(PL_lex_stuff, tmp);
+                   (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
                    have_proto = TRUE;
 
 #ifdef PERL_MAD