This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactor \x processing to single function
author Karl Williamson <public@khwilliamson.com>
Sun, 17 Jun 2012 19:03:36 +0000 (13:03 -0600)
committer Karl Williamson <public@khwilliamson.com>
Wed, 20 Jun 2012 20:08:42 +0000 (14:08 -0600)
There are three places that process \x.  These can and did get out of
sync.  This moves all three to use a common static inline function so
that they all do the same thing on the same inputs, and their behaviors
will not drift apart again.

This commit should not change current behavior.  A previous commit
was designed to bring all three to identical behavior.

dquote_static.c
embed.fnc
embed.h
proto.h
regcomp.c
toke.c

index 02e39cf..4f839ee 100644 (file)
@@ -164,6 +164,73 @@ S_grok_bslash_o(pTHX_ const char *s,
     return TRUE;
 }
 
+PERL_STATIC_INLINE bool
+S_grok_bslash_x(pTHX_ const char *s,
+                        UV *uv,
+                        STRLEN *len,
+                        const char** error_msg,
+                        const bool output_warning)
+{
+
+/*  Parse a backslash-x hex escape, in either the unbraced form (at most two
+ *  hex digits are scanned) or the braced form 'x{...}'.
+ *
+ *  Documentation to be supplied when interface nailed down finally
+ *  This returns FALSE if there is an error which the caller need not recover
+ *  from; otherwise TRUE.  In either case the caller should look at *len.
+ *  On input:
+ *     s   points to a string that begins with 'x', and the previous character
+ *         was a backslash.
+ *     uv  points to a UV that will hold the output value, valid only if the
+ *         return from the function is TRUE
+ *     len points to a STRLEN that receives an offset counted from s (the
+ *         'x'); its value on entry is not read.
+ *         on success, *len is the offset of the next character in the string
+ *                    past the end of this construct.
+ *         on failure, *len is the offset of the failure (2, i.e. just past
+ *                    the 'x{', for a missing right brace)
+ *      error_msg is a pointer that will be set to an internal buffer giving an
+ *         error message upon failure (the return is FALSE).  Untouched if
+ *         function succeeds
+ *     output_warning says whether to output any warning messages, or suppress
+ *         them.  NOTE: it is not referenced anywhere in the body below.
+ */
+    const char* e;
+    STRLEN numbers_len;
+    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+               | PERL_SCAN_DISALLOW_PREFIX;
+
+    PERL_ARGS_ASSERT_GROK_BSLASH_X;
+
+
+    assert(*s == 'x');
+    s++;
+
+    if (*s != '{') {
+       I32 flags = PERL_SCAN_DISALLOW_PREFIX; /* NB: shadows the outer 'flags';
+                                                  the unbraced form does not
+                                                  set ALLOW_UNDERSCORES */
+       *len = 2; /* give grok_hex at most two characters to scan */
+       *uv = grok_hex(s, len, &flags, NULL);
+       (*len)++; /* s was advanced past the 'x' above, so count it too */
+       return TRUE;
+    }
+
+    e = strchr(s, '}');
+    if (!e) {
+       *len = 2;       /* Move past the 'x{' */
+        /* XXX The corresponding message above for \o is just '\\o{'; other
+         * messages for other constructs include the '}', so are inconsistent.
+         */
+       *error_msg = "Missing right brace on \\x{}";
+       return FALSE;
+    }
+
+    /* Return past the '}' no matter what is inside the braces */
+    *len = e - s + 2;  /* 2 = 1 for the 'x' + 1 for the '}' */
+
+    s++;    /* Point to first digit */
+
+    numbers_len = e - s; /* everything strictly between the braces */
+    *uv = grok_hex(s, &numbers_len, &flags, NULL);
+    /* Note that if has non-hex, will ignore everything starting with that up
+     * to the '}'.  (numbers_len is not examined after the call; *len was
+     * already computed from the position of the '}'.) */
+
+    return TRUE;
+}
+
 /*
  * Local variables:
  * c-indentation-style: bsd
index 196cf1f..1f62b9d 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -718,6 +718,7 @@ Apd |UV     |grok_bin       |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV
 #ifdef PERL_IN_DQUOTE_STATIC_C
 EMsR   |char   |grok_bslash_c  |const char source|const bool utf8|const bool output_warning
 EMsR   |bool   |grok_bslash_o  |NN const char* s|NN UV* uv|NN STRLEN* len|NN const char** error_msg|const bool output_warning
+EMiR   |bool   |grok_bslash_x  |NN const char* s|NN UV* uv|NN STRLEN* len|NN const char** error_msg|const bool output_warning
 #endif
 Apd    |UV     |grok_hex       |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
 Apd    |int    |grok_number    |NN const char *pv|STRLEN len|NULLOK UV *valuep
diff --git a/embed.h b/embed.h
index 2a4585c..260bee9 100644 (file)
--- a/embed.h
+++ b/embed.h
 #  if defined(PERL_IN_DQUOTE_STATIC_C)
 #define grok_bslash_c(a,b,c)   S_grok_bslash_c(aTHX_ a,b,c)
 #define grok_bslash_o(a,b,c,d,e)       S_grok_bslash_o(aTHX_ a,b,c,d,e)
+#define grok_bslash_x(a,b,c,d,e)       S_grok_bslash_x(aTHX_ a,b,c,d,e)
 #define regcurly(a)            S_regcurly(aTHX_ a)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C)
diff --git a/proto.h b/proto.h
index e1e116c..3188170 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5489,6 +5489,15 @@ STATIC bool      S_grok_bslash_o(pTHX_ const char* s, UV* uv, STRLEN* len, const char
 #define PERL_ARGS_ASSERT_GROK_BSLASH_O \
        assert(s); assert(uv); assert(len); assert(error_msg)
 
+PERL_STATIC_INLINE bool        S_grok_bslash_x(pTHX_ const char* s, UV* uv, STRLEN* len, const char** error_msg, const bool output_warning)
+                       __attribute__warn_unused_result__
+                       __attribute__nonnull__(pTHX_1)
+                       __attribute__nonnull__(pTHX_2)
+                       __attribute__nonnull__(pTHX_3)
+                       __attribute__nonnull__(pTHX_4);
+#define PERL_ARGS_ASSERT_GROK_BSLASH_X \
+       assert(s); assert(uv); assert(len); assert(error_msg)
+
 PERL_STATIC_INLINE I32 S_regcurly(pTHX_ const char *s)
                        __attribute__warn_unused_result__
                        __attribute__pure__
index 873458c..acecaa9 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -10434,32 +10434,33 @@ tryagain:
                            break;
                        }
                    case 'x':
-                       if (*++p == '{') {
-                           char* const e = strchr(p, '}');
+                       {
+                           STRLEN brace_len = len;
+                           UV result;
+                           const char* error_msg;
 
-                           if (!e) {
-                               RExC_parse = p + 1;
-                               vFAIL("Missing right brace on \\x{}");
+                           bool valid = grok_bslash_x(p,
+                                                      &result,
+                                                      &brace_len,
+                                                      &error_msg,
+                                                      1);
+                           p += brace_len;
+                           if (! valid) {
+                               RExC_parse = p; /* going to die anyway; point
+                                                  to exact spot of failure */
+                               vFAIL(error_msg);
                            }
                            else {
-                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                                    | PERL_SCAN_DISALLOW_PREFIX;
-                                STRLEN numlen = e - p - 1;
-                               ender = grok_hex(p + 1, &numlen, &flags, NULL);
-                               if (ender > 0xff)
-                                   REQUIRE_UTF8;
-                               p = e + 1;
+                               ender = result;
                            }
+                           if (PL_encoding && ender < 0x100) {
+                               goto recode_encoding;
+                           }
+                           if (ender > 0xff) {
+                               REQUIRE_UTF8;
+                           }
+                           break;
                        }
-                       else {
-                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                           STRLEN numlen = 2;
-                           ender = grok_hex(p, &numlen, &flags, NULL);
-                           p += numlen;
-                       }
-                       if (PL_encoding && ender < 0x100)
-                           goto recode_encoding;
-                       break;
                    case 'c':
                        p++;
                        ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
@@ -11542,22 +11543,18 @@ parseit:
                }
                break;
            case 'x':
-               if (*RExC_parse == '{') {
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                        | PERL_SCAN_DISALLOW_PREFIX;
-                   char * const e = strchr(RExC_parse++, '}');
-                    if (!e)
-                        vFAIL("Missing right brace on \\x{}");
-
-                   numlen = e - RExC_parse;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
-                   RExC_parse = e + 1;
-               }
-               else {
-                    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                   numlen = 2;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+               RExC_parse--;   /* function expects to be pointed at the 'x' */
+               {
+                   const char* error_msg;
+                   bool valid = grok_bslash_x(RExC_parse,
+                                              &value,
+                                              &numlen,
+                                              &error_msg,
+                                              1);
                    RExC_parse += numlen;
+                   if (! valid) {
+                       vFAIL(error_msg);
+                   }
                }
                if (PL_encoding && value < 0x100)
                    goto recode_encoding;
diff --git a/toke.c b/toke.c
index dfcb25c..99c1e22 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -3027,29 +3027,16 @@ S_scan_const(pTHX_ char *start)
 
            /* eg. \x24 indicates the hex constant 0x24 */
            case 'x':
-               ++s;
-               if (*s == '{') {
-                   char* const e = strchr(s, '}');
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
-                      PERL_SCAN_DISALLOW_PREFIX;
+               {
                    STRLEN len;
+                   const char* error;
 
-                    ++s;
-                   if (!e) {
-                       yyerror("Missing right brace on \\x{}");
+                   bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+                   s += len;
+                   if (! valid) {
+                       yyerror(error);
                        continue;
                    }
-                    len = e - s;
-                   uv = grok_hex(s, &len, &flags, NULL);
-                   s = e + 1;
-               }
-               else {
-                   {
-                       STRLEN len = 2;
-                        I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                       uv = grok_hex(s, &len, &flags, NULL);
-                       s += len;
-                   }
                }
 
              NUM_ESCAPE_INSERT: