regcomp.c: White space only

author Karl Williamson <khw@cpan.org>

Thu, 2 Mar 2017 19:15:20 +0000 (12:15 -0700)

committer Karl Williamson <khw@cpan.org>

Tue, 20 Feb 2018 08:37:23 +0000 (01:37 -0700)
author Karl Williamson <khw@cpan.org>
Thu, 2 Mar 2017 19:15:20 +0000 (12:15 -0700)
committer Karl Williamson <khw@cpan.org>
Tue, 20 Feb 2018 08:37:23 +0000 (01:37 -0700)
diff --git a/regcomp.c b/regcomp.c

index 3c57ee7..cb5dd98 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -12307,7 +12307,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
    */
  
      char * endbrace;    /* points to '}' following the name */
-    char *endchar;     /* Points to '.' or '}' ending cur char in the input
+    char * endchar;     /* Points to '.' or '}' ending cur char in the input
                             stream */
      char* p = RExC_parse; /* Temporary */
  
@@ -12334,20 +12334,20 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
       * [^\n].  The latter is assumed when the {...} following the \N is a legal
       * quantifier, or there is no '{' at all */
      if (*p != '{' || regcurly(p)) {
-       RExC_parse = p;
+        RExC_parse = p;
          if (cp_count) {
              *cp_count = -1;
          }
  
-       if (! node_p) {
+        if (! node_p) {
              return FALSE;
          }
  
-       *node_p = reg_node(pRExC_state, REG_ANY);
-       *flagp |= HASWIDTH|SIMPLE;
-       MARK_NAUGHTY(1);
+        *node_p = reg_node(pRExC_state, REG_ANY);
+        *flagp |= HASWIDTH|SIMPLE;
+        MARK_NAUGHTY(1);
          Set_Node_Length(*node_p, 1); /* MJD */
-       return TRUE;
+        return TRUE;
      }
  
      /* The test above made sure that the next real character is a '{', but
@@ -12355,10 +12355,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
       * \n) and this is not allowed (for consistency with \x{...} and the
       * tokenizer handling of \N{NAME}). */
      if (*RExC_parse != '{') {
-       vFAIL("Missing braces on \\N{}");
+        vFAIL("Missing braces on \\N{}");
      }
  
-    RExC_parse++;      /* Skip past the '{' */
+    RExC_parse++;       /* Skip past the '{' */
  
      endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
      if (! endbrace) { /* no trailing brace */
@@ -12378,7 +12378,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              *cp_count = 0;
          }
          nextchar(pRExC_state);
-       if (! node_p) {
+        if (! node_p) {
              return FALSE;
          }
  
@@ -12390,74 +12390,76 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      if (   endbrace - RExC_parse < 2
          || strnNE(RExC_parse, "U+", 2))
      {
-       RExC_parse = endbrace;  /* position msg's '<--HERE' */
-       vFAIL("\\N{NAME} must be resolved by the lexer");
+        RExC_parse = endbrace;  /* position msg's '<--HERE' */
+        vFAIL("\\N{NAME} must be resolved by the lexer");
      }
  
-    RExC_parse += 2;   /* Skip past the 'U+' */
+        RExC_parse += 2;    /* Skip past the 'U+' */
  
-    /* Because toke.c has generated a special construct for us guaranteed not
-     * to have NULs, we can use a str function */
-    endchar = RExC_parse + strcspn(RExC_parse, ".}");
+        /* Because toke.c has generated a special construct for us guaranteed
+         * not to have NULs, we can use a str function */
+        endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
-    /* Code points are separated by dots.  If none, there is only one code
-     * point, and is terminated by the brace */
+        /* Code points are separated by dots.  If none, there is only one code
+         * point, and is terminated by the brace */
  
-    if (endchar >= endbrace) {
-       STRLEN length_of_hex;
-       I32 grok_hex_flags;
+        if (endchar >= endbrace) {
+            STRLEN length_of_hex;
+            I32 grok_hex_flags;
  
-        /* Here, exactly one code point.  If that isn't what is wanted, fail */
-        if (! code_point_p) {
-            RExC_parse = p;
-            return FALSE;
-        }
+            /* Here, exactly one code point.  If that isn't what is wanted,
+             * fail */
+            if (! code_point_p) {
+                RExC_parse = p;
+                return FALSE;
+            }
  
-        /* Convert code point from hex */
-       length_of_hex = (STRLEN)(endchar - RExC_parse);
-       grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                       | PERL_SCAN_DISALLOW_PREFIX
-
-                           /* No errors in the first pass (See [perl
-                            * #122671].)  We let the code below find the
-                            * errors when there are multiple chars. */
-                       | ((SIZE_ONLY)
-                          ? PERL_SCAN_SILENT_ILLDIGIT
-                          : 0);
-
-        /* This routine is the one place where both single- and double-quotish
-         * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
-         * must be converted to native. */
-       *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                               &length_of_hex,
-                                               &grok_hex_flags,
-                                               NULL));
-
-       /* The tokenizer should have guaranteed validity, but it's possible to
-         * bypass it by using single quoting, so check.  Don't do the check
-         * here when there are multiple chars; we do it below anyway. */
-        if (length_of_hex == 0
-            || length_of_hex != (STRLEN)(endchar - RExC_parse) )
-        {
-            RExC_parse += length_of_hex;       /* Includes all the valid */
-            RExC_parse += (RExC_orig_utf8)     /* point to after 1st invalid */
-                            ? UTF8SKIP(RExC_parse)
-                            : 1;
-            /* Guard against malformed utf8 */
-            if (RExC_parse >= endchar) {
-                RExC_parse = endchar;
+            /* Convert code point from hex */
+            length_of_hex = (STRLEN)(endchar - RExC_parse);
+            grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
+                            | PERL_SCAN_DISALLOW_PREFIX
+
+                                /* No errors in the first pass (See [perl
+                                * #122671].)  We let the code below find the
+                                * errors when there are multiple chars. */
+                            | ((SIZE_ONLY)
+                                ? PERL_SCAN_SILENT_ILLDIGIT
+                                : 0);
+
+            /* This routine is the one place where both single- and
+             * double-quotish \N{U+xxxx} are evaluated.  The value is a Unicode
+             * code point which must be converted to native. */
+            *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
+                                            &length_of_hex,
+                                            &grok_hex_flags,
+                                            NULL));
+
+            /* The tokenizer should have guaranteed validity, but it's possible
+             * to bypass it by using single quoting, so check.  Don't do the
+             * check here when there are multiple chars; we do it below anyway.
+             * */
+            if (length_of_hex == 0
+                || length_of_hex != (STRLEN)(endchar - RExC_parse) )
+            {
+                RExC_parse += length_of_hex;    /* Includes all the valid */
+                RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
+                                ? UTF8SKIP(RExC_parse)
+                                : 1;
+                /* Guard against malformed utf8 */
+                if (RExC_parse >= endchar) {
+                    RExC_parse = endchar;
+                }
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
              }
-            vFAIL("Invalid hexadecimal number in \\N{U+...}");
-        }
  
-        RExC_parse = endbrace + 1;
-        return TRUE;
-    }
-    else {  /* Is a multiple character sequence */
-       SV * substitute_parse;
-       STRLEN len;
-       char *orig_end = RExC_end;
-       char *save_start = RExC_start;
+            RExC_parse = endbrace + 1;
+            return TRUE;
+        }
+        else {  /* Is a multiple character sequence */
+        SV * substitute_parse;
+        STRLEN len;
+        char *orig_end = RExC_end;
+        char *save_start = RExC_start;
          I32 flags;
  
          /* Count the code points, if desired, in the sequence */
@@ -12481,32 +12483,32 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              return FALSE;
          }
  
-       /* What is done here is to convert this to a sub-pattern of the form
+        /* What is done here is to convert this to a sub-pattern of the form
           * \x{char1}\x{char2}...  and then call reg recursively to parse it
           * (enclosing in "(?: ... )" ).  That way, it retains its atomicness,
           * while not having to worry about special handling that some code
           * points may have. */
  
-       substitute_parse = newSVpvs("?:");
+        substitute_parse = newSVpvs("?:");
  
-       while (RExC_parse < endbrace) {
+        while (RExC_parse < endbrace) {
  
-           /* Convert to notation the rest of the code understands */
-           sv_catpv(substitute_parse, "\\x{");
-           sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
-           sv_catpv(substitute_parse, "}");
+            /* Convert to notation the rest of the code understands */
+            sv_catpv(substitute_parse, "\\x{");
+            sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
+            sv_catpv(substitute_parse, "}");
  
-           /* Point to the beginning of the next character in the sequence. */
-           RExC_parse = endchar + 1;
-           endchar = RExC_parse + strcspn(RExC_parse, ".}");
+            /* Point to the beginning of the next character in the sequence. */
+            RExC_parse = endchar + 1;
+            endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
-       }
+        }
          sv_catpv(substitute_parse, ")");
  
          len = SvCUR(substitute_parse);
  
-       /* Don't allow empty number */
-       if (len < (STRLEN) 8) {
+        /* Don't allow empty number */
+        if (len < (STRLEN) 8) {
              RExC_parse = endbrace;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
@@ -12522,27 +12524,28 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          RExC_recode_x_to_native = 1;
  #endif
  
-        *node_p = reg(pRExC_state, 1, &flags, depth+1);
+    *node_p = reg(pRExC_state, 1, &flags, depth+1);
  
-        /* Restore the saved values */
-       RExC_start = RExC_adjusted_start = save_start;
-       RExC_parse = endbrace;
-       RExC_end = orig_end;
+    /* Restore the saved values */
+    RExC_start = RExC_adjusted_start = save_start;
+    RExC_parse = endbrace;
+    RExC_end = orig_end;
  #ifdef EBCDIC
-        RExC_recode_x_to_native = 0;
+    RExC_recode_x_to_native = 0;
  #endif
-        SvREFCNT_dec_NN(substitute_parse);
  
-        if (! *node_p) {
-            RETURN_X_ON_RESTART(FALSE, flags,flagp);
-            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                (UV) flags);
-        }
-        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+    SvREFCNT_dec_NN(substitute_parse);
  
-        nextchar(pRExC_state);
+    if (! *node_p) {
+        RETURN_X_ON_RESTART(FALSE, flags,flagp);
+        FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
+            (UV) flags);
+    }
+    *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
  
-        return TRUE;
+    nextchar(pRExC_state);
+
+    return TRUE;
      }
  }
author	Karl Williamson <khw@cpan.org>
	Thu, 2 Mar 2017 19:15:20 +0000 (12:15 -0700)
committer	Karl Williamson <khw@cpan.org>
	Tue, 20 Feb 2018 08:37:23 +0000 (01:37 -0700)