From d164443fe7708a332f3408f2a842d986b3148cbb Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 2 Mar 2017 12:15:20 -0700 Subject: [PATCH] regcomp.c: White space only This changes the indentation of some code to reflect where it should be with the addition and subtraction of blocks in the next commits. And it reflows to fit in 79 columns, and converts some tabs to spaces. --- regcomp.c | 197 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 100 insertions(+), 97 deletions(-) diff --git a/regcomp.c b/regcomp.c index 3c57ee7..cb5dd98 100644 --- a/regcomp.c +++ b/regcomp.c @@ -12307,7 +12307,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, */ char * endbrace; /* points to '}' following the name */ - char *endchar; /* Points to '.' or '}' ending cur char in the input + char * endchar; /* Points to '.' or '}' ending cur char in the input stream */ char* p = RExC_parse; /* Temporary */ @@ -12334,20 +12334,20 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, * [^\n]. The latter is assumed when the {...} following the \N is a legal * quantifier, or there is no '{' at all */ if (*p != '{' || regcurly(p)) { - RExC_parse = p; + RExC_parse = p; if (cp_count) { *cp_count = -1; } - if (! node_p) { + if (! node_p) { return FALSE; } - *node_p = reg_node(pRExC_state, REG_ANY); - *flagp |= HASWIDTH|SIMPLE; - MARK_NAUGHTY(1); + *node_p = reg_node(pRExC_state, REG_ANY); + *flagp |= HASWIDTH|SIMPLE; + MARK_NAUGHTY(1); Set_Node_Length(*node_p, 1); /* MJD */ - return TRUE; + return TRUE; } /* The test above made sure that the next real character is a '{', but @@ -12355,10 +12355,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, * \n) and this is not allowed (for consistency with \x{...} and the * tokenizer handling of \N{NAME}). */ if (*RExC_parse != '{') { - vFAIL("Missing braces on \\N{}"); + vFAIL("Missing braces on \\N{}"); } - RExC_parse++; /* Skip past the '{' */ + RExC_parse++; /* Skip past the '{' */ endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); if (! endbrace) { /* no trailing brace */ @@ -12378,7 +12378,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, *cp_count = 0; } nextchar(pRExC_state); - if (! node_p) { + if (! node_p) { return FALSE; } @@ -12390,74 +12390,76 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, if ( endbrace - RExC_parse < 2 || strnNE(RExC_parse, "U+", 2)) { - RExC_parse = endbrace; /* position msg's '<--HERE' */ - vFAIL("\\N{NAME} must be resolved by the lexer"); + RExC_parse = endbrace; /* position msg's '<--HERE' */ + vFAIL("\\N{NAME} must be resolved by the lexer"); } - RExC_parse += 2; /* Skip past the 'U+' */ + RExC_parse += 2; /* Skip past the 'U+' */ - /* Because toke.c has generated a special construct for us guaranteed not - * to have NULs, we can use a str function */ - endchar = RExC_parse + strcspn(RExC_parse, ".}"); + /* Because toke.c has generated a special construct for us guaranteed + * not to have NULs, we can use a str function */ + endchar = RExC_parse + strcspn(RExC_parse, ".}"); - /* Code points are separated by dots. If none, there is only one code - * point, and is terminated by the brace */ + /* Code points are separated by dots. If none, there is only one code + * point, and is terminated by the brace */ - if (endchar >= endbrace) { - STRLEN length_of_hex; - I32 grok_hex_flags; + if (endchar >= endbrace) { + STRLEN length_of_hex; + I32 grok_hex_flags; - /* Here, exactly one code point. If that isn't what is wanted, fail */ - if (! code_point_p) { - RExC_parse = p; - return FALSE; - } + /* Here, exactly one code point. If that isn't what is wanted, + * fail */ + if (! code_point_p) { + RExC_parse = p; + return FALSE; + } - /* Convert code point from hex */ - length_of_hex = (STRLEN)(endchar - RExC_parse); - grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES - | PERL_SCAN_DISALLOW_PREFIX - - /* No errors in the first pass (See [perl - * #122671].) We let the code below find the - * errors when there are multiple chars. */ - | ((SIZE_ONLY) - ? PERL_SCAN_SILENT_ILLDIGIT - : 0); - - /* This routine is the one place where both single- and double-quotish - * \N{U+xxxx} are evaluated. The value is a Unicode code point which - * must be converted to native. */ - *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse, - &length_of_hex, - &grok_hex_flags, - NULL)); - - /* The tokenizer should have guaranteed validity, but it's possible to - * bypass it by using single quoting, so check. Don't do the check - * here when there are multiple chars; we do it below anyway. */ - if (length_of_hex == 0 - || length_of_hex != (STRLEN)(endchar - RExC_parse) ) - { - RExC_parse += length_of_hex; /* Includes all the valid */ - RExC_parse += (RExC_orig_utf8) /* point to after 1st invalid */ - ? UTF8SKIP(RExC_parse) - : 1; - /* Guard against malformed utf8 */ - if (RExC_parse >= endchar) { - RExC_parse = endchar; + /* Convert code point from hex */ + length_of_hex = (STRLEN)(endchar - RExC_parse); + grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES + | PERL_SCAN_DISALLOW_PREFIX + + /* No errors in the first pass (See [perl + * #122671].) We let the code below find the + * errors when there are multiple chars. */ + | ((SIZE_ONLY) + ? PERL_SCAN_SILENT_ILLDIGIT + : 0); + + /* This routine is the one place where both single- and + * double-quotish \N{U+xxxx} are evaluated. The value is a Unicode + * code point which must be converted to native. */ + *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse, + &length_of_hex, + &grok_hex_flags, + NULL)); + + /* The tokenizer should have guaranteed validity, but it's possible + * to bypass it by using single quoting, so check. Don't do the + * check here when there are multiple chars; we do it below anyway. + * */ + if (length_of_hex == 0 + || length_of_hex != (STRLEN)(endchar - RExC_parse) ) + { + RExC_parse += length_of_hex; /* Includes all the valid */ + RExC_parse += (RExC_orig_utf8) /* point to after 1st invalid */ + ? UTF8SKIP(RExC_parse) + : 1; + /* Guard against malformed utf8 */ + if (RExC_parse >= endchar) { + RExC_parse = endchar; + } + vFAIL("Invalid hexadecimal number in \\N{U+...}"); } - vFAIL("Invalid hexadecimal number in \\N{U+...}"); - } - RExC_parse = endbrace + 1; - return TRUE; - } - else { /* Is a multiple character sequence */ - SV * substitute_parse; - STRLEN len; - char *orig_end = RExC_end; - char *save_start = RExC_start; + RExC_parse = endbrace + 1; + return TRUE; + } + else { /* Is a multiple character sequence */ + SV * substitute_parse; + STRLEN len; + char *orig_end = RExC_end; + char *save_start = RExC_start; I32 flags; /* Count the code points, if desired, in the sequence */ @@ -12481,32 +12483,32 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, return FALSE; } - /* What is done here is to convert this to a sub-pattern of the form + /* What is done here is to convert this to a sub-pattern of the form * \x{char1}\x{char2}... and then call reg recursively to parse it * (enclosing in "(?: ... )" ). That way, it retains its atomicness, * while not having to worry about special handling that some code * points may have. */ - substitute_parse = newSVpvs("?:"); + substitute_parse = newSVpvs("?:"); - while (RExC_parse < endbrace) { + while (RExC_parse < endbrace) { - /* Convert to notation the rest of the code understands */ - sv_catpv(substitute_parse, "\\x{"); - sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse); - sv_catpv(substitute_parse, "}"); + /* Convert to notation the rest of the code understands */ + sv_catpv(substitute_parse, "\\x{"); + sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse); + sv_catpv(substitute_parse, "}"); - /* Point to the beginning of the next character in the sequence. */ - RExC_parse = endchar + 1; - endchar = RExC_parse + strcspn(RExC_parse, ".}"); + /* Point to the beginning of the next character in the sequence. */ + RExC_parse = endchar + 1; + endchar = RExC_parse + strcspn(RExC_parse, ".}"); - } + } sv_catpv(substitute_parse, ")"); len = SvCUR(substitute_parse); - /* Don't allow empty number */ - if (len < (STRLEN) 8) { + /* Don't allow empty number */ + if (len < (STRLEN) 8) { RExC_parse = endbrace; vFAIL("Invalid hexadecimal number in \\N{U+...}"); } @@ -12522,27 +12524,28 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_recode_x_to_native = 1; #endif - *node_p = reg(pRExC_state, 1, &flags, depth+1); + *node_p = reg(pRExC_state, 1, &flags, depth+1); - /* Restore the saved values */ - RExC_start = RExC_adjusted_start = save_start; - RExC_parse = endbrace; - RExC_end = orig_end; + /* Restore the saved values */ + RExC_start = RExC_adjusted_start = save_start; + RExC_parse = endbrace; + RExC_end = orig_end; #ifdef EBCDIC - RExC_recode_x_to_native = 0; + RExC_recode_x_to_native = 0; #endif - SvREFCNT_dec_NN(substitute_parse); - if (! *node_p) { - RETURN_X_ON_RESTART(FALSE, flags,flagp); - FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf, - (UV) flags); - } - *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); + SvREFCNT_dec_NN(substitute_parse); - nextchar(pRExC_state); + if (! *node_p) { + RETURN_X_ON_RESTART(FALSE, flags,flagp); + FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf, + (UV) flags); + } + *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); - return TRUE; + nextchar(pRExC_state); + + return TRUE; } } -- 1.8.3.1