From: Karl Williamson Date: Fri, 19 Oct 2018 01:58:45 +0000 (-0600) Subject: regcomp.c: Extract code into a function X-Git-Tag: v5.29.4~2^2~6 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/e40c711d0c25d143bda13fc8ce188050d8d2ccf6 regcomp.c: Extract code into a function This should have no changes in behavior, and is for a future commit where this code will be called from a second place. --- diff --git a/embed.fnc b/embed.fnc index e635918..0838fa7 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2412,6 +2412,7 @@ Es |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \ |NULLOK SV ** return_invlist \ |NN I32 *flagp|U32 depth \ |NN char * const oregcomp_parse +Es |void |set_regex_pv |NN RExC_state_t *pRExC_state|NN REGEXP *Rx #if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING) Es |void |dump_regex_sets_structures \ |NN RExC_state_t *pRExC_state \ diff --git a/embed.h b/embed.h index ea7c0d9..a152776 100644 --- a/embed.h +++ b/embed.h @@ -1222,6 +1222,7 @@ #define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d) #define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d) #define set_ANYOF_arg(a,b,c,d,e,f,g) S_set_ANYOF_arg(aTHX_ a,b,c,d,e,f,g) +#define set_regex_pv(a,b) S_set_regex_pv(aTHX_ a,b) #define skip_to_be_ignored_text(a,b,c) S_skip_to_be_ignored_text(aTHX_ a,b,c) #define ssc_add_range(a,b,c) S_ssc_add_range(aTHX_ a,b,c) #define ssc_and(a,b,c) S_ssc_and(aTHX_ a,b,c) diff --git a/proto.h b/proto.h index f977d95..a56ad89 100644 --- a/proto.h +++ b/proto.h @@ -5560,6 +5560,9 @@ STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_dat STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list, SV* const swash, const bool has_user_defined_property); #define PERL_ARGS_ASSERT_SET_ANYOF_ARG \ assert(pRExC_state); assert(node) +STATIC void S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx); +#define PERL_ARGS_ASSERT_SET_REGEX_PV \ + assert(pRExC_state); assert(Rx) STATIC void S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state, char ** p, const bool force_to_xmod); #define PERL_ARGS_ASSERT_SKIP_TO_BE_IGNORED_TEXT \ assert(pRExC_state); assert(p) diff --git a/regcomp.c b/regcomp.c index 58a50e3..04e2228 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6936,6 +6936,88 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, return TRUE; } +STATIC void +S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx) +{ + /* Calculates and sets in the compiled pattern 'Rx' the string to compile, + * properly wrapped with the right modifiers */ + + bool has_p = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); + bool has_charset = (get_regex_charset(RExC_rx->extflags) + != REGEX_DEPENDS_CHARSET); + + /* The caret is output if there are any defaults: if not all the STD + * flags are set, or if no character set specifier is needed */ + bool has_default = + (((RExC_rx->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD) + || ! has_charset); + bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN) + == REG_RUN_ON_COMMENT_SEEN); + U8 reganch = (U8)((RExC_rx->extflags & RXf_PMf_STD_PMMOD) + >> RXf_PMf_STD_PMMOD_SHIFT); + const char *fptr = STD_PAT_MODS; /*"msixxn"*/ + char *p; + STRLEN pat_len = RExC_precomp_end - RExC_precomp; + + /* We output all the necessary flags; we never output a minus, as all + * those are defaults, so are + * covered by the caret */ + const STRLEN wraplen = pat_len + has_p + has_runon + + has_default /* If needs a caret */ + + PL_bitcount[reganch] /* 1 char for each set standard flag */ + + /* If needs a character set specifier */ + + ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0) + + (sizeof("(?:)") - 1); + + PERL_ARGS_ASSERT_SET_REGEX_PV; + + /* make sure PL_bitcount bounds not exceeded */ + assert(sizeof(STD_PAT_MODS) <= 8); + + p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */ + SvPOK_on(Rx); + if (RExC_utf8) + SvFLAGS(Rx) |= SVf_UTF8; + *p++='('; *p++='?'; + + /* If a default, cover it using the caret */ + if (has_default) { + *p++= DEFAULT_PAT_MOD; + } + if (has_charset) { + STRLEN len; + const char* const name = get_regex_charset_name(RExC_rx->extflags, &len); + Copy(name, p, len, char); + p += len; + } + if (has_p) + *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ + { + char ch; + while((ch = *fptr++)) { + if(reganch & 1) + *p++ = ch; + reganch >>= 1; + } + } + + *p++ = ':'; + Copy(RExC_precomp, p, pat_len, char); + assert ((RX_WRAPPED(Rx) - p) < 16); + RExC_rx->pre_prefix = p - RX_WRAPPED(Rx); + p += pat_len; + + /* Adding a trailing \n causes this to compile properly: + my $R = qr / A B C # D E/x; /($R)/ + Otherwise the parens are considered part of the comment */ + if (has_runon) + *p++ = '\n'; + *p++ = ')'; + *p = 0; + SvCUR_set(Rx, p - RX_WRAPPED(Rx)); +} + /* * Perl_re_op_compile - the perl internal RE engine's function to compile a * regular expression into internal code. @@ -7373,79 +7455,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_rxi->code_blocks->refcnt++; } - { - bool has_p = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); - bool has_charset = (get_regex_charset(RExC_rx->extflags) - != REGEX_DEPENDS_CHARSET); - - /* The caret is output if there are any defaults: if not all the STD - * flags are set, or if no character set specifier is needed */ - bool has_default = - (((RExC_rx->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD) - || ! has_charset); - bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN) - == REG_RUN_ON_COMMENT_SEEN); - U8 reganch = (U8)((RExC_rx->extflags & RXf_PMf_STD_PMMOD) - >> RXf_PMf_STD_PMMOD_SHIFT); - const char *fptr = STD_PAT_MODS; /*"msixxn"*/ - char *p; - - /* We output all the necessary flags; we never output a minus, as all - * those are defaults, so are - * covered by the caret */ - const STRLEN wraplen = plen + has_p + has_runon - + has_default /* If needs a caret */ - + PL_bitcount[reganch] /* 1 char for each set standard flag */ - - /* If needs a character set specifier */ - + ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0) - + (sizeof("(?:)") - 1); - - /* make sure PL_bitcount bounds not exceeded */ - assert(sizeof(STD_PAT_MODS) <= 8); - - p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */ - SvPOK_on(Rx); - if (RExC_utf8) - SvFLAGS(Rx) |= SVf_UTF8; - *p++='('; *p++='?'; - - /* If a default, cover it using the caret */ - if (has_default) { - *p++= DEFAULT_PAT_MOD; - } - if (has_charset) { - STRLEN len; - const char* const name = get_regex_charset_name(RExC_rx->extflags, &len); - Copy(name, p, len, char); - p += len; - } - if (has_p) - *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ - { - char ch; - while((ch = *fptr++)) { - if(reganch & 1) - *p++ = ch; - reganch >>= 1; - } - } - - *p++ = ':'; - Copy(RExC_precomp, p, plen, char); - assert ((RX_WRAPPED(Rx) - p) < 16); - RExC_rx->pre_prefix = p - RX_WRAPPED(Rx); - p += plen; - - /* Adding a trailing \n causes this to compile properly: - my $R = qr / A B C # D E/x; /($R)/ - Otherwise the parens are considered part of the comment */ - if (has_runon) - *p++ = '\n'; - *p++ = ')'; - *p = 0; - SvCUR_set(Rx, p - RX_WRAPPED(Rx)); - } + /* Set up the string to compile, with correct modifiers, etc */ + set_regex_pv(pRExC_state, Rx); RExC_rx->intflags = 0; RExC_total_parens = RExC_npar;