S_pat_upgrade_to_utf8(): add num_code_blocks arg
author David Mitchell <davem@iabyn.com>
Thu, 18 Apr 2013 14:42:35 +0000 (15:42 +0100)
committer David Mitchell <davem@iabyn.com>
Sat, 20 Apr 2013 16:23:12 +0000 (17:23 +0100)
This function was added a few commits ago in this branch. It's intended
to upgrade a pattern string to utf8, while simultaneously adjusting the
start/end byte indices of any code blocks. In two of the three places
it is called from, all code blocks will already have been processed,
so the number of code blocks equals pRExC_state->num_code_blocks.
In the third place, however (S_concat_pat), not all code blocks have yet
been processed, so using pRExC_state->num_code_blocks causes us to fall
off the end of the index array.

Add an extra arg to S_pat_upgrade_to_utf8() to tell it how many code
blocks exist so far.

regcomp.c

index 37d395b..3ada131 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4883,7 +4883,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 
 static void
 S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
-                   char **pat_p, STRLEN *plen_p)
+                   char **pat_p, STRLEN *plen_p, int num_code_blocks)
 {
     U8 *const src = (U8*)*pat_p;
     U8 *dst;
@@ -4905,7 +4905,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
             dst[d++] = (U8)UTF8_EIGHT_BIT_HI(uv);
             dst[d]   = (U8)UTF8_EIGHT_BIT_LO(uv);
         }
-        if (n < pRExC_state->num_code_blocks) {
+        if (n < num_code_blocks) {
             if (!do_end && pRExC_state->code_blocks[n].start == s) {
                 pRExC_state->code_blocks[n].start = d;
                 assert(dst[d] == '(');
@@ -5034,7 +5034,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                 const char *src = SvPV_flags_const(msv, slen, 0);
                 orig_patlen = dlen;
                 if (SvUTF8(msv) && !SvUTF8(pat)) {
-                    S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen);
+                    S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen, n);
                     sv_setpvn(pat, dst, dlen);
                     SvUTF8_on(pat);
                 }
@@ -5690,7 +5690,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
            /* whoops, we have a non-utf8 pattern, whilst run-time code
             * got compiled as utf8. Try again with a utf8 pattern */
-            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen);
+            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
+                                    pRExC_state->num_code_blocks);
             goto redo_first_pass;
        }
     }
@@ -5756,7 +5757,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         thing.
         -- dmq */
         if (flags & RESTART_UTF8) {
-            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen);
+            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
+                                    pRExC_state->num_code_blocks);
             goto redo_first_pass;
         }
         Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#X", flags);