This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(perl #128996) prevent PL_op pointing to freed ops
[perl5.git] / regcomp.c
index 33d5f7a..634a320 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -101,14 +101,6 @@ EXTERN_C const struct regexp_engine my_reg_engine;
 #define        STATIC  static
 #endif
 
-#ifndef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-#endif
-
 /* this is a chain of data about sub patterns we are processing that
    need to be handled separately/specially in study_chunk. Its so
    we can simulate recursion without losing state.  */
@@ -6321,8 +6313,20 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                 sv_catsv_nomg(pat, msv);
                 rx = msv;
             }
-            else
-                pat = msv;
+            else {
+                /* We have only one SV to process, but we need to verify
+                 * it is properly null terminated or we will fail asserts
+                 * later. In theory we probably shouldn't get such SV's,
+                 * but if we do we should handle it gracefully. */
+                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) ) {
+                    /* not a string, or a string with a trailing null */
+                    pat = msv;
+                } else {
+                    /* a string with no trailing null, we need to copy it
+                     * so that we have a trailing null */
+                    pat = newSVsv(msv);
+                }
+            }
 
             if (code)
                 pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
@@ -8376,6 +8380,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
 
 /* The header definitions are in F<invlist_inline.h> */
 
+#ifndef PERL_IN_XSUB_RE
+
 PERL_STATIC_INLINE UV*
 S__invlist_array_init(SV* const invlist, const bool will_have_0)
 {
@@ -8402,6 +8408,8 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
     return zero_addr + *offset;
 }
 
+#endif
+
 PERL_STATIC_INLINE void
 S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
 {
@@ -8538,6 +8546,8 @@ S_invlist_is_iterating(SV* const invlist)
     return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
 }
 
+#ifndef PERL_IN_XSUB_RE
+
 PERL_STATIC_INLINE UV
 S_invlist_max(SV* const invlist)
 {
@@ -8554,8 +8564,6 @@ S_invlist_max(SV* const invlist)
            ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
            : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
 }
-
-#ifndef PERL_IN_XSUB_RE
 SV*
 Perl__new_invlist(pTHX_ IV initial_size)
 {
@@ -8641,7 +8649,6 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
 
     return invlist;
 }
-#endif /* ifndef PERL_IN_XSUB_RE */
 
 STATIC void
 S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -8743,8 +8750,6 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
     }
 }
 
-#ifndef PERL_IN_XSUB_RE
-
 SSize_t
 Perl__invlist_search(SV* const invlist, const UV cp)
 {
@@ -10859,7 +10864,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 RExC_seen |= REG_LOOKBEHIND_SEEN;
                RExC_in_lookbehind++;
                RExC_parse++;
-                assert(RExC_parse < RExC_end);
+                if (RExC_parse >= RExC_end) {
+                    vFAIL("Sequence (?... not terminated");
+                }
+
                 /* FALLTHROUGH */
            case '=':           /* (?=...) */
                RExC_seen_zerolen++;
@@ -15077,8 +15085,8 @@ redo_curchar:
                 }
 
                 /* Stack the position of this undealt-with left paren */
-                fence = top_index + 1;
                 av_push(fence_stack, newSViv(fence));
+                fence = top_index + 1;
                 break;
 
             case '\\':
@@ -15159,7 +15167,12 @@ redo_curchar:
                     vFAIL("Unexpected ')'");
                 }
 
-                 /* If at least two thing on the stack, treat this as an
+                /* If there is nothing after the fence, an operand is missing */
+                if (top_index - fence < 0) {
+                    RExC_parse++;
+                    goto bad_syntax;
+                }
+                /* If at least two things on the stack, treat this as an
                   * operator */
                 if (top_index - fence >= 1) {
                     goto join_operators;
@@ -17471,22 +17484,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                       &nonascii_but_latin1_properties);
 
                 /* And add them to the final list of such characters. */
-                if (has_upper_latin1_only_utf8_matches) {
-                    _invlist_union(has_upper_latin1_only_utf8_matches,
-                                   nonascii_but_latin1_properties,
-                                   &has_upper_latin1_only_utf8_matches);
-                    SvREFCNT_dec_NN(nonascii_but_latin1_properties);
-                }
-                else {
-                    has_upper_latin1_only_utf8_matches
-                                                = nonascii_but_latin1_properties;
-                }
+                _invlist_union(has_upper_latin1_only_utf8_matches,
+                               nonascii_but_latin1_properties,
+                               &has_upper_latin1_only_utf8_matches);
 
                 /* Remove them from what now becomes the unconditional list */
                 _invlist_subtract(posixes, nonascii_but_latin1_properties,
                                   &posixes);
 
-                /* And the remainder are the unconditional ones */
+                /* And add those unconditional ones to the final list */
                 if (cp_list) {
                     _invlist_union(cp_list, posixes, &cp_list);
                     SvREFCNT_dec_NN(posixes);
@@ -17496,8 +17502,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     cp_list = posixes;
                 }
 
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+
                 /* Get rid of any characters that we now know are matched
-                 * unconditionally from the conditional list */
+                 * unconditionally from the conditional list, which may make
+                 * that list empty */
                 _invlist_subtract(has_upper_latin1_only_utf8_matches,
                                   cp_list,
                                   &has_upper_latin1_only_utf8_matches);
@@ -18884,7 +18893,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
     PERL_ARGS_ASSERT_REGPROP;
 
-    sv_setpvn(sv, "", 0);
+    sv_setpvs(sv, "");
 
     if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
        /* It would be nice to FAIL() here, but this may be called from