This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate RF_tainted flag from PL_reg_flags
author David Mitchell <davem@iabyn.com>
Tue, 25 Dec 2012 20:51:50 +0000 (20:51 +0000)
committer David Mitchell <davem@iabyn.com>
Tue, 25 Dec 2012 20:51:50 +0000 (20:51 +0000)
This global flag is cleared at the start of execution, and then set if
any locale-based nodes are executed. At the end of execution, the
RXf_TAINTED_SEEN flag on the regex is set/cleared based on RF_tainted.

We eliminate RF_tainted by setting RXf_TAINTED_SEEN directly on the regex
each time a taintable node is executed.

This is the final step before eliminating PL_reg_flags.

embed.fnc
pp_hot.c
proto.h
regexec.c
regexp.h

index 97e16ba..a2c77b2 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2032,11 +2032,11 @@ Es      |U8     |regtail_study  |NN struct RExC_state_t *pRExC_state \
 ERs    |bool   |isFOO_lc       |const U8 classnum|const U8 character
 ERs    |bool   |isFOO_utf8_lc  |const U8 classnum|NN const U8* character
 ERs    |I32    |regmatch       |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
 ERs    |bool   |isFOO_lc       |const U8 classnum|const U8 character
 ERs    |bool   |isFOO_utf8_lc  |const U8 classnum|NN const U8* character
 ERs    |I32    |regmatch       |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
-ERs    |I32    |regrepeat      |NN const regexp *prog|NN char **startposp \
+ERs    |I32    |regrepeat      |NN regexp *prog|NN char **startposp \
                                |NN const regnode *p|I32 max|int depth \
                                |bool is_utf8_pat
 ERs    |I32    |regtry         |NN regmatch_info *reginfo|NN char **startposp
                                |NN const regnode *p|I32 max|int depth \
                                |bool is_utf8_pat
 ERs    |I32    |regtry         |NN regmatch_info *reginfo|NN char **startposp
-ERs    |bool   |reginclass     |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p\
+ERs    |bool   |reginclass     |NULLOK regexp * const prog|NN const regnode * const n|NN const U8 * const p\
                                |bool const utf8_target
 Es     |CHECKPOINT|regcppush   |NN const regexp *rex|I32 parenfloor\
                                |U32 maxopenparen
                                |bool const utf8_target
 Es     |CHECKPOINT|regcppush   |NN const regexp *rex|I32 parenfloor\
                                |U32 maxopenparen
index 33f8669..be634a3 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2084,12 +2084,11 @@ the pattern is marked as tainted. This means that subsequent usage, such
 as /x$r/, will set PL_tainted using TAINT_set, and thus RXf_TAINTED,
 on the new pattern too.
 
 as /x$r/, will set PL_tainted using TAINT_set, and thus RXf_TAINTED,
 on the new pattern too.
 
-During execution of a pattern, locale-variant ops such as ALNUML set the
-local flag RF_tainted. At the end of execution, the engine sets the
-RXf_TAINTED_SEEN on the pattern if RF_tainted got set, or clears it
-otherwise.
+At the start of execution of a pattern, the RXf_TAINTED_SEEN flag on the
+regex is cleared; during execution, locale-variant ops such as ALNUML may
+set RXf_TAINTED_SEEN.
 
 
-In addition, RXf_TAINTED_SEEN is used post-execution by the get magic code
+RXf_TAINTED_SEEN is used post-execution by the get magic code
 of $1 et al to indicate whether the returned value should be tainted.
 It is the responsibility of the caller of the pattern (i.e. pp_match,
 pp_subst etc) to set this flag for any other circumstances where $1 needs
 of $1 et al to indicate whether the returned value should be tainted.
 It is the responsibility of the caller of the pattern (i.e. pp_match,
 pp_subst etc) to set this flag for any other circumstances where $1 needs
diff --git a/proto.h b/proto.h
index a962d32..1151167 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6884,7 +6884,7 @@ STATIC U8*        S_reghopmaybe3(U8 *s, I32 off, const U8 *lim)
 #define PERL_ARGS_ASSERT_REGHOPMAYBE3  \
        assert(s); assert(lim)
 
 #define PERL_ARGS_ASSERT_REGHOPMAYBE3  \
        assert(s); assert(lim)
 
-STATIC bool    S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target)
+STATIC bool    S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_2)
                        __attribute__nonnull__(pTHX_3);
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_2)
                        __attribute__nonnull__(pTHX_3);
@@ -6899,7 +6899,7 @@ STATIC I32        S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *pro
 #define PERL_ARGS_ASSERT_REGMATCH      \
        assert(reginfo); assert(startpos); assert(prog)
 
 #define PERL_ARGS_ASSERT_REGMATCH      \
        assert(reginfo); assert(startpos); assert(prog)
 
-STATIC I32     S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat)
+STATIC I32     S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
index 104e1d2..02d2b19 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -93,8 +93,6 @@ static const char* const non_utf8_target_but_utf8_required
 #include "inline_invlist.c"
 #include "unicode_constants.h"
 
 #include "inline_invlist.c"
 #include "unicode_constants.h"
 
-#define RF_tainted     1       /* tainted information used? e.g. locale */
-
 #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
 
 #ifndef STATIC
 #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
 
 #ifndef STATIC
@@ -1616,13 +1614,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         break;
     }
     case BOUNDL:
         break;
     }
     case BOUNDL:
-        PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
         FBC_BOUND(isALNUM_LC,
                   isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
                   isALNUM_LC_utf8((U8*)s));
         break;
     case NBOUNDL:
         FBC_BOUND(isALNUM_LC,
                   isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
                   isALNUM_LC_utf8((U8*)s));
         break;
     case NBOUNDL:
-        PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
         FBC_NBOUND(isALNUM_LC,
                    isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
                    isALNUM_LC_utf8((U8*)s));
         FBC_NBOUND(isALNUM_LC,
                    isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
                    isALNUM_LC_utf8((U8*)s));
@@ -1671,7 +1669,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         /* FALLTHROUGH */
 
     case POSIXL:
         /* FALLTHROUGH */
 
     case POSIXL:
-        PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
         REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
                         to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
         break;
         REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
                         to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
         break;
@@ -2104,7 +2102,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
        Perl_croak(aTHX_ "corrupted regexp program");
     }
 
        Perl_croak(aTHX_ "corrupted regexp program");
     }
 
-    PL_reg_flags = 0;
+    RX_MATCH_TAINTED_off(rx);
     PL_reg_state.re_state_eval_setup_done = FALSE;
     PL_reg_maxiter = 0;
 
     PL_reg_state.re_state_eval_setup_done = FALSE;
     PL_reg_maxiter = 0;
 
@@ -2590,7 +2588,6 @@ got_it:
            );
     );
     Safefree(swap);
            );
     );
     Safefree(swap);
-    RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
 
     if (PL_reg_state.re_state_eval_setup_done)
        restore_pos(aTHX_ prog);
 
     if (PL_reg_state.re_state_eval_setup_done)
        restore_pos(aTHX_ prog);
@@ -4125,7 +4122,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            const char * s;
            U32 fold_utf8_flags;
 
            const char * s;
            U32 fold_utf8_flags;
 
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
            fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
            fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
@@ -4189,7 +4186,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         * have to set the FLAGS fields of these */
        case BOUNDL:  /*  /\b/l  */
        case NBOUNDL: /*  /\B/l  */
         * have to set the FLAGS fields of these */
        case BOUNDL:  /*  /\b/l  */
        case NBOUNDL: /*  /\B/l  */
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
            /* FALL THROUGH */
        case BOUND:   /*  /\b/   */
        case BOUNDU:  /*  /\b/u  */
            /* FALL THROUGH */
        case BOUND:   /*  /\b/   */
        case BOUNDU:  /*  /\b/u  */
@@ -4296,7 +4293,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
             /* The locale hasn't influenced the outcome before this, so defer
              * tainting until now */
 
             /* The locale hasn't influenced the outcome before this, so defer
              * tainting until now */
-            PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
 
             /* Use isFOO_lc() for characters within Latin1.  (Note that
              * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
 
             /* Use isFOO_lc() for characters within Latin1.  (Note that
              * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
@@ -4670,7 +4667,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            const U8 *fold_array;
            UV utf8_fold_flags;
 
            const U8 *fold_array;
            UV utf8_fold_flags;
 
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            type = REFFL;
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            type = REFFL;
@@ -4715,7 +4712,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            goto do_nref_ref_common;
 
        case REFFL:  /*  /\1/il  */
            goto do_nref_ref_common;
 
        case REFFL:  /*  /\1/il  */
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
@@ -6639,7 +6636,7 @@ no_silent:
  * depth     - (for debugging) backtracking depth.
  */
 STATIC I32
  * depth     - (for debugging) backtracking depth.
  */
 STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p,
+S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                 I32 max, int depth, bool is_utf8_pat)
 {
     dVAR;
                 I32 max, int depth, bool is_utf8_pat)
 {
     dVAR;
@@ -6788,7 +6785,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p,
        goto do_exactf;
 
     case EXACTFL:
        goto do_exactf;
 
     case EXACTFL:
-       PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
        utf8_flags = FOLDEQ_UTF8_LOCALE;
        goto do_exactf;
 
        utf8_flags = FOLDEQ_UTF8_LOCALE;
        goto do_exactf;
 
@@ -6882,7 +6879,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p,
         /* FALLTHROUGH */
 
     case POSIXL:
         /* FALLTHROUGH */
 
     case POSIXL:
-       PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
        if (! utf8_target) {
            while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                    *scan)))
        if (! utf8_target) {
            while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                    *scan)))
@@ -7269,7 +7266,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit
  */
 
 STATIC bool
  */
 
 STATIC bool
-S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
+S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
 {
     dVAR;
     const char flags = ANYOF_FLAGS(n);
 {
     dVAR;
     const char flags = ANYOF_FLAGS(n);
@@ -7302,7 +7299,7 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8*
            match = TRUE;
        }
        else if (flags & ANYOF_LOCALE) {
            match = TRUE;
        }
        else if (flags & ANYOF_LOCALE) {
-           PL_reg_flags |= RF_tainted;
+           RXp_MATCH_TAINTED_on(prog);
 
            if ((flags & ANYOF_LOC_FOLD)
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
 
            if ((flags & ANYOF_LOC_FOLD)
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
index 3863308..2c7eb87 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -438,16 +438,22 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
 #if NO_TAINT_SUPPORT
 #   define RX_ISTAINTED(prog)    0
 #   define RX_TAINT_on(prog)     NOOP
 #if NO_TAINT_SUPPORT
 #   define RX_ISTAINTED(prog)    0
 #   define RX_TAINT_on(prog)     NOOP
+#   define RXp_MATCH_TAINTED(prog) 0
+#   define RX_MATCH_TAINTED(prog)  0
+#   define RXp_MATCH_TAINTED_on(prog) NOOP
+#   define RX_MATCH_TAINTED_on(prog)  NOOP
+#   define RX_MATCH_TAINTED_off(prog) NOOP
 #else
 #   define RX_ISTAINTED(prog)    (RX_EXTFLAGS(prog) & RXf_TAINTED)
 #   define RX_TAINT_on(prog)     (RX_EXTFLAGS(prog) |= RXf_TAINTED)
 #else
 #   define RX_ISTAINTED(prog)    (RX_EXTFLAGS(prog) & RXf_TAINTED)
 #   define RX_TAINT_on(prog)     (RX_EXTFLAGS(prog) |= RXf_TAINTED)
+#   define RXp_MATCH_TAINTED(prog)    (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN)
+#   define RX_MATCH_TAINTED(prog)     (RX_EXTFLAGS(prog)  & RXf_TAINTED_SEEN)
+#   define RXp_MATCH_TAINTED_on(prog) (RXp_EXTFLAGS(prog) |= RXf_TAINTED_SEEN)
+#   define RX_MATCH_TAINTED_on(prog)  (RX_EXTFLAGS(prog)  |= RXf_TAINTED_SEEN)
+#   define RX_MATCH_TAINTED_off(prog) (RX_EXTFLAGS(prog)  &= ~RXf_TAINTED_SEEN)
 #endif
 
 #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN)
 #endif
 
 #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN)
-#define RXp_MATCH_TAINTED(prog)        (RXp_EXTFLAGS(prog) & RXf_TAINTED_SEEN)
-#define RX_MATCH_TAINTED(prog) (RX_EXTFLAGS(prog) & RXf_TAINTED_SEEN)
-#define RX_MATCH_TAINTED_on(prog) (RX_EXTFLAGS(prog) |= RXf_TAINTED_SEEN)
-#define RX_MATCH_TAINTED_off(prog) (RX_EXTFLAGS(prog) &= ~RXf_TAINTED_SEEN)
 #define RX_MATCH_TAINTED_set(prog, t) ((t) \
                                       ? RX_MATCH_TAINTED_on(prog) \
                                       : RX_MATCH_TAINTED_off(prog))
 #define RX_MATCH_TAINTED_set(prog, t) ((t) \
                                       ? RX_MATCH_TAINTED_on(prog) \
                                       : RX_MATCH_TAINTED_off(prog))