This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Allow more debugging in re_comp.c
authorKarl Williamson <khw@cpan.org>
Mon, 24 Feb 2020 19:13:16 +0000 (12:13 -0700)
committerKarl Williamson <khw@cpan.org>
Mon, 2 Mar 2020 18:45:21 +0000 (11:45 -0700)
This adds two main functions that were previously only defined in
regcomp.c to also be defined in re_comp.c.  This allows re.pm to use
debugging with them.   To avoid duplicating large data structures,
several lightweight wrapper functions are added to regcomp.c that
re_comp.c calls to access those structures.

embed.fnc
embed.h
proto.h
regcomp.c

index 30829b3..3b57471 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1864,6 +1864,10 @@ EXp      |I32|reg_numbered_buff_length|NN REGEXP * const rx|NN const SV * const sv|co
 
 : FIXME - is anything in re using this now?
 EXp    |SV*|reg_qr_package|NN REGEXP * const rx
+EXpRT  |I16    |do_uniprop_match|NN const char * const key|const U16 key_len
+EXpRT  |const char * const *|get_prop_values|const int table_index
+EXpR   |SV *   |get_prop_definition|const int table_index
+EXpRT  |const char *|get_deprecated_property_msg|const Size_t warning_offset
 #if defined(PERL_IN_REGCOMP_C)
 ERS    |REGEXP*|re_op_compile_wrapper|NN SV * const pattern|U32 orig_rx_flags|const U32 pm_flags
 EiRT   |bool   |invlist_is_iterating|NN SV* const invlist
@@ -1911,6 +1915,30 @@ ES       |regnode_offset|regclass|NN RExC_state_t *pRExC_state                 \
                                |const bool strict                            \
                                |bool optimizable                             \
                                |NULLOK SV** ret_invlist
+ES     |SV *   |parse_uniprop_string|NN const char * const name            \
+                                    |Size_t name_len                       \
+                                    |const bool is_utf8                    \
+                                    |const bool to_fold                    \
+                                    |const bool runtime                    \
+                                    |const bool deferrable                 \
+                                    |NN bool * user_defined_ptr            \
+                                    |NN SV * msg                           \
+                                    |const STRLEN level
+ES     |SV *   |handle_user_defined_property|NN const char * name          \
+                                            |const STRLEN name_len         \
+                                            |const bool is_utf8            \
+                                            |const bool to_fold            \
+                                            |const bool runtime            \
+                                            |const bool deferrable         \
+                                            |NN SV* contents               \
+                                            |NN bool *user_defined_ptr     \
+                                            |NN SV * msg                   \
+                                            |const STRLEN level
+ERS    |REGEXP*|compile_wildcard|NN const char * name|const STRLEN len     \
+                                |const bool ignore_case
+ES     |I32    |execute_wildcard|NN REGEXP * const prog|NN char* stringarg \
+                               |NN char* strend|NN char* strbeg \
+                               |SSize_t minend |NN SV* screamer|U32 nosave
 ES     |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
                                |NN SV** invlist
 Ei     |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state   \
@@ -2021,30 +2049,6 @@ ETSR     |int    |edit_distance  |NN const UV *src                   \
                                |const STRLEN x                     \
                                |const STRLEN y                     \
                                |const SSize_t maxDistance
-EpX    |SV *   |parse_uniprop_string|NN const char * const name            \
-                                    |Size_t name_len                       \
-                                    |const bool is_utf8                    \
-                                    |const bool to_fold                    \
-                                    |const bool runtime                    \
-                                    |const bool deferrable                 \
-                                    |NN bool * user_defined_ptr            \
-                                    |NN SV * msg                           \
-                                    |const STRLEN level
-EXp    |SV *   |handle_user_defined_property|NN const char * name          \
-                                            |const STRLEN name_len         \
-                                            |const bool is_utf8            \
-                                            |const bool to_fold            \
-                                            |const bool runtime            \
-                                            |const bool deferrable         \
-                                            |NN SV* contents               \
-                                            |NN bool *user_defined_ptr     \
-                                            |NN SV * msg                   \
-                                            |const STRLEN level
-ERS    |REGEXP*|compile_wildcard|NN const char * name|const STRLEN len     \
-                                |const bool ignore_case
-ES     |I32    |execute_wildcard|NN REGEXP * const prog|NN char* stringarg \
-                               |NN char* strend|NN char* strbeg \
-                               |SSize_t minend |NN SV* screamer|U32 nosave
 #  ifdef DEBUGGING
 EFp    |int    |re_indentf     |NN const char *fmt|U32 depth|...
 ES     |void        |regdump_intflags|NULLOK const char *lead| const U32 flags
diff --git a/embed.h b/embed.h
index 603638f..4960ad6 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define cntrl_to_mnemonic      Perl_cntrl_to_mnemonic
 #define current_re_engine()    Perl_current_re_engine(aTHX)
 #define cv_ckproto_len_flags(a,b,c,d,e)        Perl_cv_ckproto_len_flags(aTHX_ a,b,c,d,e)
+#define do_uniprop_match       Perl_do_uniprop_match
 #define get_and_check_backslash_N_name(a,b,c,d)        Perl_get_and_check_backslash_N_name(aTHX_ a,b,c,d)
+#define get_deprecated_property_msg    Perl_get_deprecated_property_msg
+#define get_prop_definition(a) Perl_get_prop_definition(aTHX_ a)
+#define get_prop_values                Perl_get_prop_values
 #define grok_atoUV             Perl_grok_atoUV
 #define load_charnames(a,b,c,d)        Perl_load_charnames(aTHX_ a,b,c,d)
 #define mg_find_mglob(a)       Perl_mg_find_mglob(aTHX_ a)
 #define handle_named_backref(a,b,c,d)  S_handle_named_backref(aTHX_ a,b,c,d)
 #define handle_possible_posix(a,b,c,d,e)       S_handle_possible_posix(aTHX_ a,b,c,d,e)
 #define handle_regex_sets(a,b,c,d,e)   S_handle_regex_sets(aTHX_ a,b,c,d,e)
-#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j)      Perl_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
+#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j)      S_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
 #define invlist_contents(a,b)  S_invlist_contents(aTHX_ a,b)
 #define invlist_is_iterating   S_invlist_is_iterating
 #define invlist_lowest         S_invlist_lowest
 #define nextchar(a)            S_nextchar(aTHX_ a)
 #define output_posix_warnings(a,b)     S_output_posix_warnings(aTHX_ a,b)
 #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
-#define parse_uniprop_string(a,b,c,d,e,f,g,h,i)        Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i)
+#define parse_uniprop_string(a,b,c,d,e,f,g,h,i)        S_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i)
 #define populate_ANYOF_from_invlist(a,b)       S_populate_ANYOF_from_invlist(aTHX_ a,b)
 #define re_op_compile_wrapper(a,b,c)   S_re_op_compile_wrapper(aTHX_ a,b,c)
 #define reg(a,b,c,d)           S_reg(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index c825c07..d4c1972 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -938,6 +938,11 @@ PERL_CALLCONV Off_t        Perl_do_tell(pTHX_ GV* gv)
 PERL_CALLCONV Size_t   Perl_do_trans(pTHX_ SV* sv);
 #define PERL_ARGS_ASSERT_DO_TRANS      \
        assert(sv)
+PERL_CALLCONV I16      Perl_do_uniprop_match(const char * const key, const U16 key_len)
+                       __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_DO_UNIPROP_MATCH      \
+       assert(key)
+
 PERL_CALLCONV UV       Perl_do_vecget(pTHX_ SV* sv, STRLEN offset, int size);
 #define PERL_ARGS_ASSERT_DO_VECGET     \
        assert(sv)
@@ -1099,6 +1104,10 @@ PERL_CALLCONV CV*        Perl_get_cvn_flags(pTHX_ const char* name, STRLEN len, I32 fla
 PERL_CALLCONV void     Perl_get_db_sub(pTHX_ SV **svp, CV *cv);
 #define PERL_ARGS_ASSERT_GET_DB_SUB    \
        assert(cv)
+PERL_CALLCONV const char *     Perl_get_deprecated_property_msg(const Size_t warning_offset)
+                       __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GET_DEPRECATED_PROPERTY_MSG
+
 PERL_CALLCONV void     Perl_get_hash_seed(pTHX_ unsigned char * const seed_buffer);
 #define PERL_ARGS_ASSERT_GET_HASH_SEED \
        assert(seed_buffer)
@@ -1130,6 +1139,14 @@ PERL_CALLCONV PPADDR_t*  Perl_get_ppaddr(pTHX)
                        __attribute__pure__;
 #define PERL_ARGS_ASSERT_GET_PPADDR
 
+PERL_CALLCONV SV *     Perl_get_prop_definition(pTHX_ const int table_index)
+                       __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GET_PROP_DEFINITION
+
+PERL_CALLCONV const char * const *     Perl_get_prop_values(const int table_index)
+                       __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_GET_PROP_VALUES
+
 PERL_CALLCONV REGEXP * Perl_get_re_arg(pTHX_ SV *sv);
 #define PERL_ARGS_ASSERT_GET_RE_ARG
 PERL_CALLCONV SV*      Perl_get_sv(pTHX_ const char *name, I32 flags);
@@ -5621,7 +5638,7 @@ STATIC int        S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char*
 STATIC regnode_offset  S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
 #define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS     \
        assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
-PERL_CALLCONV SV *     Perl_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
+STATIC SV *    S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
 #define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY  \
        assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
 #ifndef PERL_NO_INLINE_FUNCTIONS
@@ -5673,7 +5690,7 @@ STATIC void       S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_w
 STATIC void    S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state);
 #define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS   \
        assert(pRExC_state)
-PERL_CALLCONV SV *     Perl_parse_uniprop_string(pTHX_ const char * const name, Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, bool * user_defined_ptr, SV * msg, const STRLEN level);
+STATIC SV *    S_parse_uniprop_string(pTHX_ const char * const name, Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, bool * user_defined_ptr, SV * msg, const STRLEN level);
 #define PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING  \
        assert(name); assert(user_defined_ptr); assert(msg)
 STATIC void    S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
index c9d34db..fbd0df2 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -22942,6 +22942,43 @@ Perl_init_uniprops(pTHX)
 #  endif
 }
 
+/* These four functions are compiled only in regcomp.c, where they have access
+ * to the data they return.  They are a way for re_comp.c to get access to that
+ * data without having to compile the whole data structures. */
+
+I16
+Perl_do_uniprop_match(const char * const key, const U16 key_len)
+{
+    PERL_ARGS_ASSERT_DO_UNIPROP_MATCH;
+
+    return match_uniprop((U8 *) key, key_len);
+}
+
+SV *
+Perl_get_prop_definition(pTHX_ const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_DEFINITION;
+
+    /* Create and return the inversion list */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+const char * const *
+Perl_get_prop_values(const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_VALUES;
+
+    return UNI_prop_value_ptrs[table_index];
+}
+
+const char *
+Perl_get_deprecated_property_msg(const Size_t warning_offset)
+{
+    PERL_ARGS_ASSERT_GET_DEPRECATED_PROPERTY_MSG;
+
+    return deprecated_property_msgs[warning_offset];
+}
+
 #  if 0
 
 This code was mainly added for backcompat to give a warning for non-portable
@@ -23011,10 +23048,8 @@ S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
     return result;
 }
 
-#ifndef PERL_IN_XSUB_RE
-
 SV *
-Perl_handle_user_defined_property(pTHX_
+S_handle_user_defined_property(pTHX_
 
     /* Parses the contents of a user-defined property definition; returning the
      * expanded definition if possible.  If so, the return is an inversion
@@ -23388,8 +23423,8 @@ S_get_fq_name(pTHX_
     return fq_name;
 }
 
-SV *
-Perl_parse_uniprop_string(pTHX_
+STATIC SV *
+S_parse_uniprop_string(pTHX_
 
     /* Parse the interior of a \p{}, \P{}.  Returns its definition if knowable
      * now.  If so, the return is an inversion list.
@@ -23605,10 +23640,10 @@ Perl_parse_uniprop_string(pTHX_
         {
             /* Find the property.  The table includes the equals sign, so we
              * use 'j' as-is */
-            table_index = match_uniprop((U8 *) lookup_name, j);
+            table_index = do_uniprop_match(lookup_name, j);
             if (table_index) {
                 const char * const * prop_values
-                                            = UNI_prop_value_ptrs[table_index];
+                                                = get_prop_values(table_index);
                 REGEXP * subpattern_re;
                 char open = name[i++];
                 char close;
@@ -24401,7 +24436,7 @@ Perl_parse_uniprop_string(pTHX_
 
     /* Get the index into our pointer table of the inversion list corresponding
      * to the property */
-    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+    table_index = do_uniprop_match(lookup_name, lookup_len);
 
     /* If it didn't find the property ... */
     if (table_index == 0) {
@@ -24416,7 +24451,7 @@ Perl_parse_uniprop_string(pTHX_
             equals_pos -= 2;
             slash_pos -= 2;
 
-            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+            table_index = do_uniprop_match(lookup_name, lookup_len);
         }
 
         if (table_index == 0) {
@@ -24580,7 +24615,7 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Here, we have the number in canonical form.  Try that */
-            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            table_index = do_uniprop_match(canonical, strlen(canonical));
             if (table_index == 0) {
                 goto failed;
             }
@@ -24604,7 +24639,8 @@ Perl_parse_uniprop_string(pTHX_
         table_index %= MAX_UNI_KEYWORD_INDEX;
         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
                 "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
-                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+                (int) name_len, name,
+                get_deprecated_property_msg(warning_offset));
     }
 
     /* In a few properties, a different property is used under /i.  These are
@@ -24632,10 +24668,9 @@ Perl_parse_uniprop_string(pTHX_
     }
 
     /* Create and return the inversion list */
-    prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+    prop_definition = get_prop_definition(table_index);
     sv_2mortal(prop_definition);
 
-
     /* See if there is a private use override to add to this definition */
     {
         COPHH * hinthash = (IN_PERL_COMPILETIME)
@@ -24755,8 +24790,6 @@ Perl_parse_uniprop_string(pTHX_
     }
 }
 
-#endif /* end of ! PERL_IN_XSUB_RE */
-
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */