From 4c404f263914b5bf989d64b86ad715cc085b84a0 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 21 Aug 2018 22:27:19 -0600 Subject: [PATCH] Remove relics of regex swash use This removes the most obvious and easy things that are no longer needed since regexes no longer use swashes at all. tr/// continues, for the time being, to use swashes, so not all swash handling is removable now. But tr/// doesn't use inversion lists, and so a bunch of code is ripped out here. Other code could have been, but I did only the relatively easy stuff. The rest can be ripped out all at once when tr/// is stops using swashes. --- embed.fnc | 17 +- embed.h | 9 +- lib/utf8_heavy.pl | 3 - proto.h | 23 +-- regcomp.c | 199 ++++----------------- regexec.c | 35 +--- utf8.c | 511 +++--------------------------------------------------- utf8.h | 5 - 8 files changed, 63 insertions(+), 739 deletions(-) diff --git a/embed.fnc b/embed.fnc index 9b34cfa..07a38a1 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1343,9 +1343,6 @@ Apmb |OP* |ref |NULLOK OP* o|I32 type s |OP* |refkids |NULLOK OP* o|I32 type #endif Ap |void |regdump |NN const regexp* r -ApM |SV* |regclass_swash |NULLOK const regexp *prog \ - |NN const struct regnode *node|bool doinit \ - |NULLOK SV **listsvp|NULLOK SV **altsvp #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C) EXpR |SV* |_new_invlist_C_array|NN const UV* const list EXMp |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b @@ -1735,19 +1732,12 @@ EXpM |void |_invlist_union_maybe_complement_2nd \ EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result EXpM |void |_invlist_invert|NN SV* const invlist EXMpR |SV* |_new_invlist |IV initial_size -EXMpR |SV* |_swash_to_invlist |NN SV* const swash EXMpR |SV* |_add_range_to_invlist |NULLOK SV* invlist|UV start|UV end EXMpR |SV* |_setup_canned_invlist|const STRLEN size|const UV element0|NN UV** other_elements_ptr -EXMpn |void |_invlist_populate_swatch |NN SV* const invlist|const UV start|const UV end|NN U8* swatch #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C) EMpX |SV* |invlist_clone |NN SV* const invlist|NULLOK SV* newlist #endif -#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C) -EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \ - |NN SV* listsv|I32 minbits|I32 none \ - |NULLOK SV* invlist|NULLOK U8* const flags_p -#endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) EiMRn |UV* |invlist_array |NN SV* const invlist EiMRn |bool |is_invlist |NN SV* const invlist @@ -1755,7 +1745,6 @@ EiMRn |bool* |get_invlist_offset_addr|NN SV* invlist EiMRn |UV |_invlist_len |NN SV* const invlist EMiRn |bool |_invlist_contains_cp|NN SV* const invlist|const UV cp EXpMRn |SSize_t|_invlist_search |NN SV* const invlist|const UV cp -EXMpR |SV* |_get_swash_invlist|NN SV* const swash #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) EXpM |SV* |_get_regclass_nonbitmap_data \ @@ -2385,10 +2374,8 @@ Es |regnode_offset|regbranch |NN RExC_state_t *pRExC_state \ Es |void |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \ |NN regnode* const node \ |NULLOK SV* const cp_list \ - |NULLOK SV* const runtime_defns \ - |NULLOK SV* const only_utf8_locale_list \ - |NULLOK SV* const swash \ - |const bool has_user_defined_property + |NULLOK SV* const runtime_defns \ + |NULLOK SV* const only_utf8_locale_list Es |void |output_posix_warnings \ |NN RExC_state_t *pRExC_state \ |NN AV* posix_warnings diff --git a/embed.h b/embed.h index 6c9984a..fa1a376 100644 --- a/embed.h +++ b/embed.h @@ -631,7 +631,6 @@ #define reg_named_buff_firstkey(a,b) Perl_reg_named_buff_firstkey(aTHX_ a,b) #define reg_named_buff_nextkey(a,b) Perl_reg_named_buff_nextkey(aTHX_ a,b) #define reg_named_buff_scalar(a,b) Perl_reg_named_buff_scalar(aTHX_ a,b) -#define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e) #define regdump(a) Perl_regdump(aTHX_ a) #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regfree_internal(a) Perl_regfree_internal(aTHX_ a) @@ -1219,7 +1218,7 @@ #define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c) #define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d) #define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d) -#define set_ANYOF_arg(a,b,c,d,e,f,g) S_set_ANYOF_arg(aTHX_ a,b,c,d,e,f,g) +#define set_ANYOF_arg(a,b,c,d,e) S_set_ANYOF_arg(aTHX_ a,b,c,d,e) #define set_regex_pv(a,b) S_set_regex_pv(aTHX_ a,b) #define skip_to_be_ignored_text(a,b,c) S_skip_to_be_ignored_text(aTHX_ a,b,c) #define ssc_add_range(a,b,c) S_ssc_add_range(aTHX_ a,b,c) @@ -1251,7 +1250,6 @@ #define regprop(a,b,c,d,e) Perl_regprop(aTHX_ a,b,c,d,e) # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) -#define _get_swash_invlist(a) Perl__get_swash_invlist(aTHX_ a) #define _invlist_contains_cp S__invlist_contains_cp #define _invlist_len S__invlist_len #define _invlist_search Perl__invlist_search @@ -1259,9 +1257,6 @@ #define invlist_array S_invlist_array #define is_invlist S_is_invlist # endif -# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C) -#define _core_swash_init(a,b,c,d,e,f,g) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g) -# endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C) #define invlist_clone(a,b) Perl_invlist_clone(aTHX_ a,b) # endif @@ -1276,11 +1271,9 @@ #define _add_range_to_invlist(a,b,c) Perl__add_range_to_invlist(aTHX_ a,b,c) #define _invlist_intersection_maybe_complement_2nd(a,b,c,d) Perl__invlist_intersection_maybe_complement_2nd(aTHX_ a,b,c,d) #define _invlist_invert(a) Perl__invlist_invert(aTHX_ a) -#define _invlist_populate_swatch Perl__invlist_populate_swatch #define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d) #define _new_invlist(a) Perl__new_invlist(aTHX_ a) #define _setup_canned_invlist(a,b,c) Perl__setup_canned_invlist(aTHX_ a,b,c) -#define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a) # endif # if defined(PERL_IN_REGEXEC_C) #define advance_one_LB(a,b,c) S_advance_one_LB(aTHX_ a,b,c) diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 8882cf4..22cee9e 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -75,9 +75,6 @@ sub _loose_name ($) { ## ## Callers of swash_init: ## op.c:pmtrans -- for tr/// and y/// - ## regexec.c:regclass_swash -- for /[]/, \p, and \P - ## utf8.c:is_utf8_common -- for common Unicode properties - ## utf8.c:S__to_utf8_case -- for lc, uc, ucfirst, etc. and //i ## Unicode::UCD::prop_invlist ## Unicode::UCD::prop_invmap ## diff --git a/proto.h b/proto.h index d2e0457..680733c 100644 --- a/proto.h +++ b/proto.h @@ -2862,9 +2862,6 @@ PERL_CALLCONV SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx); PERL_CALLCONV REGEXP* Perl_reg_temp_copy(pTHX_ REGEXP* dsv, REGEXP* ssv); #define PERL_ARGS_ASSERT_REG_TEMP_COPY \ assert(ssv) -PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const regexp *prog, const struct regnode *node, bool doinit, SV **listsvp, SV **altsvp); -#define PERL_ARGS_ASSERT_REGCLASS_SWASH \ - assert(node) PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r); #define PERL_ARGS_ASSERT_REGDUMP \ assert(r) @@ -5564,7 +5561,7 @@ STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf); #define PERL_ARGS_ASSERT_SCAN_COMMIT \ assert(pRExC_state); assert(data); assert(minlenp) -STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list, SV* const swash, const bool has_user_defined_property); +STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list); #define PERL_ARGS_ASSERT_SET_ANYOF_ARG \ assert(pRExC_state); assert(node) STATIC void S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx); @@ -5654,11 +5651,6 @@ PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* assert(sv); assert(o) #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) -PERL_CALLCONV SV* Perl__get_swash_invlist(pTHX_ SV* const swash) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT__GET_SWASH_INVLIST \ - assert(swash) - #ifndef PERL_NO_INLINE_FUNCTIONS PERL_STATIC_INLINE bool S__invlist_contains_cp(SV* const invlist, const UV cp) __attribute__warn_unused_result__; @@ -5700,11 +5692,6 @@ PERL_STATIC_INLINE bool S_is_invlist(SV* const invlist) #endif #endif -#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C) -PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, SV* invlist, U8* const flags_p); -#define PERL_ARGS_ASSERT__CORE_SWASH_INIT \ - assert(pkg); assert(name); assert(listsv) -#endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C) PERL_CALLCONV SV* Perl_invlist_clone(pTHX_ SV* const invlist, SV* newlist); #define PERL_ARGS_ASSERT_INVLIST_CLONE \ @@ -5750,9 +5737,6 @@ PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* con PERL_CALLCONV void Perl__invlist_invert(pTHX_ SV* const invlist); #define PERL_ARGS_ASSERT__INVLIST_INVERT \ assert(invlist) -PERL_CALLCONV void Perl__invlist_populate_swatch(SV* const invlist, const UV start, const UV end, U8* swatch); -#define PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH \ - assert(invlist); assert(swatch) /* PERL_CALLCONV void _invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result); */ /* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output); */ PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output); @@ -5766,11 +5750,6 @@ PERL_CALLCONV SV* Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV e #define PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST \ assert(other_elements_ptr) -PERL_CALLCONV SV* Perl__swash_to_invlist(pTHX_ SV* const swash) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT__SWASH_TO_INVLIST \ - assert(swash) - #endif #if defined(PERL_IN_REGEXEC_C) STATIC LB_enum S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target) diff --git a/regcomp.c b/regcomp.c index c7af3fe..41d2582 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2109,8 +2109,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) populate_ANYOF_from_invlist( (regnode *) ssc, &invlist); - set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, - NULL, NULL, NULL, FALSE); + set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL); /* Make sure is clone-safe */ ssc->invlist = NULL; @@ -9386,100 +9385,6 @@ Perl__invlist_search(SV* const invlist, const UV cp) } void -Perl__invlist_populate_swatch(SV* const invlist, - const UV start, const UV end, U8* swatch) -{ - /* populates a swatch of a swash the same way swatch_get() does in utf8.c, - * but is used when the swash has an inversion list. This makes this much - * faster, as it uses a binary search instead of a linear one. This is - * intimately tied to that function, and perhaps should be in utf8.c, - * except it is intimately tied to inversion lists as well. It assumes - * that is all 0's on input */ - - UV current = start; - const IV len = _invlist_len(invlist); - IV i; - const UV * array; - - PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH; - - if (len == 0) { /* Empty inversion list */ - return; - } - - array = invlist_array(invlist); - - /* Find which element it is */ - i = _invlist_search(invlist, start); - - /* We populate from to */ - while (current < end) { - UV upper; - - /* The inversion list gives the results for every possible code point - * after the first one in the list. Only those ranges whose index is - * even are ones that the inversion list matches. For the odd ones, - * and if the initial code point is not in the list, we have to skip - * forward to the next element */ - if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) { - i++; - if (i >= len) { /* Finished if beyond the end of the array */ - return; - } - current = array[i]; - if (current >= end) { /* Finished if beyond the end of what we - are populating */ - if (LIKELY(end < UV_MAX)) { - return; - } - - /* We get here when the upper bound is the maximum - * representable on the machine, and we are looking for just - * that code point. Have to special case it */ - i = len; - goto join_end_of_list; - } - } - assert(current >= start); - - /* The current range ends one below the next one, except don't go past - * */ - i++; - upper = (i < len && array[i] < end) ? array[i] : end; - - /* Here we are in a range that matches. Populate a bit in the 3-bit U8 - * for each code point in it */ - for (; current < upper; current++) { - const STRLEN offset = (STRLEN)(current - start); - swatch[offset >> 3] |= 1 << (offset & 7); - } - - join_end_of_list: - - /* Quit if at the end of the list */ - if (i >= len) { - - /* But first, have to deal with the highest possible code point on - * the platform. The previous code assumes that is one - * beyond where we want to populate, but that is impossible at the - * platform's infinity, so have to handle it specially */ - if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1))) - { - const STRLEN offset = (STRLEN)(end - start); - swatch[offset >> 3] |= 1 << (offset & 7); - } - return; - } - - /* Advance to the next range, which will be for code points not in the - * inversion list */ - current = array[i]; - } - - return; -} - -void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output) { @@ -16554,7 +16459,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * * ANYOF nodes contain a bit map for the first NUM_ANYOF_CODE_POINTS * characters, with the corresponding bit set if that character is in the - * list. For characters above this, a range list or swash is used. There + * list. For characters above this, an inversion list is used. There * are extra bits for \w, etc. in locale ANYOFs, as what these match is not * determinable at compile time * @@ -16603,14 +16508,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool skip_white = cBOOL( ret_invlist || (RExC_flags & RXf_PMf_EXTENDED_MORE)); - /* Unicode properties are stored in a swash; this holds the current one - * being parsed. If this swash is the only above-latin1 component of the - * character class, an optimization is to pass it directly on to the - * execution engine. Otherwise, it is set to NULL to indicate that there - * are other things in the class that have to be dealt with at execution - * time */ - SV* swash = NULL; /* Code points that match \p{} \P{} */ - /* inversion list of code points this node matches only when the target * string is in UTF-8. These are all non-ASCII, < 256. (Because is under * /d) */ @@ -16934,8 +16831,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { char *e; - SvREFCNT_dec(swash); /* Free any left-overs */ - /* \p means they want Unicode semantics */ REQUIRE_UNI_RULES(flagp, 0); @@ -17075,12 +16970,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, _invlist_union_complement_2nd(properties, prop_definition, &properties); - - /* The swash can't be used as-is, because we've - * inverted things; delay removing it to here after - * have copied its invlist above */ - SvREFCNT_dec(swash); - swash = NULL; } else { _invlist_union(properties, prop_definition, &properties); @@ -17899,8 +17788,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, /* And combine the result (if any) with any inversion lists from posix * classes. The lists are kept separate up to now because we don't want to - * fold the classes (folding of those is automatically handled by the swash - * fetching code) */ + * fold the classes */ if (simple_posixes) { /* These are the classes known to be unaffected by /a, /aa, and /d */ if (cp_list) { @@ -18081,10 +17969,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * folded until runtime */ /* If we didn't do folding, it's because some information isn't available - * until runtime; set the run-time fold flag for these. (We don't have to - * worry about properties folding, as that is taken care of by the swash - * fetching). We know to set the flag if we have a non-NULL list for UTF-8 - * locales, or the class matches at least one 0-255 range code point */ + * until runtime; set the run-time fold flag for these We know to set the + * flag if we have a non-NULL list for UTF-8 locales, or the class matches + * at least one 0-255 range code point */ if (LOC && FOLD) { /* Some things on the list might be unconditionally included because of @@ -18134,18 +18021,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { _invlist_invert(cp_list); - /* Any swash can't be used as-is, because we've inverted things */ - if (swash) { - SvREFCNT_dec_NN(swash); - swash = NULL; - } - - invert = FALSE; + /* Clear the invert flag since have just done it here */ + invert = FALSE; } if (ret_invlist) { *ret_invlist = cp_list; - SvREFCNT_dec(swash); return RExC_emit; } @@ -18940,23 +18821,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ANYOF_FLAGS(REGNODE_p(ret)) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP; } - /* If there is a swash and more than one element, we can't use the swash in - * the optimization below. */ - if (swash && element_count > 1) { - SvREFCNT_dec_NN(swash); - swash = NULL; - } - - /* Note that the optimization of using 'swash' if it is the only thing in - * the class doesn't have us change swash at all, so it can include things - * that are also in the bitmap; otherwise we have purposely deleted that - * duplicate information */ set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION) ? listsv : NULL, - only_utf8_locale_list, - swash, cBOOL(has_runtime_dependency - & HAS_USER_DEFINED_PROPERTY)); + only_utf8_locale_list); return ret; not_anyof: @@ -18977,9 +18845,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, - SV* const only_utf8_locale_list, - SV* const swash, - const bool has_user_defined_property) + SV* const only_utf8_locale_list) { /* Sets the arg field of an ANYOF-type node 'node', using information about * the node passed-in. If there is nothing outside the node's bitmap, the @@ -19038,14 +18904,15 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, { /* For internal core use only. - * Returns the swash for the input 'node' in the regex 'prog'. - * If is 'true', will attempt to create the swash if not already - * done. + * Returns the inversion list for the input 'node' in the regex 'prog'. + * If is 'true', will attempt to create the inversion list if not + * already done. * If is non-null, will return the printable contents of the - * swash. This can be used to get debugging information even before the - * swash exists, by calling this function with 'doinit' set to false, in - * which case the components that will be used to eventually create the - * swash are returned (in a printable form). + * property definition. This can be used to get debugging information + * even before the inversion list exists, by calling this function with + * 'doinit' set to false, in which case the components that will be used + * to eventually create the inversion list are returned (in a printable + * form). * If is not NULL, it is where this routine is to * store an inversion list of code points that should match only if the * execution-time locale is a UTF-8 one. @@ -19053,17 +18920,17 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, * inversion list of the code points that would be instead returned in * if this were NULL. Thus, what gets output in * when this parameter is used, is just the non-code point data that - * will go into creating the swash. This currently should be just + * will go into creating the inversion list. This currently should be just * user-defined properties whose definitions were not known at compile * time. Using this parameter allows for easier manipulation of the - * swash's data by the caller. It is illegal to call this function with - * this parameter set, but not + * inversion list's data by the caller. It is illegal to call this + * function with this parameter set, but not * * Tied intimately to how S_set_ANYOF_arg sets up the data structure. Note - * that, in spite of this function's name, the swash it returns may include - * the bitmap data as well */ + * that, in spite of this function's name, the inversion list it returns + * may include the bitmap data as well */ - SV *si = NULL; /* Input swash initialization string */ + SV *si = NULL; /* Input initialization string */ SV* invlist = NULL; RXi_GET_DECL(prog, progi); @@ -19136,15 +19003,15 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, } } - /* If requested, return a printable version of what this swash matches */ + /* If requested, return a printable version of what this ANYOF node matches + * */ if (listsvp) { SV* matches_string = NULL; - /* The swash should be used, if possible, to get the data, as it - * contains the resolved data. But this function can be called at - * compile-time, before everything gets resolved, in which case we - * return the currently best available information, which is the string - * that will eventually be used to do that resolving, 'si' */ + /* This function can be called at compile-time, before everything gets + * resolved, in which case we return the currently best available + * information, which is the string that will eventually be used to do + * that resolving, 'si' */ if (si) { /* Here, we only have 'si' (and possibly some passed-in data in * 'invlist', which is handled below) If the caller only wants @@ -19238,12 +19105,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, if (SvCUR(matches_string)) { /* Get rid of trailing blank */ SvCUR_set(matches_string, SvCUR(matches_string) - 1); } - } /* end of has an 'si' but no swash */ + } /* end of has an 'si' */ } - /* If we have a swash in place, its equivalent inversion list was above - * placed into 'invlist'. If not, this variable may contain a stored - * inversion list which is information beyond what is in 'si' */ + /* Add the stuff that's already known */ if (invlist) { /* Again, if the caller doesn't want the output inversion list, put diff --git a/regexec.c b/regexec.c index bd9d255..b612f04 100644 --- a/regexec.c +++ b/regexec.c @@ -504,7 +504,7 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e) * This just calls isFOO_lc on the code point for the character if it is in * the range 0-255. Outside that range, all characters use Unicode * rules, ignoring any locale. So use the Unicode function if this class - * requires a swash, and use the Unicode macro otherwise. */ + * requires an inversion list, and use the Unicode macro otherwise. */ PERL_ARGS_ASSERT_ISFOO_UTF8_LC; @@ -9620,27 +9620,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, return(c); } - -#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) -/* -- regclass_swash - prepare the utf8 swash. Wraps the shared core version to -create a copy so that changes the caller makes won't change the shared one. -If is non-null, will return NULL in it, for back-compat. - */ -SV * -Perl_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV **altsvp) -{ - PERL_ARGS_ASSERT_REGCLASS_SWASH; - - if (altsvp) { - *altsvp = NULL; - } - - return newSVsv(_get_regclass_nonbitmap_data(prog, node, doinit, listsvp, NULL, NULL)); -} - -#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */ - /* - reginclass - determine if a character falls into a character class @@ -9789,9 +9768,9 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const && IN_UTF8_CTYPE_LOCALE))) { SV* only_utf8_locale = NULL; - SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0, - &only_utf8_locale, NULL); - if (sw) { + SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE, + 0, &only_utf8_locale, NULL); + if (definition) { U8 utf8_buffer[2]; U8 * utf8_p; if (utf8_target) { @@ -9808,21 +9787,21 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const && isALPHA_FOLD_EQ(*p, 'i')) { if (*p == 'i') { - if (_invlist_contains_cp(sw, + if (_invlist_contains_cp(definition, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)) { match = TRUE; } } else if (*p == 'I') { - if (_invlist_contains_cp(sw, + if (_invlist_contains_cp(definition, LATIN_SMALL_LETTER_DOTLESS_I)) { match = TRUE; } } } - else if (_invlist_contains_cp(sw, c)) { + else if (_invlist_contains_cp(definition, c)) { match = TRUE; } } diff --git a/utf8.c b/utf8.c index 6354f85..ff5d4ad 100644 --- a/utf8.c +++ b/utf8.c @@ -4220,81 +4220,43 @@ SV* Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits, I32 none) { - PERL_ARGS_ASSERT_SWASH_INIT; - /* Returns a copy of a swash initiated by the called function. This is the * public interface, and returning a copy prevents others from doing - * mischief on the original */ - - return newSVsv(_core_swash_init(pkg, name, listsv, minbits, none, - NULL, NULL)); -} - -SV* -Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, - I32 minbits, I32 none, SV* invlist, - U8* const flags_p) -{ + * mischief on the original. The only remaining use of this is in tr/// */ /*NOTE NOTE NOTE - If you want to use "return" in this routine you MUST * use the following define */ -#define CORE_SWASH_INIT_RETURN(x) \ +#define SWASH_INIT_RETURN(x) \ PL_curpm= old_PL_curpm; \ - return x + return newSVsv(x) /* Initialize and return a swash, creating it if necessary. It does this - * by calling utf8_heavy.pl in the general case. The returned value may be - * the swash's inversion list instead if the input parameters allow it. - * Which is returned should be immaterial to callers, as the only - * operations permitted on a swash, swash_fetch(), _get_swash_invlist(), - * and swash_to_invlist() handle both these transparently. - * - * This interface should only be used by functions that won't destroy or - * adversely change the swash, as doing so affects all other uses of the - * swash in the program; the general public should use 'Perl_swash_init' - * instead. + * by calling utf8_heavy.pl in the general case. * * pkg is the name of the package that should be in. - * name is the name of the swash to find. Typically it is a Unicode - * property name, including user-defined ones + * name is the name of the swash to find. * listsv is a string to initialize the swash with. It must be of the form * documented as the subroutine return value in * L * minbits is the number of bits required to represent each data element. - * It is '1' for binary properties. * none I (khw) do not understand this one, but it is used only in tr///. - * invlist is an inversion list to initialize the swash with (or NULL) - * flags_p if non-NULL is the address of various input and output flag bits - * to the routine, as follows: ('I' means is input to the routine; - * 'O' means output from the routine. Only flags marked O are - * meaningful on return.) - * _CORE_SWASH_INIT_USER_DEFINED_PROPERTY indicates if the swash - * came from a user-defined property. (I O) - * _CORE_SWASH_INIT_RETURN_IF_UNDEF indicates that instead of croaking - * when the swash cannot be located, to simply return NULL. (I) - * _CORE_SWASH_INIT_ACCEPT_INVLIST indicates that the caller will accept a - * return of an inversion list instead of a swash hash if this routine - * thinks that would result in faster execution of swash_fetch() later - * on. (I) * - * Thus there are three possible inputs to find the swash: , - * , and . At least one must be specified. The result + * Thus there are two possible inputs to find the swash: and + * . At least one must be specified. The result * will be the union of the specified ones, although 's various * actions can intersect, etc. what gives. To avoid going out to * disk at all, should specify completely what the swash should * have, and should be &PL_sv_undef and should be "". - * - * is only valid for binary properties */ + */ PMOP *old_PL_curpm= PL_curpm; /* save away the old PL_curpm */ SV* retval = &PL_sv_undef; - HV* swash_hv = NULL; - const bool use_invlist= (flags_p && *flags_p & _CORE_SWASH_INIT_ACCEPT_INVLIST); - assert(listsv != &PL_sv_undef || strNE(name, "") || invlist); - assert(! invlist || minbits == 1); + PERL_ARGS_ASSERT_SWASH_INIT; + + assert(listsv != &PL_sv_undef || strNE(name, "")); PL_curpm= NULL; /* reset PL_curpm so that we dont get confused between the regex that triggered the swash init and the swash init @@ -4310,7 +4272,6 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, SV* errsv_save; GV *method; - PERL_ARGS_ASSERT__CORE_SWASH_INIT; PUSHSTACKi(PERLSI_MAGIC); ENTER; @@ -4383,115 +4344,10 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, if (IN_PERL_COMPILETIME) { CopHINTS_set(PL_curcop, PL_hints); } - if (!SvROK(retval) || SvTYPE(SvRV(retval)) != SVt_PVHV) { - if (SvPOK(retval)) { - - /* If caller wants to handle missing properties, let them */ - if (flags_p && *flags_p & _CORE_SWASH_INIT_RETURN_IF_UNDEF) { - CORE_SWASH_INIT_RETURN(NULL); - } - Perl_croak(aTHX_ - "Can't find Unicode property definition \"%" SVf "\"", - SVfARG(retval)); - NOT_REACHED; /* NOTREACHED */ - } - } } /* End of calling the module to find the swash */ - /* If this operation fetched a swash, and we will need it later, get it */ - if (retval != &PL_sv_undef - && (minbits == 1 || (flags_p - && ! (*flags_p - & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)))) - { - swash_hv = MUTABLE_HV(SvRV(retval)); - - /* If we don't already know that there is a user-defined component to - * this swash, and the user has indicated they wish to know if there is - * one (by passing ), find out */ - if (flags_p && ! (*flags_p & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)) { - SV** user_defined = hv_fetchs(swash_hv, "USER_DEFINED", FALSE); - if (user_defined && SvUV(*user_defined)) { - *flags_p |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY; - } - } - } - - /* Make sure there is an inversion list for binary properties */ - if (minbits == 1) { - SV** swash_invlistsvp = NULL; - SV* swash_invlist = NULL; - bool invlist_in_swash_is_valid = FALSE; - bool swash_invlist_unclaimed = FALSE; /* whether swash_invlist has - an unclaimed reference count */ - - /* If this operation fetched a swash, get its already existing - * inversion list, or create one for it */ - - if (swash_hv) { - swash_invlistsvp = hv_fetchs(swash_hv, "V", FALSE); - if (swash_invlistsvp) { - swash_invlist = *swash_invlistsvp; - invlist_in_swash_is_valid = TRUE; - } - else { - swash_invlist = _swash_to_invlist(retval); - swash_invlist_unclaimed = TRUE; - } - } - - /* If an inversion list was passed in, have to include it */ - if (invlist) { - - /* Any fetched swash will by now have an inversion list in it; - * otherwise will be NULL, indicating that we - * didn't fetch a swash */ - if (swash_invlist) { - - /* Add the passed-in inversion list, which invalidates the one - * already stored in the swash */ - invlist_in_swash_is_valid = FALSE; - SvREADONLY_off(swash_invlist); /* Turned on again below */ - _invlist_union(invlist, swash_invlist, &swash_invlist); - } - else { - - /* Here, there is no swash already. Set up a minimal one, if - * we are going to return a swash */ - if (! use_invlist) { - swash_hv = newHV(); - retval = newRV_noinc(MUTABLE_SV(swash_hv)); - } - swash_invlist = invlist; - } - } - - /* Here, we have computed the union of all the passed-in data. It may - * be that there was an inversion list in the swash which didn't get - * touched; otherwise save the computed one */ - if (! invlist_in_swash_is_valid && ! use_invlist) { - if (! hv_stores(MUTABLE_HV(SvRV(retval)), "V", swash_invlist)) - { - Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed"); - } - /* We just stole a reference count. */ - if (swash_invlist_unclaimed) swash_invlist_unclaimed = FALSE; - else SvREFCNT_inc_simple_void_NN(swash_invlist); - } - - /* The result is immutable. Forbid attempts to change it. */ - SvREADONLY_on(swash_invlist); - - if (use_invlist) { - SvREFCNT_dec(retval); - if (!swash_invlist_unclaimed) - SvREFCNT_inc_simple_void_NN(swash_invlist); - retval = newRV_noinc(swash_invlist); - } - } - - CORE_SWASH_INIT_RETURN(retval); -#undef CORE_SWASH_INIT_RETURN + SWASH_INIT_RETURN(retval); +#undef SWASH_INIT_RETURN } @@ -4814,41 +4670,32 @@ STATIC SV* S_swatch_get(pTHX_ SV* swash, UV start, UV span) { SV *swatch; - U8 *l, *lend, *x, *xend, *s, *send; + U8 *l, *lend, *x, *xend, *s; STRLEN lcur, xcur, scur; HV *const hv = MUTABLE_HV(SvRV(swash)); - SV** const invlistsvp = hv_fetchs(hv, "V", FALSE); SV** listsvp = NULL; /* The string containing the main body of the table */ SV** extssvp = NULL; - SV** invert_it_svp = NULL; U8* typestr = NULL; - STRLEN bits; + STRLEN bits = 0; STRLEN octets; /* if bits == 1, then octets == 0 */ UV none; UV end = start + span; - if (invlistsvp == NULL) { SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE); SV** const nonesvp = hv_fetchs(hv, "NONE", FALSE); SV** const typesvp = hv_fetchs(hv, "TYPE", FALSE); extssvp = hv_fetchs(hv, "EXTRAS", FALSE); listsvp = hv_fetchs(hv, "LIST", FALSE); - invert_it_svp = hv_fetchs(hv, "INVERT_IT", FALSE); bits = SvUV(*bitssvp); none = SvUV(*nonesvp); typestr = (U8*)SvPV_nolen(*typesvp); - } - else { - bits = 1; - none = 0; - } octets = bits >> 3; /* if bits == 1, then octets == 0 */ PERL_ARGS_ASSERT_SWATCH_GET; - if (bits != 1 && bits != 8 && bits != 16 && bits != 32) { + if (bits != 8 && bits != 16 && bits != 32) { Perl_croak(aTHX_ "panic: swatch_get doesn't expect bits %" UVuf, (UV)bits); } @@ -4888,16 +4735,11 @@ S_swatch_get(pTHX_ SV* swash, UV start, UV span) SvCUR_set(swatch, scur); s = (U8*)SvPVX(swatch); - if (invlistsvp) { /* If has an inversion list set up use that */ - _invlist_populate_swatch(*invlistsvp, start, end, s); - return swatch; - } - /* read $swash->{LIST} */ l = (U8*)SvPV(*listsvp, lcur); lend = l + lcur; while (l < lend) { - UV min, max, val, upper; + UV min = 0, max = 0, val = 0, upper; l = swash_scan_list_line(l, lend, &min, &max, &val, cBOOL(octets), typestr); if (l > lend) { @@ -4946,43 +4788,9 @@ S_swatch_get(pTHX_ SV* swash, UV start, UV span) ++val; } } - else { /* bits == 1, then val should be ignored */ - UV key; - if (min < start) - min = start; - - for (key = min; key <= upper; key++) { - const STRLEN offset = (STRLEN)(key - start); - s[offset >> 3] |= 1 << (offset & 7); - } - } } /* while */ - /* Invert if the data says it should be. Assumes that bits == 1 */ - if (invert_it_svp && SvUV(*invert_it_svp)) { - - /* Unicode properties should come with all bits above PERL_UNICODE_MAX - * be 0, and their inversion should also be 0, as we don't succeed any - * Unicode property matches for non-Unicode code points */ - if (start <= PERL_UNICODE_MAX) { - - /* The code below assumes that we never cross the - * Unicode/above-Unicode boundary in a range, as otherwise we would - * have to figure out where to stop flipping the bits. Since this - * boundary is divisible by a large power of 2, and swatches comes - * in small powers of 2, this should be a valid assumption */ - assert(start + span - 1 <= PERL_UNICODE_MAX); - - send = s + scur; - while (s < send) { - *s = ~(*s); - s++; - } - } - } - - /* read $swash->{EXTRAS} - * This code also copied to swash_to_invlist() below */ + /* read $swash->{EXTRAS} */ x = (U8*)SvPV(*extssvp, xcur); xend = x + xcur; while (x < xend) { @@ -5038,34 +4846,7 @@ S_swatch_get(pTHX_ SV* swash, UV start, UV span) Perl_croak(aTHX_ "panic: swatch_get got improper swatch"); s = (U8*)SvPV(swatch, slen); - if (bits == 1 && otherbits == 1) { - if (slen != olen) - Perl_croak(aTHX_ "panic: swatch_get found swatch length " - "mismatch, slen=%" UVuf ", olen=%" UVuf, - (UV)slen, (UV)olen); - - switch (opc) { - case '+': - while (slen--) - *s++ |= *o++; - break; - case '!': - while (slen--) - *s++ |= ~*o++; - break; - case '-': - while (slen--) - *s++ &= ~*o++; - break; - case '&': - while (slen--) - *s++ &= *o++; - break; - default: - break; - } - } - else { + { STRLEN otheroctets = otherbits >> 3; STRLEN offset = 0; U8* const send = s + slen; @@ -5111,265 +4892,13 @@ S_swatch_get(pTHX_ SV* swash, UV start, UV span) *s++ = (U8)((otherval >> 8) & 0xff); *s++ = (U8)( otherval & 0xff); } - } + } } sv_free(other); /* through with it! */ } /* while */ return swatch; } -SV* -Perl__swash_to_invlist(pTHX_ SV* const swash) -{ - - /* Subject to change or removal. For use only in one place in regcomp.c. - * Ownership is given to one reference count in the returned SV* */ - - U8 *l, *lend; - char *loc; - STRLEN lcur; - HV *const hv = MUTABLE_HV(SvRV(swash)); - UV elements = 0; /* Number of elements in the inversion list */ - U8 empty[] = ""; - SV** listsvp; - SV** typesvp; - SV** bitssvp; - SV** extssvp; - SV** invert_it_svp; - - U8* typestr; - STRLEN bits; - STRLEN octets; /* if bits == 1, then octets == 0 */ - U8 *x, *xend; - STRLEN xcur; - - SV* invlist; - - PERL_ARGS_ASSERT__SWASH_TO_INVLIST; - - /* If not a hash, it must be the swash's inversion list instead */ - if (SvTYPE(hv) != SVt_PVHV) { - return SvREFCNT_inc_simple_NN((SV*) hv); - } - - /* The string containing the main body of the table */ - listsvp = hv_fetchs(hv, "LIST", FALSE); - typesvp = hv_fetchs(hv, "TYPE", FALSE); - bitssvp = hv_fetchs(hv, "BITS", FALSE); - extssvp = hv_fetchs(hv, "EXTRAS", FALSE); - invert_it_svp = hv_fetchs(hv, "INVERT_IT", FALSE); - - typestr = (U8*)SvPV_nolen(*typesvp); - bits = SvUV(*bitssvp); - octets = bits >> 3; /* if bits == 1, then octets == 0 */ - - /* read $swash->{LIST} */ - if (SvPOK(*listsvp)) { - l = (U8*)SvPV(*listsvp, lcur); - } - else { - /* LIST legitimately doesn't contain a string during compilation phases - * of Perl itself, before the Unicode tables are generated. In this - * case, just fake things up by creating an empty list */ - l = empty; - lcur = 0; - } - loc = (char *) l; - lend = l + lcur; - - if (*l == 'V') { /* Inversion list format */ - const char *after_atou = (char *) lend; - UV element0; - UV* other_elements_ptr; - - /* The first number is a count of the rest */ - l++; - if (!grok_atoUV((const char *)l, &elements, &after_atou)) { - Perl_croak(aTHX_ "panic: Expecting a valid count of elements" - " at start of inversion list"); - } - if (elements == 0) { - invlist = _new_invlist(0); - } - else { - l = (U8 *) after_atou; - - /* Get the 0th element, which is needed to setup the inversion list - * */ - while (isSPACE(*l)) l++; - after_atou = (char *) lend; - if (!grok_atoUV((const char *)l, &element0, &after_atou)) { - Perl_croak(aTHX_ "panic: Expecting a valid 0th element for" - " inversion list"); - } - l = (U8 *) after_atou; - invlist = _setup_canned_invlist(elements, element0, - &other_elements_ptr); - elements--; - - /* Then just populate the rest of the input */ - while (elements-- > 0) { - if (l > lend) { - Perl_croak(aTHX_ "panic: Expecting %" UVuf " more" - " elements than available", elements); - } - while (isSPACE(*l)) l++; - after_atou = (char *) lend; - if (!grok_atoUV((const char *)l, other_elements_ptr++, - &after_atou)) - { - Perl_croak(aTHX_ "panic: Expecting a valid element" - " in inversion list"); - } - l = (U8 *) after_atou; - } - } - } - else { - - /* Scan the input to count the number of lines to preallocate array - * size based on worst possible case, which is each line in the input - * creates 2 elements in the inversion list: 1) the beginning of a - * range in the list; 2) the beginning of a range not in the list. */ - while ((loc = (char *) memchr(loc, '\n', lend - (U8 *) loc)) != NULL) { - elements += 2; - loc++; - } - - /* If the ending is somehow corrupt and isn't a new line, add another - * element for the final range that isn't in the inversion list */ - if (! (*lend == '\n' - || (*lend == '\0' && (lcur == 0 || *(lend - 1) == '\n')))) - { - elements++; - } - - invlist = _new_invlist(elements); - - /* Now go through the input again, adding each range to the list */ - while (l < lend) { - UV start, end; - UV val; /* Not used by this function */ - - l = swash_scan_list_line(l, lend, &start, &end, &val, - cBOOL(octets), typestr); - - if (l > lend) { - break; - } - - invlist = _add_range_to_invlist(invlist, start, end); - } - } - - /* Invert if the data says it should be */ - if (invert_it_svp && SvUV(*invert_it_svp)) { - _invlist_invert(invlist); - } - - /* This code is copied from swatch_get() - * read $swash->{EXTRAS} */ - x = (U8*)SvPV(*extssvp, xcur); - xend = x + xcur; - while (x < xend) { - STRLEN namelen; - U8 *namestr; - SV** othersvp; - HV* otherhv; - STRLEN otherbits; - SV **otherbitssvp, *other; - U8 *nl; - - const U8 opc = *x++; - if (opc == '\n') - continue; - - nl = (U8*)memchr(x, '\n', xend - x); - - if (opc != '-' && opc != '+' && opc != '!' && opc != '&') { - if (nl) { - x = nl + 1; /* 1 is length of "\n" */ - continue; - } - else { - x = xend; /* to EXTRAS' end at which \n is not found */ - break; - } - } - - namestr = x; - if (nl) { - namelen = nl - namestr; - x = nl + 1; - } - else { - namelen = xend - namestr; - x = xend; - } - - othersvp = hv_fetch(hv, (char *)namestr, namelen, FALSE); - otherhv = MUTABLE_HV(SvRV(*othersvp)); - otherbitssvp = hv_fetchs(otherhv, "BITS", FALSE); - otherbits = (STRLEN)SvUV(*otherbitssvp); - - if (bits != otherbits || bits != 1) { - Perl_croak(aTHX_ "panic: _swash_to_invlist only operates on boolean " - "properties, bits=%" UVuf ", otherbits=%" UVuf, - (UV)bits, (UV)otherbits); - } - - /* The "other" swatch must be destroyed after. */ - other = _swash_to_invlist((SV *)*othersvp); - - /* End of code copied from swatch_get() */ - switch (opc) { - case '+': - _invlist_union(invlist, other, &invlist); - break; - case '!': - _invlist_union_maybe_complement_2nd(invlist, other, TRUE, &invlist); - break; - case '-': - _invlist_subtract(invlist, other, &invlist); - break; - case '&': - _invlist_intersection(invlist, other, &invlist); - break; - default: - break; - } - sv_free(other); /* through with it! */ - } - - SvREADONLY_on(invlist); - return invlist; -} - -SV* -Perl__get_swash_invlist(pTHX_ SV* const swash) -{ - SV** ptr; - - PERL_ARGS_ASSERT__GET_SWASH_INVLIST; - - if (! SvROK(swash)) { - return NULL; - } - - /* If it really isn't a hash, it isn't really swash; must be an inversion - * list */ - if (SvTYPE(SvRV(swash)) != SVt_PVHV) { - return SvRV(swash); - } - - ptr = hv_fetchs(MUTABLE_HV(SvRV(swash)), "V", FALSE); - if (! ptr) { - return NULL; - } - - return *ptr; -} - bool Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) { diff --git a/utf8.h b/utf8.h index 57be2e4..99e795d 100644 --- a/utf8.h +++ b/utf8.h @@ -34,11 +34,6 @@ #define FOLD_FLAGS_FULL 0x2 #define FOLD_FLAGS_NOMIX_ASCII 0x4 -/* For _core_swash_init(), internal core use only */ -#define _CORE_SWASH_INIT_USER_DEFINED_PROPERTY 0x1 -#define _CORE_SWASH_INIT_RETURN_IF_UNDEF 0x2 -#define _CORE_SWASH_INIT_ACCEPT_INVLIST 0x4 - /* =head1 Unicode Support L has an introduction to this API. -- 1.8.3.1