s |OP* |refkids |NULLOK OP* o|I32 type
#endif
Ap |void |regdump |NN const regexp* r
-ApM |SV* |regclass_swash |NULLOK const regexp *prog \
- |NN const struct regnode *node|bool doinit \
- |NULLOK SV **listsvp|NULLOK SV **altsvp
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_UTF8_C)
EXpR |SV* |_new_invlist_C_array|NN const UV* const list
EXMp |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXMpR |SV* |_new_invlist |IV initial_size
-EXMpR |SV* |_swash_to_invlist |NN SV* const swash
EXMpR |SV* |_add_range_to_invlist |NULLOK SV* invlist|UV start|UV end
EXMpR |SV* |_setup_canned_invlist|const STRLEN size|const UV element0|NN UV** other_elements_ptr
-EXMpn |void |_invlist_populate_swatch |NN SV* const invlist|const UV start|const UV end|NN U8* swatch
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
EMpX |SV* |invlist_clone |NN SV* const invlist|NULLOK SV* newlist
#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
-EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \
- |NN SV* listsv|I32 minbits|I32 none \
- |NULLOK SV* invlist|NULLOK U8* const flags_p
-#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
EiMRn |UV* |invlist_array |NN SV* const invlist
EiMRn |bool |is_invlist |NN SV* const invlist
EiMRn |UV |_invlist_len |NN SV* const invlist
EMiRn |bool |_invlist_contains_cp|NN SV* const invlist|const UV cp
EXpMRn |SSize_t|_invlist_search |NN SV* const invlist|const UV cp
-EXMpR |SV* |_get_swash_invlist|NN SV* const swash
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
EXpM |SV* |_get_regclass_nonbitmap_data \
Es |void |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \
|NN regnode* const node \
|NULLOK SV* const cp_list \
- |NULLOK SV* const runtime_defns \
- |NULLOK SV* const only_utf8_locale_list \
- |NULLOK SV* const swash \
- |const bool has_user_defined_property
+ |NULLOK SV* const runtime_defns \
+ |NULLOK SV* const only_utf8_locale_list
Es |void |output_posix_warnings \
|NN RExC_state_t *pRExC_state \
|NN AV* posix_warnings
#define reg_named_buff_firstkey(a,b) Perl_reg_named_buff_firstkey(aTHX_ a,b)
#define reg_named_buff_nextkey(a,b) Perl_reg_named_buff_nextkey(aTHX_ a,b)
#define reg_named_buff_scalar(a,b) Perl_reg_named_buff_scalar(aTHX_ a,b)
-#define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e)
#define regdump(a) Perl_regdump(aTHX_ a)
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regfree_internal(a) Perl_regfree_internal(aTHX_ a)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
#define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d)
-#define set_ANYOF_arg(a,b,c,d,e,f,g) S_set_ANYOF_arg(aTHX_ a,b,c,d,e,f,g)
+#define set_ANYOF_arg(a,b,c,d,e) S_set_ANYOF_arg(aTHX_ a,b,c,d,e)
#define set_regex_pv(a,b) S_set_regex_pv(aTHX_ a,b)
#define skip_to_be_ignored_text(a,b,c) S_skip_to_be_ignored_text(aTHX_ a,b,c)
#define ssc_add_range(a,b,c) S_ssc_add_range(aTHX_ a,b,c)
#define regprop(a,b,c,d,e) Perl_regprop(aTHX_ a,b,c,d,e)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
-#define _get_swash_invlist(a) Perl__get_swash_invlist(aTHX_ a)
#define _invlist_contains_cp S__invlist_contains_cp
#define _invlist_len S__invlist_len
#define _invlist_search Perl__invlist_search
#define invlist_array S_invlist_array
#define is_invlist S_is_invlist
# endif
-# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
-#define _core_swash_init(a,b,c,d,e,f,g) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g)
-# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
#define invlist_clone(a,b) Perl_invlist_clone(aTHX_ a,b)
# endif
#define _add_range_to_invlist(a,b,c) Perl__add_range_to_invlist(aTHX_ a,b,c)
#define _invlist_intersection_maybe_complement_2nd(a,b,c,d) Perl__invlist_intersection_maybe_complement_2nd(aTHX_ a,b,c,d)
#define _invlist_invert(a) Perl__invlist_invert(aTHX_ a)
-#define _invlist_populate_swatch Perl__invlist_populate_swatch
#define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
#define _new_invlist(a) Perl__new_invlist(aTHX_ a)
#define _setup_canned_invlist(a,b,c) Perl__setup_canned_invlist(aTHX_ a,b,c)
-#define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a)
# endif
# if defined(PERL_IN_REGEXEC_C)
#define advance_one_LB(a,b,c) S_advance_one_LB(aTHX_ a,b,c)
##
## Callers of swash_init:
## op.c:pmtrans -- for tr/// and y///
- ## regexec.c:regclass_swash -- for /[]/, \p, and \P
- ## utf8.c:is_utf8_common -- for common Unicode properties
- ## utf8.c:S__to_utf8_case -- for lc, uc, ucfirst, etc. and //i
## Unicode::UCD::prop_invlist
## Unicode::UCD::prop_invmap
##
PERL_CALLCONV REGEXP* Perl_reg_temp_copy(pTHX_ REGEXP* dsv, REGEXP* ssv);
#define PERL_ARGS_ASSERT_REG_TEMP_COPY \
assert(ssv)
-PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const regexp *prog, const struct regnode *node, bool doinit, SV **listsvp, SV **altsvp);
-#define PERL_ARGS_ASSERT_REGCLASS_SWASH \
- assert(node)
PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r);
#define PERL_ARGS_ASSERT_REGDUMP \
assert(r)
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
#define PERL_ARGS_ASSERT_SCAN_COMMIT \
assert(pRExC_state); assert(data); assert(minlenp)
-STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list, SV* const swash, const bool has_user_defined_property);
+STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* const node, SV* const cp_list, SV* const runtime_defns, SV* const only_utf8_locale_list);
#define PERL_ARGS_ASSERT_SET_ANYOF_ARG \
assert(pRExC_state); assert(node)
STATIC void S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx);
assert(sv); assert(o)
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
-PERL_CALLCONV SV* Perl__get_swash_invlist(pTHX_ SV* const swash)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT__GET_SWASH_INVLIST \
- assert(swash)
-
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE bool S__invlist_contains_cp(SV* const invlist, const UV cp)
__attribute__warn_unused_result__;
#endif
#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
-PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, SV* invlist, U8* const flags_p);
-#define PERL_ARGS_ASSERT__CORE_SWASH_INIT \
- assert(pkg); assert(name); assert(listsv)
-#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_SV_C)
PERL_CALLCONV SV* Perl_invlist_clone(pTHX_ SV* const invlist, SV* newlist);
#define PERL_ARGS_ASSERT_INVLIST_CLONE \
PERL_CALLCONV void Perl__invlist_invert(pTHX_ SV* const invlist);
#define PERL_ARGS_ASSERT__INVLIST_INVERT \
assert(invlist)
-PERL_CALLCONV void Perl__invlist_populate_swatch(SV* const invlist, const UV start, const UV end, U8* swatch);
-#define PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH \
- assert(invlist); assert(swatch)
/* PERL_CALLCONV void _invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result); */
/* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output); */
PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output);
#define PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST \
assert(other_elements_ptr)
-PERL_CALLCONV SV* Perl__swash_to_invlist(pTHX_ SV* const swash)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT__SWASH_TO_INVLIST \
- assert(swash)
-
#endif
#if defined(PERL_IN_REGEXEC_C)
STATIC LB_enum S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
- set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist,
- NULL, NULL, NULL, FALSE);
+ set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
/* Make sure is clone-safe */
ssc->invlist = NULL;
}
void
-Perl__invlist_populate_swatch(SV* const invlist,
- const UV start, const UV end, U8* swatch)
-{
- /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
- * but is used when the swash has an inversion list. This makes this much
- * faster, as it uses a binary search instead of a linear one. This is
- * intimately tied to that function, and perhaps should be in utf8.c,
- * except it is intimately tied to inversion lists as well. It assumes
- * that <swatch> is all 0's on input */
-
- UV current = start;
- const IV len = _invlist_len(invlist);
- IV i;
- const UV * array;
-
- PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH;
-
- if (len == 0) { /* Empty inversion list */
- return;
- }
-
- array = invlist_array(invlist);
-
- /* Find which element it is */
- i = _invlist_search(invlist, start);
-
- /* We populate from <start> to <end> */
- while (current < end) {
- UV upper;
-
- /* The inversion list gives the results for every possible code point
- * after the first one in the list. Only those ranges whose index is
- * even are ones that the inversion list matches. For the odd ones,
- * and if the initial code point is not in the list, we have to skip
- * forward to the next element */
- if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) {
- i++;
- if (i >= len) { /* Finished if beyond the end of the array */
- return;
- }
- current = array[i];
- if (current >= end) { /* Finished if beyond the end of what we
- are populating */
- if (LIKELY(end < UV_MAX)) {
- return;
- }
-
- /* We get here when the upper bound is the maximum
- * representable on the machine, and we are looking for just
- * that code point. Have to special case it */
- i = len;
- goto join_end_of_list;
- }
- }
- assert(current >= start);
-
- /* The current range ends one below the next one, except don't go past
- * <end> */
- i++;
- upper = (i < len && array[i] < end) ? array[i] : end;
-
- /* Here we are in a range that matches. Populate a bit in the 3-bit U8
- * for each code point in it */
- for (; current < upper; current++) {
- const STRLEN offset = (STRLEN)(current - start);
- swatch[offset >> 3] |= 1 << (offset & 7);
- }
-
- join_end_of_list:
-
- /* Quit if at the end of the list */
- if (i >= len) {
-
- /* But first, have to deal with the highest possible code point on
- * the platform. The previous code assumes that <end> is one
- * beyond where we want to populate, but that is impossible at the
- * platform's infinity, so have to handle it specially */
- if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1)))
- {
- const STRLEN offset = (STRLEN)(end - start);
- swatch[offset >> 3] |= 1 << (offset & 7);
- }
- return;
- }
-
- /* Advance to the next range, which will be for code points not in the
- * inversion list */
- current = array[i];
- }
-
- return;
-}
-
-void
Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
const bool complement_b, SV** output)
{
*
* ANYOF nodes contain a bit map for the first NUM_ANYOF_CODE_POINTS
* characters, with the corresponding bit set if that character is in the
- * list. For characters above this, a range list or swash is used. There
+ * list. For characters above this, an inversion list is used. There
* are extra bits for \w, etc. in locale ANYOFs, as what these match is not
* determinable at compile time
*
const bool skip_white = cBOOL( ret_invlist
|| (RExC_flags & RXf_PMf_EXTENDED_MORE));
- /* Unicode properties are stored in a swash; this holds the current one
- * being parsed. If this swash is the only above-latin1 component of the
- * character class, an optimization is to pass it directly on to the
- * execution engine. Otherwise, it is set to NULL to indicate that there
- * are other things in the class that have to be dealt with at execution
- * time */
- SV* swash = NULL; /* Code points that match \p{} \P{} */
-
/* inversion list of code points this node matches only when the target
* string is in UTF-8. These are all non-ASCII, < 256. (Because is under
* /d) */
{
char *e;
- SvREFCNT_dec(swash); /* Free any left-overs */
-
/* \p means they want Unicode semantics */
REQUIRE_UNI_RULES(flagp, 0);
_invlist_union_complement_2nd(properties,
prop_definition,
&properties);
-
- /* The swash can't be used as-is, because we've
- * inverted things; delay removing it to here after
- * have copied its invlist above */
- SvREFCNT_dec(swash);
- swash = NULL;
}
else {
_invlist_union(properties, prop_definition, &properties);
/* And combine the result (if any) with any inversion lists from posix
* classes. The lists are kept separate up to now because we don't want to
- * fold the classes (folding of those is automatically handled by the swash
- * fetching code) */
+ * fold the classes */
if (simple_posixes) { /* These are the classes known to be unaffected by
/a, /aa, and /d */
if (cp_list) {
* folded until runtime */
/* If we didn't do folding, it's because some information isn't available
- * until runtime; set the run-time fold flag for these. (We don't have to
- * worry about properties folding, as that is taken care of by the swash
- * fetching). We know to set the flag if we have a non-NULL list for UTF-8
- * locales, or the class matches at least one 0-255 range code point */
+ * until runtime; set the run-time fold flag for these We know to set the
+ * flag if we have a non-NULL list for UTF-8 locales, or the class matches
+ * at least one 0-255 range code point */
if (LOC && FOLD) {
/* Some things on the list might be unconditionally included because of
{
_invlist_invert(cp_list);
- /* Any swash can't be used as-is, because we've inverted things */
- if (swash) {
- SvREFCNT_dec_NN(swash);
- swash = NULL;
- }
-
- invert = FALSE;
+ /* Clear the invert flag since have just done it here */
+ invert = FALSE;
}
if (ret_invlist) {
*ret_invlist = cp_list;
- SvREFCNT_dec(swash);
return RExC_emit;
}
ANYOF_FLAGS(REGNODE_p(ret)) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
}
- /* If there is a swash and more than one element, we can't use the swash in
- * the optimization below. */
- if (swash && element_count > 1) {
- SvREFCNT_dec_NN(swash);
- swash = NULL;
- }
-
- /* Note that the optimization of using 'swash' if it is the only thing in
- * the class doesn't have us change swash at all, so it can include things
- * that are also in the bitmap; otherwise we have purposely deleted that
- * duplicate information */
set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
(HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
? listsv : NULL,
- only_utf8_locale_list,
- swash, cBOOL(has_runtime_dependency
- & HAS_USER_DEFINED_PROPERTY));
+ only_utf8_locale_list);
return ret;
not_anyof:
regnode* const node,
SV* const cp_list,
SV* const runtime_defns,
- SV* const only_utf8_locale_list,
- SV* const swash,
- const bool has_user_defined_property)
+ SV* const only_utf8_locale_list)
{
/* Sets the arg field of an ANYOF-type node 'node', using information about
* the node passed-in. If there is nothing outside the node's bitmap, the
{
/* For internal core use only.
- * Returns the swash for the input 'node' in the regex 'prog'.
- * If <doinit> is 'true', will attempt to create the swash if not already
- * done.
+ * Returns the inversion list for the input 'node' in the regex 'prog'.
+ * If <doinit> is 'true', will attempt to create the inversion list if not
+ * already done.
* If <listsvp> is non-null, will return the printable contents of the
- * swash. This can be used to get debugging information even before the
- * swash exists, by calling this function with 'doinit' set to false, in
- * which case the components that will be used to eventually create the
- * swash are returned (in a printable form).
+ * property definition. This can be used to get debugging information
+ * even before the inversion list exists, by calling this function with
+ * 'doinit' set to false, in which case the components that will be used
+ * to eventually create the inversion list are returned (in a printable
+ * form).
* If <only_utf8_locale_ptr> is not NULL, it is where this routine is to
* store an inversion list of code points that should match only if the
* execution-time locale is a UTF-8 one.
* inversion list of the code points that would be instead returned in
* <listsvp> if this were NULL. Thus, what gets output in <listsvp>
* when this parameter is used, is just the non-code point data that
- * will go into creating the swash. This currently should be just
+ * will go into creating the inversion list. This currently should be just
* user-defined properties whose definitions were not known at compile
* time. Using this parameter allows for easier manipulation of the
- * swash's data by the caller. It is illegal to call this function with
- * this parameter set, but not <listsvp>
+ * inversion list's data by the caller. It is illegal to call this
+ * function with this parameter set, but not <listsvp>
*
* Tied intimately to how S_set_ANYOF_arg sets up the data structure. Note
- * that, in spite of this function's name, the swash it returns may include
- * the bitmap data as well */
+ * that, in spite of this function's name, the inversion list it returns
+ * may include the bitmap data as well */
- SV *si = NULL; /* Input swash initialization string */
+ SV *si = NULL; /* Input initialization string */
SV* invlist = NULL;
RXi_GET_DECL(prog, progi);
}
}
- /* If requested, return a printable version of what this swash matches */
+ /* If requested, return a printable version of what this ANYOF node matches
+ * */
if (listsvp) {
SV* matches_string = NULL;
- /* The swash should be used, if possible, to get the data, as it
- * contains the resolved data. But this function can be called at
- * compile-time, before everything gets resolved, in which case we
- * return the currently best available information, which is the string
- * that will eventually be used to do that resolving, 'si' */
+ /* This function can be called at compile-time, before everything gets
+ * resolved, in which case we return the currently best available
+ * information, which is the string that will eventually be used to do
+ * that resolving, 'si' */
if (si) {
/* Here, we only have 'si' (and possibly some passed-in data in
* 'invlist', which is handled below) If the caller only wants
if (SvCUR(matches_string)) { /* Get rid of trailing blank */
SvCUR_set(matches_string, SvCUR(matches_string) - 1);
}
- } /* end of has an 'si' but no swash */
+ } /* end of has an 'si' */
}
- /* If we have a swash in place, its equivalent inversion list was above
- * placed into 'invlist'. If not, this variable may contain a stored
- * inversion list which is information beyond what is in 'si' */
+ /* Add the stuff that's already known */
if (invlist) {
/* Again, if the caller doesn't want the output inversion list, put
* This just calls isFOO_lc on the code point for the character if it is in
* the range 0-255. Outside that range, all characters use Unicode
* rules, ignoring any locale. So use the Unicode function if this class
- * requires a swash, and use the Unicode macro otherwise. */
+ * requires an inversion list, and use the Unicode macro otherwise. */
PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
return(c);
}
-
-#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
-/*
-- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
-create a copy so that changes the caller makes won't change the shared one.
-If <altsvp> is non-null, will return NULL in it, for back-compat.
- */
-SV *
-Perl_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
-{
- PERL_ARGS_ASSERT_REGCLASS_SWASH;
-
- if (altsvp) {
- *altsvp = NULL;
- }
-
- return newSVsv(_get_regclass_nonbitmap_data(prog, node, doinit, listsvp, NULL, NULL));
-}
-
-#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
-
/*
- reginclass - determine if a character falls into a character class
&& IN_UTF8_CTYPE_LOCALE)))
{
SV* only_utf8_locale = NULL;
- SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,
- &only_utf8_locale, NULL);
- if (sw) {
+ SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
+ 0, &only_utf8_locale, NULL);
+ if (definition) {
U8 utf8_buffer[2];
U8 * utf8_p;
if (utf8_target) {
&& isALPHA_FOLD_EQ(*p, 'i'))
{
if (*p == 'i') {
- if (_invlist_contains_cp(sw,
+ if (_invlist_contains_cp(definition,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
{
match = TRUE;
}
}
else if (*p == 'I') {
- if (_invlist_contains_cp(sw,
+ if (_invlist_contains_cp(definition,
LATIN_SMALL_LETTER_DOTLESS_I))
{
match = TRUE;
}
}
}
- else if (_invlist_contains_cp(sw, c)) {
+ else if (_invlist_contains_cp(definition, c)) {
match = TRUE;
}
}
Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv,
I32 minbits, I32 none)
{
- PERL_ARGS_ASSERT_SWASH_INIT;
-
/* Returns a copy of a swash initiated by the called function. This is the
* public interface, and returning a copy prevents others from doing
- * mischief on the original */
-
- return newSVsv(_core_swash_init(pkg, name, listsv, minbits, none,
- NULL, NULL));
-}
-
-SV*
-Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv,
- I32 minbits, I32 none, SV* invlist,
- U8* const flags_p)
-{
+ * mischief on the original. The only remaining use of this is in tr/// */
/*NOTE NOTE NOTE - If you want to use "return" in this routine you MUST
* use the following define */
-#define CORE_SWASH_INIT_RETURN(x) \
+#define SWASH_INIT_RETURN(x) \
PL_curpm= old_PL_curpm; \
- return x
+ return newSVsv(x)
/* Initialize and return a swash, creating it if necessary. It does this
- * by calling utf8_heavy.pl in the general case. The returned value may be
- * the swash's inversion list instead if the input parameters allow it.
- * Which is returned should be immaterial to callers, as the only
- * operations permitted on a swash, swash_fetch(), _get_swash_invlist(),
- * and swash_to_invlist() handle both these transparently.
- *
- * This interface should only be used by functions that won't destroy or
- * adversely change the swash, as doing so affects all other uses of the
- * swash in the program; the general public should use 'Perl_swash_init'
- * instead.
+ * by calling utf8_heavy.pl in the general case.
*
* pkg is the name of the package that <name> should be in.
- * name is the name of the swash to find. Typically it is a Unicode
- * property name, including user-defined ones
+ * name is the name of the swash to find.
* listsv is a string to initialize the swash with. It must be of the form
* documented as the subroutine return value in
* L<perlunicode/User-Defined Character Properties>
* minbits is the number of bits required to represent each data element.
- * It is '1' for binary properties.
* none I (khw) do not understand this one, but it is used only in tr///.
- * invlist is an inversion list to initialize the swash with (or NULL)
- * flags_p if non-NULL is the address of various input and output flag bits
- * to the routine, as follows: ('I' means is input to the routine;
- * 'O' means output from the routine. Only flags marked O are
- * meaningful on return.)
- * _CORE_SWASH_INIT_USER_DEFINED_PROPERTY indicates if the swash
- * came from a user-defined property. (I O)
- * _CORE_SWASH_INIT_RETURN_IF_UNDEF indicates that instead of croaking
- * when the swash cannot be located, to simply return NULL. (I)
- * _CORE_SWASH_INIT_ACCEPT_INVLIST indicates that the caller will accept a
- * return of an inversion list instead of a swash hash if this routine
- * thinks that would result in faster execution of swash_fetch() later
- * on. (I)
*
- * Thus there are three possible inputs to find the swash: <name>,
- * <listsv>, and <invlist>. At least one must be specified. The result
+ * Thus there are two possible inputs to find the swash: <name> and
+ * <listsv>. At least one must be specified. The result
* will be the union of the specified ones, although <listsv>'s various
* actions can intersect, etc. what <name> gives. To avoid going out to
* disk at all, <invlist> should specify completely what the swash should
* have, and <listsv> should be &PL_sv_undef and <name> should be "".
- *
- * <invlist> is only valid for binary properties */
+ */
PMOP *old_PL_curpm= PL_curpm; /* save away the old PL_curpm */
SV* retval = &PL_sv_undef;
- HV* swash_hv = NULL;
- const bool use_invlist= (flags_p && *flags_p & _CORE_SWASH_INIT_ACCEPT_INVLIST);
- assert(listsv != &PL_sv_undef || strNE(name, "") || invlist);
- assert(! invlist || minbits == 1);
+ PERL_ARGS_ASSERT_SWASH_INIT;
+
+ assert(listsv != &PL_sv_undef || strNE(name, ""));
PL_curpm= NULL; /* reset PL_curpm so that we dont get confused between the
regex that triggered the swash init and the swash init
SV* errsv_save;
GV *method;
- PERL_ARGS_ASSERT__CORE_SWASH_INIT;
PUSHSTACKi(PERLSI_MAGIC);
ENTER;
if (IN_PERL_COMPILETIME) {
CopHINTS_set(PL_curcop, PL_hints);
}
- if (!SvROK(retval) || SvTYPE(SvRV(retval)) != SVt_PVHV) {
- if (SvPOK(retval)) {
-
- /* If caller wants to handle missing properties, let them */
- if (flags_p && *flags_p & _CORE_SWASH_INIT_RETURN_IF_UNDEF) {
- CORE_SWASH_INIT_RETURN(NULL);
- }
- Perl_croak(aTHX_
- "Can't find Unicode property definition \"%" SVf "\"",
- SVfARG(retval));
- NOT_REACHED; /* NOTREACHED */
- }
- }
} /* End of calling the module to find the swash */
- /* If this operation fetched a swash, and we will need it later, get it */
- if (retval != &PL_sv_undef
- && (minbits == 1 || (flags_p
- && ! (*flags_p
- & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY))))
- {
- swash_hv = MUTABLE_HV(SvRV(retval));
-
- /* If we don't already know that there is a user-defined component to
- * this swash, and the user has indicated they wish to know if there is
- * one (by passing <flags_p>), find out */
- if (flags_p && ! (*flags_p & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)) {
- SV** user_defined = hv_fetchs(swash_hv, "USER_DEFINED", FALSE);
- if (user_defined && SvUV(*user_defined)) {
- *flags_p |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
- }
- }
- }
-
- /* Make sure there is an inversion list for binary properties */
- if (minbits == 1) {
- SV** swash_invlistsvp = NULL;
- SV* swash_invlist = NULL;
- bool invlist_in_swash_is_valid = FALSE;
- bool swash_invlist_unclaimed = FALSE; /* whether swash_invlist has
- an unclaimed reference count */
-
- /* If this operation fetched a swash, get its already existing
- * inversion list, or create one for it */
-
- if (swash_hv) {
- swash_invlistsvp = hv_fetchs(swash_hv, "V", FALSE);
- if (swash_invlistsvp) {
- swash_invlist = *swash_invlistsvp;
- invlist_in_swash_is_valid = TRUE;
- }
- else {
- swash_invlist = _swash_to_invlist(retval);
- swash_invlist_unclaimed = TRUE;
- }
- }
-
- /* If an inversion list was passed in, have to include it */
- if (invlist) {
-
- /* Any fetched swash will by now have an inversion list in it;
- * otherwise <swash_invlist> will be NULL, indicating that we
- * didn't fetch a swash */
- if (swash_invlist) {
-
- /* Add the passed-in inversion list, which invalidates the one
- * already stored in the swash */
- invlist_in_swash_is_valid = FALSE;
- SvREADONLY_off(swash_invlist); /* Turned on again below */
- _invlist_union(invlist, swash_invlist, &swash_invlist);
- }
- else {
-
- /* Here, there is no swash already. Set up a minimal one, if
- * we are going to return a swash */
- if (! use_invlist) {
- swash_hv = newHV();
- retval = newRV_noinc(MUTABLE_SV(swash_hv));
- }
- swash_invlist = invlist;
- }
- }
-
- /* Here, we have computed the union of all the passed-in data. It may
- * be that there was an inversion list in the swash which didn't get
- * touched; otherwise save the computed one */
- if (! invlist_in_swash_is_valid && ! use_invlist) {
- if (! hv_stores(MUTABLE_HV(SvRV(retval)), "V", swash_invlist))
- {
- Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
- }
- /* We just stole a reference count. */
- if (swash_invlist_unclaimed) swash_invlist_unclaimed = FALSE;
- else SvREFCNT_inc_simple_void_NN(swash_invlist);
- }
-
- /* The result is immutable. Forbid attempts to change it. */
- SvREADONLY_on(swash_invlist);
-
- if (use_invlist) {
- SvREFCNT_dec(retval);
- if (!swash_invlist_unclaimed)
- SvREFCNT_inc_simple_void_NN(swash_invlist);
- retval = newRV_noinc(swash_invlist);
- }
- }
-
- CORE_SWASH_INIT_RETURN(retval);
-#undef CORE_SWASH_INIT_RETURN
+ SWASH_INIT_RETURN(retval);
+#undef SWASH_INIT_RETURN
}
S_swatch_get(pTHX_ SV* swash, UV start, UV span)
{
SV *swatch;
- U8 *l, *lend, *x, *xend, *s, *send;
+ U8 *l, *lend, *x, *xend, *s;
STRLEN lcur, xcur, scur;
HV *const hv = MUTABLE_HV(SvRV(swash));
- SV** const invlistsvp = hv_fetchs(hv, "V", FALSE);
SV** listsvp = NULL; /* The string containing the main body of the table */
SV** extssvp = NULL;
- SV** invert_it_svp = NULL;
U8* typestr = NULL;
- STRLEN bits;
+ STRLEN bits = 0;
STRLEN octets; /* if bits == 1, then octets == 0 */
UV none;
UV end = start + span;
- if (invlistsvp == NULL) {
SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
SV** const nonesvp = hv_fetchs(hv, "NONE", FALSE);
SV** const typesvp = hv_fetchs(hv, "TYPE", FALSE);
extssvp = hv_fetchs(hv, "EXTRAS", FALSE);
listsvp = hv_fetchs(hv, "LIST", FALSE);
- invert_it_svp = hv_fetchs(hv, "INVERT_IT", FALSE);
bits = SvUV(*bitssvp);
none = SvUV(*nonesvp);
typestr = (U8*)SvPV_nolen(*typesvp);
- }
- else {
- bits = 1;
- none = 0;
- }
octets = bits >> 3; /* if bits == 1, then octets == 0 */
PERL_ARGS_ASSERT_SWATCH_GET;
- if (bits != 1 && bits != 8 && bits != 16 && bits != 32) {
+ if (bits != 8 && bits != 16 && bits != 32) {
Perl_croak(aTHX_ "panic: swatch_get doesn't expect bits %" UVuf,
(UV)bits);
}
SvCUR_set(swatch, scur);
s = (U8*)SvPVX(swatch);
- if (invlistsvp) { /* If has an inversion list set up use that */
- _invlist_populate_swatch(*invlistsvp, start, end, s);
- return swatch;
- }
-
/* read $swash->{LIST} */
l = (U8*)SvPV(*listsvp, lcur);
lend = l + lcur;
while (l < lend) {
- UV min, max, val, upper;
+ UV min = 0, max = 0, val = 0, upper;
l = swash_scan_list_line(l, lend, &min, &max, &val,
cBOOL(octets), typestr);
if (l > lend) {
++val;
}
}
- else { /* bits == 1, then val should be ignored */
- UV key;
- if (min < start)
- min = start;
-
- for (key = min; key <= upper; key++) {
- const STRLEN offset = (STRLEN)(key - start);
- s[offset >> 3] |= 1 << (offset & 7);
- }
- }
} /* while */
- /* Invert if the data says it should be. Assumes that bits == 1 */
- if (invert_it_svp && SvUV(*invert_it_svp)) {
-
- /* Unicode properties should come with all bits above PERL_UNICODE_MAX
- * be 0, and their inversion should also be 0, as we don't succeed any
- * Unicode property matches for non-Unicode code points */
- if (start <= PERL_UNICODE_MAX) {
-
- /* The code below assumes that we never cross the
- * Unicode/above-Unicode boundary in a range, as otherwise we would
- * have to figure out where to stop flipping the bits. Since this
- * boundary is divisible by a large power of 2, and swatches comes
- * in small powers of 2, this should be a valid assumption */
- assert(start + span - 1 <= PERL_UNICODE_MAX);
-
- send = s + scur;
- while (s < send) {
- *s = ~(*s);
- s++;
- }
- }
- }
-
- /* read $swash->{EXTRAS}
- * This code also copied to swash_to_invlist() below */
+ /* read $swash->{EXTRAS} */
x = (U8*)SvPV(*extssvp, xcur);
xend = x + xcur;
while (x < xend) {
Perl_croak(aTHX_ "panic: swatch_get got improper swatch");
s = (U8*)SvPV(swatch, slen);
- if (bits == 1 && otherbits == 1) {
- if (slen != olen)
- Perl_croak(aTHX_ "panic: swatch_get found swatch length "
- "mismatch, slen=%" UVuf ", olen=%" UVuf,
- (UV)slen, (UV)olen);
-
- switch (opc) {
- case '+':
- while (slen--)
- *s++ |= *o++;
- break;
- case '!':
- while (slen--)
- *s++ |= ~*o++;
- break;
- case '-':
- while (slen--)
- *s++ &= ~*o++;
- break;
- case '&':
- while (slen--)
- *s++ &= *o++;
- break;
- default:
- break;
- }
- }
- else {
+ {
STRLEN otheroctets = otherbits >> 3;
STRLEN offset = 0;
U8* const send = s + slen;
*s++ = (U8)((otherval >> 8) & 0xff);
*s++ = (U8)( otherval & 0xff);
}
- }
+ }
}
sv_free(other); /* through with it! */
} /* while */
return swatch;
}
-SV*
-Perl__swash_to_invlist(pTHX_ SV* const swash)
-{
-
- /* Subject to change or removal. For use only in one place in regcomp.c.
- * Ownership is given to one reference count in the returned SV* */
-
- U8 *l, *lend;
- char *loc;
- STRLEN lcur;
- HV *const hv = MUTABLE_HV(SvRV(swash));
- UV elements = 0; /* Number of elements in the inversion list */
- U8 empty[] = "";
- SV** listsvp;
- SV** typesvp;
- SV** bitssvp;
- SV** extssvp;
- SV** invert_it_svp;
-
- U8* typestr;
- STRLEN bits;
- STRLEN octets; /* if bits == 1, then octets == 0 */
- U8 *x, *xend;
- STRLEN xcur;
-
- SV* invlist;
-
- PERL_ARGS_ASSERT__SWASH_TO_INVLIST;
-
- /* If not a hash, it must be the swash's inversion list instead */
- if (SvTYPE(hv) != SVt_PVHV) {
- return SvREFCNT_inc_simple_NN((SV*) hv);
- }
-
- /* The string containing the main body of the table */
- listsvp = hv_fetchs(hv, "LIST", FALSE);
- typesvp = hv_fetchs(hv, "TYPE", FALSE);
- bitssvp = hv_fetchs(hv, "BITS", FALSE);
- extssvp = hv_fetchs(hv, "EXTRAS", FALSE);
- invert_it_svp = hv_fetchs(hv, "INVERT_IT", FALSE);
-
- typestr = (U8*)SvPV_nolen(*typesvp);
- bits = SvUV(*bitssvp);
- octets = bits >> 3; /* if bits == 1, then octets == 0 */
-
- /* read $swash->{LIST} */
- if (SvPOK(*listsvp)) {
- l = (U8*)SvPV(*listsvp, lcur);
- }
- else {
- /* LIST legitimately doesn't contain a string during compilation phases
- * of Perl itself, before the Unicode tables are generated. In this
- * case, just fake things up by creating an empty list */
- l = empty;
- lcur = 0;
- }
- loc = (char *) l;
- lend = l + lcur;
-
- if (*l == 'V') { /* Inversion list format */
- const char *after_atou = (char *) lend;
- UV element0;
- UV* other_elements_ptr;
-
- /* The first number is a count of the rest */
- l++;
- if (!grok_atoUV((const char *)l, &elements, &after_atou)) {
- Perl_croak(aTHX_ "panic: Expecting a valid count of elements"
- " at start of inversion list");
- }
- if (elements == 0) {
- invlist = _new_invlist(0);
- }
- else {
- l = (U8 *) after_atou;
-
- /* Get the 0th element, which is needed to setup the inversion list
- * */
- while (isSPACE(*l)) l++;
- after_atou = (char *) lend;
- if (!grok_atoUV((const char *)l, &element0, &after_atou)) {
- Perl_croak(aTHX_ "panic: Expecting a valid 0th element for"
- " inversion list");
- }
- l = (U8 *) after_atou;
- invlist = _setup_canned_invlist(elements, element0,
- &other_elements_ptr);
- elements--;
-
- /* Then just populate the rest of the input */
- while (elements-- > 0) {
- if (l > lend) {
- Perl_croak(aTHX_ "panic: Expecting %" UVuf " more"
- " elements than available", elements);
- }
- while (isSPACE(*l)) l++;
- after_atou = (char *) lend;
- if (!grok_atoUV((const char *)l, other_elements_ptr++,
- &after_atou))
- {
- Perl_croak(aTHX_ "panic: Expecting a valid element"
- " in inversion list");
- }
- l = (U8 *) after_atou;
- }
- }
- }
- else {
-
- /* Scan the input to count the number of lines to preallocate array
- * size based on worst possible case, which is each line in the input
- * creates 2 elements in the inversion list: 1) the beginning of a
- * range in the list; 2) the beginning of a range not in the list. */
- while ((loc = (char *) memchr(loc, '\n', lend - (U8 *) loc)) != NULL) {
- elements += 2;
- loc++;
- }
-
- /* If the ending is somehow corrupt and isn't a new line, add another
- * element for the final range that isn't in the inversion list */
- if (! (*lend == '\n'
- || (*lend == '\0' && (lcur == 0 || *(lend - 1) == '\n'))))
- {
- elements++;
- }
-
- invlist = _new_invlist(elements);
-
- /* Now go through the input again, adding each range to the list */
- while (l < lend) {
- UV start, end;
- UV val; /* Not used by this function */
-
- l = swash_scan_list_line(l, lend, &start, &end, &val,
- cBOOL(octets), typestr);
-
- if (l > lend) {
- break;
- }
-
- invlist = _add_range_to_invlist(invlist, start, end);
- }
- }
-
- /* Invert if the data says it should be */
- if (invert_it_svp && SvUV(*invert_it_svp)) {
- _invlist_invert(invlist);
- }
-
- /* This code is copied from swatch_get()
- * read $swash->{EXTRAS} */
- x = (U8*)SvPV(*extssvp, xcur);
- xend = x + xcur;
- while (x < xend) {
- STRLEN namelen;
- U8 *namestr;
- SV** othersvp;
- HV* otherhv;
- STRLEN otherbits;
- SV **otherbitssvp, *other;
- U8 *nl;
-
- const U8 opc = *x++;
- if (opc == '\n')
- continue;
-
- nl = (U8*)memchr(x, '\n', xend - x);
-
- if (opc != '-' && opc != '+' && opc != '!' && opc != '&') {
- if (nl) {
- x = nl + 1; /* 1 is length of "\n" */
- continue;
- }
- else {
- x = xend; /* to EXTRAS' end at which \n is not found */
- break;
- }
- }
-
- namestr = x;
- if (nl) {
- namelen = nl - namestr;
- x = nl + 1;
- }
- else {
- namelen = xend - namestr;
- x = xend;
- }
-
- othersvp = hv_fetch(hv, (char *)namestr, namelen, FALSE);
- otherhv = MUTABLE_HV(SvRV(*othersvp));
- otherbitssvp = hv_fetchs(otherhv, "BITS", FALSE);
- otherbits = (STRLEN)SvUV(*otherbitssvp);
-
- if (bits != otherbits || bits != 1) {
- Perl_croak(aTHX_ "panic: _swash_to_invlist only operates on boolean "
- "properties, bits=%" UVuf ", otherbits=%" UVuf,
- (UV)bits, (UV)otherbits);
- }
-
- /* The "other" swatch must be destroyed after. */
- other = _swash_to_invlist((SV *)*othersvp);
-
- /* End of code copied from swatch_get() */
- switch (opc) {
- case '+':
- _invlist_union(invlist, other, &invlist);
- break;
- case '!':
- _invlist_union_maybe_complement_2nd(invlist, other, TRUE, &invlist);
- break;
- case '-':
- _invlist_subtract(invlist, other, &invlist);
- break;
- case '&':
- _invlist_intersection(invlist, other, &invlist);
- break;
- default:
- break;
- }
- sv_free(other); /* through with it! */
- }
-
- SvREADONLY_on(invlist);
- return invlist;
-}
-
-SV*
-Perl__get_swash_invlist(pTHX_ SV* const swash)
-{
- SV** ptr;
-
- PERL_ARGS_ASSERT__GET_SWASH_INVLIST;
-
- if (! SvROK(swash)) {
- return NULL;
- }
-
- /* If it really isn't a hash, it isn't really swash; must be an inversion
- * list */
- if (SvTYPE(SvRV(swash)) != SVt_PVHV) {
- return SvRV(swash);
- }
-
- ptr = hv_fetchs(MUTABLE_HV(SvRV(swash)), "V", FALSE);
- if (! ptr) {
- return NULL;
- }
-
- return *ptr;
-}
-
bool
Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
{
#define FOLD_FLAGS_FULL 0x2
#define FOLD_FLAGS_NOMIX_ASCII 0x4
-/* For _core_swash_init(), internal core use only */
-#define _CORE_SWASH_INIT_USER_DEFINED_PROPERTY 0x1
-#define _CORE_SWASH_INIT_RETURN_IF_UNDEF 0x2
-#define _CORE_SWASH_INIT_ACCEPT_INVLIST 0x4
-
/*
=head1 Unicode Support
L<perlguts/Unicode Support> has an introduction to this API.