-STATIC U8
-S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-
- /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes.
- * Locale folding is done at run-time, so this function should not be
- * called for nodes that are for locales.
- *
- * This function sets the bit corresponding to the fold of the input
- * 'value', if not already set. The fold of 'f' is 'F', and the fold of
- * 'F' is 'f'. The fold is looked up in PL_fold_latin1 when
- * AT_LEAST_UNI_SEMANTICS is true, and in PL_fold otherwise.
- *
- * It also knows about the characters that are in the bitmap that have
- * folds that are matchable only outside it, and sets the appropriate lists
- * and flags.
- *
- * Parameters:
- * node the ANYOF node whose bitmap (and possibly flags) get
- * updated
- * value the character whose fold is to be handled; its own bit
- * is expected to be set in the bitmap already
- * invlist_ptr points to the inversion list that receives any matches
- * that lie outside the bitmap; the list is replaced by
- * whatever add_cp_to_invlist() returns
- * alternate_ptr handed to add_alternate() to record the multi-char
- * match "ss" for LATIN SMALL LETTER SHARP S
- *
- * It returns the number of bits that actually changed from 0 to 1. At
- * most the single bit for the fold is set here, so that is 0 or 1 */
-
- U8 stored = 0;
- U8 fold;
-
- PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD;
-
- fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value]
- : PL_fold[value];
-
- /* It assumes the bit for 'value' has already been set, so only the bit
- * for a fold that differs from 'value' and is still clear needs to be
- * turned on and counted */
- if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) {
- ANYOF_BITMAP_SET(node, fold);
- stored++;
- }
- if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) {
- /* Certain Latin1 characters have matches outside the bitmap. To get
- * here, 'value' is one of those characters. None of these matches is
- * valid for ASCII characters under /aa, which have been excluded by
- * the 'if' above. The matches fall into three categories:
- * 1) They are singly folded-to or -from an above 255 character, as
- * LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y
- * WITH DIAERESIS;
- * 2) They are part of a multi-char fold with another character in the
- * bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill;
- * 3) They are part of a multi-char fold with a character not in the
- * bitmap, such as various ligatures.
- * We aren't dealing fully with multi-char folds, except we do deal
- * with the pattern containing a character that has a multi-char fold
- * (not so much the inverse).
- * For types 1) and 3), the matches only happen when the target string
- * is utf8; that's not true for 2), and we set a flag for it.
- *
- * The code below adds to the passed in inversion list the single fold
- * closures for 'value'. The values are hard-coded here so that an
- * innocent-looking character class, like /[ks]/i won't have to go out
- * to disk to find the possible matches. XXX It would be better to
- * generate these via regen, in case a new version of the Unicode
- * standard adds new mappings, though that is not really likely. */
- switch (value) {
- case 'k':
- case 'K':
- /* KELVIN SIGN */
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A);
- break;
- case 's':
- case 'S':
- /* LATIN SMALL LETTER LONG S */
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F);
- break;
- case MICRO_SIGN:
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
- GREEK_SMALL_LETTER_MU);
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
- GREEK_CAPITAL_LETTER_MU);
- break;
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
- /* ANGSTROM SIGN */
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B);
- if (DEPENDS_SEMANTICS) { /* See the DEPENDS comment in the final 'else if' of this function */
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
- PL_fold_latin1[value]);
- }
- break;
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
- LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
- break;
- case LATIN_SMALL_LETTER_SHARP_S:
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
- LATIN_CAPITAL_LETTER_SHARP_S);
-
- /* Under /a, /d, and /u, this can match the two chars "ss"; only
- * MORE_ASCII_RESTRICTED (/aa) rules that out */
- if (! MORE_ASCII_RESTRICTED) {
- add_alternate(alternate_ptr, (U8 *) "ss", 2);
-
- /* And under /u or /a, it can match even if the target is
- * not utf8, so flag the node accordingly */
- if (AT_LEAST_UNI_SEMANTICS) {
- ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
- }
- }
- break;
- case 'F': case 'f':
- case 'I': case 'i':
- case 'L': case 'l':
- case 'T': case 't':
- case 'A': case 'a':
- case 'H': case 'h':
- case 'J': case 'j':
- case 'N': case 'n':
- case 'W': case 'w':
- case 'Y': case 'y':
- /* These all are targets of multi-character folds from code
- * points that require UTF8 to express, so they can't match
- * unless the target string is in UTF-8, so no action here is
- * necessary, as regexec.c properly handles the general case
- * for UTF-8 matching */
- break;
- default:
- /* Reaching here means 'value' was reported as having a fold
- * closure outside Latin1, but has no case above. Use
- * deprecated warning to increase the chances of this being
- * output */
- ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value);
- break;
- }
- }
- else if (DEPENDS_SEMANTICS
- && ! isASCII(value)
- && PL_fold_latin1[value] != value)
- {
- /* Under DEPENDS rules, non-ASCII Latin1 characters match their
- * folds only when the target string is in UTF-8. We add the fold
- * here to the list of things to match outside the bitmap, which
- * won't be looked at unless it is UTF8 (or else if something else
- * says to look even if not utf8, but those things better not happen
- * under DEPENDS semantics). */
- *invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]);
- }
-
- return stored;
-}
-
-
-PERL_STATIC_INLINE U8
-S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-    /* Turns on the bitmap bit for 'value' in 'node' unless it is on already
-     * and, when folding applies, lets set_regclass_bit_fold() deal with the
-     * fold of 'value' as well.
-     *
-     * The return value is the count of bits that went from 0 to 1: zero
-     * when the bit for 'value' was already on (nothing else is touched in
-     * that case), otherwise one plus whatever set_regclass_bit_fold()
-     * reports */
-
-    U8 newly_set = 0;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT;
-
-    if (! ANYOF_BITMAP_TEST(node, value)) {
-        ANYOF_BITMAP_SET(node, value);
-        newly_set = 1;
-
-        /* Folds under locale can't be known until runtime, so they are
-         * only handled here for the non-locale case */
-        if (FOLD && ! LOC) {
-            newly_set += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr);
-        }
-    }
-
-    return newly_set;
-}
-