- else if ((flags & FOLD_FLAGS_FULL) && c == LATIN_SMALL_LETTER_SHARP_S) {
-
- /* If can't cross 127/128 boundary, can't return "ss"; instead return
- * two U+017F characters, as fc("\df") should eq fc("\x{17f}\x{17f}")
- * under those circumstances. */
- if (flags & FOLD_FLAGS_NOMIX_ASCII) {
- *lenp = 2 * sizeof(LATIN_SMALL_LETTER_LONG_S_UTF8) - 2;
- Copy(LATIN_SMALL_LETTER_LONG_S_UTF8 LATIN_SMALL_LETTER_LONG_S_UTF8,
- p, *lenp, U8);
- return LATIN_SMALL_LETTER_LONG_S;
- }
- else {
- *(p)++ = 's';
- *p = 's';
- *lenp = 2;
- return 's';
- }
- }
- else { /* In this range the fold of all other characters is their lower
- case */
- converted = toLOWER_LATIN1(c);
- }
-
- if (NATIVE_IS_INVARIANT(converted)) {
- *p = (U8) converted;
- *lenp = 1;
- }
- else {
- *(p)++ = UTF8_TWO_BYTE_HI(converted);
- *p = UTF8_TWO_BYTE_LO(converted);
- *lenp = 2;
- }
-
- return converted;
-}
-
-UV
-Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, const U8 flags)
-{
-
- /* Not currently externally documented, and subject to change
- * <flags> bits meanings:
- * FOLD_FLAGS_FULL iff full folding is to be used;
- * FOLD_FLAGS_LOCALE iff in locale
- * FOLD_FLAGS_NOMIX_ASCII iff non-ASCII to ASCII folds are prohibited
- */
-
- PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS;
-
- if (c < 256) {
- UV result = _to_fold_latin1((U8) c, p, lenp,
- flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
- /* It is illegal for the fold to cross the 255/256 boundary under
- * locale; in this case return the original */
- return (result > 256 && flags & FOLD_FLAGS_LOCALE)
- ? c
- : result;
- }
-
- /* If no special needs, just use the macro */
- if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) {
- uvchr_to_utf8(p, c);
- return CALL_FOLD_CASE(p, p, lenp, flags & FOLD_FLAGS_FULL);
- }
- else { /* Otherwise, _to_utf8_fold_flags has the intelligence to deal with
- the special flags. */
- U8 utf8_c[UTF8_MAXBYTES + 1];
- uvchr_to_utf8(utf8_c, c);
- return _to_utf8_fold_flags(utf8_c, p, lenp, flags, NULL);
- }
-}
-
-bool
-Perl_is_uni_alnum_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isALNUM_LC(c);
- }
- return _is_uni_FOO(_CC_WORDCHAR, c);
-}
-
-bool
-Perl_is_uni_alnumc_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isALPHANUMERIC_LC(c);
- }
- return _is_uni_FOO(_CC_ALPHANUMERIC, c);
-}
-
-bool
-Perl_is_uni_idfirst_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isIDFIRST_LC(c);
- }
- return _is_uni_perl_idstart(c);
-}
-
-bool
-Perl_is_uni_alpha_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isALPHA_LC(c);
- }
- return _is_uni_FOO(_CC_ALPHA, c);
-}
-
-bool
-Perl_is_uni_ascii_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isASCII_LC(c);
- }
- return 0;
-}
-
-bool
-Perl_is_uni_blank_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isBLANK_LC(c);
- }
- return isBLANK_uni(c);
-}
-
-bool
-Perl_is_uni_space_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isSPACE_LC(c);
- }
- return isSPACE_uni(c);
-}
-
-bool
-Perl_is_uni_digit_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isDIGIT_LC(c);
- }
- return _is_uni_FOO(_CC_DIGIT, c);
-}
-
-bool
-Perl_is_uni_upper_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isUPPER_LC(c);
- }
- return _is_uni_FOO(_CC_UPPER, c);
-}
-
-bool
-Perl_is_uni_lower_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isLOWER_LC(c);
- }
- return _is_uni_FOO(_CC_LOWER, c);
-}
-
-bool
-Perl_is_uni_cntrl_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isCNTRL_LC(c);
- }
- return 0;
-}
-
-bool
-Perl_is_uni_graph_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isGRAPH_LC(c);
- }
- return _is_uni_FOO(_CC_GRAPH, c);
-}
-
-bool
-Perl_is_uni_print_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isPRINT_LC(c);
- }
- return _is_uni_FOO(_CC_PRINT, c);
-}
-
-bool
-Perl_is_uni_punct_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isPUNCT_LC(c);
- }
- return _is_uni_FOO(_CC_PUNCT, c);
-}
-
-bool
-Perl_is_uni_xdigit_lc(pTHX_ UV c)
-{
- if (c < 256) {
- return isXDIGIT_LC(c);
- }
- return isXDIGIT_uni(c);
-}
-
-U32
-Perl_to_uni_upper_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_upper(c, tmpbuf, &len);
-}
-
-U32
-Perl_to_uni_title_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character XXX -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_title(c, tmpbuf, &len);
-}
-
-U32
-Perl_to_uni_lower_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_lower(c, tmpbuf, &len);
-}
-
-PERL_STATIC_INLINE bool
-S_is_utf8_common(pTHX_ const U8 *const p, SV **swash,
- const char *const swashname)
-{
- /* returns a boolean giving whether or not the UTF8-encoded character that
- * starts at <p> is in the swash indicated by <swashname>. <swash>
- * contains a pointer to where the swash indicated by <swashname>
- * is to be stored; which this routine will do, so that future calls will
- * look at <*swash> and only generate a swash if it is not null
- *
- * Note that it is assumed that the buffer length of <p> is enough to
- * contain all the bytes that comprise the character. Thus, <*p> should
- * have been checked before this call for mal-formedness enough to assure
- * that. */
-
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_COMMON;
-
- /* The API should have included a length for the UTF-8 character in <p>,
- * but it doesn't. We therefore assume that p has been validated at least
- * as far as there being enough bytes available in it to accommodate the
- * character without reading beyond the end, and pass that number on to the
- * validating routine */
- if (! is_utf8_char_buf(p, p + UTF8SKIP(p))) {
- if (ckWARN_d(WARN_UTF8)) {
- Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED,WARN_UTF8),
- "Passing malformed UTF-8 to \"%s\" is deprecated", swashname);
- if (ckWARN(WARN_UTF8)) { /* This will output details as to the
- what the malformation is */
- utf8_to_uvchr_buf(p, p + UTF8SKIP(p), NULL);
- }
- }
- return FALSE;
- }
- if (!*swash) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- *swash = _core_swash_init("utf8", swashname, &PL_sv_undef, 1, 0, NULL, &flags);
- }
-
- return swash_fetch(*swash, p, TRUE) != 0;
-}
-
-bool
-Perl__is_utf8_FOO(pTHX_ const U8 classnum, const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT__IS_UTF8_FOO;
-
- assert(classnum < _FIRST_NON_SWASH_CC);
-
- return is_utf8_common(p, &PL_utf8_swash_ptrs[classnum], swash_property_names[classnum]);
-}
-
-bool
-Perl_is_utf8_alnum(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_ALNUM;
-
- /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
- * descendant of isalnum(3), in other words, it doesn't
- * contain the '_'. --jhi */
- return is_utf8_common(p, &PL_utf8_swash_ptrs[_CC_WORDCHAR], "IsWord");
-}
-
-bool
-Perl_is_utf8_alnumc(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_ALNUMC;
-
- return is_utf8_common(p, &PL_utf8_swash_ptrs[_CC_ALPHANUMERIC], "IsAlnum");
-}
-
-bool
-Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_IDFIRST;
-
- return S_is_utf8_idfirst(aTHX_ p);
-}
-
-bool
-Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST;
-
- if (*p == '_')
- return TRUE;
- /* is_utf8_idstart would be more logical. */
- return is_utf8_common(p, &PL_utf8_xidstart, "XIdStart");
-}
-
-bool
-Perl__is_utf8_perl_idstart(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART;
-
- return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart");
-}
-
-bool
-Perl__is_utf8_perl_idcont(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT;
-
- return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont");
-}
-
-
-bool
-Perl_is_utf8_idcont(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_IDCONT;
-
- return is_utf8_common(p, &PL_utf8_idcont, "IdContinue");
-}
-
-bool
-Perl_is_utf8_xidcont(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_XIDCONT;
-
- return is_utf8_common(p, &PL_utf8_idcont, "XIdContinue");
-}
-
-bool
-Perl_is_utf8_alpha(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_ALPHA;
-
- return is_utf8_common(p, &PL_utf8_swash_ptrs[_CC_ALPHA], "IsAlpha");
-}
-
-bool
-Perl_is_utf8_ascii(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_ASCII;
-
- /* ASCII characters are the same whether in utf8 or not. So the macro
- * works on both utf8 and non-utf8 representations. */
- return isASCII(*p);
-}
-
-bool
-Perl_is_utf8_blank(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_BLANK;
-
- return isBLANK_utf8(p);
-}
-
-bool
-Perl_is_utf8_space(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_SPACE;
-
- return isSPACE_utf8(p);
-}