if (e < s)
goto warn_and_return;
while (s < e) {
- if (!UTF8_IS_INVARIANT(*s))
- s += UTF8SKIP(s);
- else
- s++;
+ s += UTF8SKIP(s);
len++;
}
return is_utf8_common(p, &PL_utf8_X_extend, "_X_Extend");
}
-bool
-Perl_is_utf8_X_prepend(pTHX_ const U8 *p)
-{
- /* If no code points in the Unicode version being worked on match
- * GCB=Prepend, this will set PL_utf8_X_prepend to &PL_sv_undef during its
- * first call. Otherwise, it will set it to a swash created for it.
- * swash_fetch() hence can't be used without checking first if it is valid
- * to do so. */
-
- dVAR;
- bool initialized = cBOOL(PL_utf8_X_prepend);
- bool ret;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND;
-
- if (PL_utf8_X_prepend == &PL_sv_undef) {
- return FALSE;
- }
-
- if ((ret = is_utf8_common(p, &PL_utf8_X_prepend, "_X_GCB_Prepend"))
- || initialized)
- {
- return ret;
- }
-
- /* Here the code point being checked was not a prepend, and we hadn't
- * initialized PL_utf8_X_prepend, so we don't know if it is just this
- * particular input code point that didn't match, or if the table is
- * completely empty. The is_utf8_common() call did the initialization, so
- * we can inspect the swash's inversion list to find out. If there are no
- * elements in its inversion list, it's empty, and nothing will ever match,
- * so set things up so we can skip the check in future calls. */
- if (_invlist_len(_get_swash_invlist(PL_utf8_X_prepend)) == 0) {
- SvREFCNT_dec(PL_utf8_X_prepend);
- PL_utf8_X_prepend = &PL_sv_undef;
- }
-
- return FALSE;
-}
-
-bool
-Perl_is_utf8_X_special_begin(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_SPECIAL_BEGIN;
-
- return is_utf8_common(p, &PL_utf8_X_special_begin, "_X_Special_Begin");
-}
-
-bool
-Perl_is_utf8_X_L(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_L;
-
- return is_utf8_common(p, &PL_utf8_X_L, "_X_GCB_L");
-}
-
-bool
-Perl_is_utf8_X_RI(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_RI;
-
- return is_utf8_common(p, &PL_utf8_X_RI, "_X_RI");
-}
-
-/* These constants are for finding GCB=LV and GCB=LVT. These are for the
- * pre-composed Hangul syllables, which are all in a contiguous block and
- * arranged there in such a way so as to facilitate algorithmic determination of
- * their characteristics. As such, they don't need a swash, but can be
- * determined by simple arithmetic. Almost all are GCB=LVT, but every 28th one
- * is a GCB=LV */
-#define SBASE 0xAC00 /* Start of block */
-#define SCount 11172 /* Length of block */
-#define TCount 28
-
-#if 0 /* This routine is not currently used */
-bool
-Perl_is_utf8_X_LV(pTHX_ const U8 *p)
-{
- /* Unlike most other similarly named routines here, this does not create a
- * swash, so swash_fetch() cannot be used on PL_utf8_X_LV. */
-
- dVAR;
-
- UV cp = valid_utf8_to_uvchr(p, NULL);
-
- PERL_ARGS_ASSERT_IS_UTF8_X_LV;
-
- /* The earliest Unicode releases did not have these precomposed Hangul
- * syllables. Set to point to undef in that case, so will return false on
- * every call */
- if (! PL_utf8_X_LV) { /* Set up if this is the first time called */
- PL_utf8_X_LV = swash_init("utf8", "_X_GCB_LV", &PL_sv_undef, 1, 0);
- if (_invlist_len(_get_swash_invlist(PL_utf8_X_LV)) == 0) {
- SvREFCNT_dec(PL_utf8_X_LV);
- PL_utf8_X_LV = &PL_sv_undef;
- }
- }
-
- return (PL_utf8_X_LV != &PL_sv_undef
- && cp >= SBASE && cp < SBASE + SCount
- && (cp - SBASE) % TCount == 0); /* Only every TCount one is LV */
-}
-#endif
-
-bool
-Perl_is_utf8_X_LVT(pTHX_ const U8 *p)
-{
- /* Unlike most other similarly named routines here, this does not create a
- * swash, so swash_fetch() cannot be used on PL_utf8_X_LVT. */
-
- dVAR;
-
- UV cp = valid_utf8_to_uvchr(p, NULL);
-
- PERL_ARGS_ASSERT_IS_UTF8_X_LVT;
-
- /* The earliest Unicode releases did not have these precomposed Hangul
- * syllables. Set to point to undef in that case, so will return false on
- * every call */
- if (! PL_utf8_X_LVT) { /* Set up if this is the first time called */
- PL_utf8_X_LVT = swash_init("utf8", "_X_GCB_LVT", &PL_sv_undef, 1, 0);
- if (_invlist_len(_get_swash_invlist(PL_utf8_X_LVT)) == 0) {
- SvREFCNT_dec(PL_utf8_X_LVT);
- PL_utf8_X_LVT = &PL_sv_undef;
- }
- }
-
- return (PL_utf8_X_LVT != &PL_sv_undef
- && cp >= SBASE && cp < SBASE + SCount
- && (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
-}
-
-bool
-Perl_is_utf8_X_T(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_T;
-
- return is_utf8_common(p, &PL_utf8_X_T, "_X_GCB_T");
-}
-
-bool
-Perl_is_utf8_X_V(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_V;
-
- return is_utf8_common(p, &PL_utf8_X_V, "_X_GCB_V");
-}
-
-bool
-Perl_is_utf8_X_LV_LVT_V(pTHX_ const U8 *p)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_IS_UTF8_X_LV_LVT_V;
-
- return is_utf8_common(p, &PL_utf8_X_LV_LVT_V, "_X_LV_LVT_V");
-}
-
-bool
-Perl__is_utf8_quotemeta(pTHX_ const U8 *p)
-{
- /* For exclusive use of pp_quotemeta() */
-
- dVAR;
-
- PERL_ARGS_ASSERT__IS_UTF8_QUOTEMETA;
-
- return is_utf8_common(p, &PL_utf8_quotemeta, "_Perl_Quotemeta");
-}
-
/*
=for apidoc to_utf8_case
ENTER;
SAVEHINTS();
save_re_context();
+ /* We might get here via a subroutine signature which uses a utf8
+ * parameter name, at which point PL_subname will have been set
+ * but not yet used. */
+ save_item(PL_subname);
if (PL_parser && PL_parser->error_count)
SAVEI8(PL_parser->error_count), PL_parser->error_count = 0;
method = gv_fetchmeth(stash, "SWASHNEW", 8, -1);
if (!method) { /* demand load utf8 */
ENTER;
errsv_save = newSVsv(ERRSV);
- /* We might get here via a subroutine signature which uses a utf8
- * parameter name, at which point PL_subname will have been set
- * but not yet used. */
- save_item(PL_subname);
/* It is assumed that callers of this routine are not passing in
* any user derived data. */
/* Need to do this after save_re_context() as it will set
Perl__swash_inversion_hash(pTHX_ SV* const swash)
{
- /* Subject to change or removal. For use only in one place in regcomp.c.
+ /* Subject to change or removal. For use only in regcomp.c and regexec.c.
* Can't be used on a property that is subject to user override, as it
* relies on the value of SPECIALS in the swash which would be set by
* utf8_heavy.pl to the hash in the non-overridden file, and hence is not set