goto target; \
} STMT_END
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-
/* Fall back to plain file-local linkage unless STATIC was already defined
 * before this point. */
#if !defined(STATIC)
#  define STATIC static
#endif
UV c1 = (UV)CHRTEST_NOT_A_CP_1;
UV c2 = (UV)CHRTEST_NOT_A_CP_2;
bool use_chrtest_void = FALSE;
- const bool is_utf8_pat = reginfo->is_utf8_pat;
+ const bool utf8_pat = reginfo->is_utf8_pat;
/* Used when we have both utf8 input and utf8 output, to avoid converting
* to/from code points */
U8 *pat = (U8*)STRING(text_node);
U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+ const U8 op = OP(text_node);
if (! isEXACTFish(OP(text_node))) {
* character. If both the pat and the target are UTF-8, we can just
* copy the input to the output, avoiding finding the code point of
* that character */
- if (!is_utf8_pat) {
+ if (! utf8_pat) {
assert(! isEXACT_REQ8(OP(text_node)));
c2 = c1 = *pat;
}
* fold. But, in such a pattern only locale-problematic characters
* aren't folded, so we can skip this completely if the first character
* in the node isn't one of the tricky ones */
- if (OP(text_node) == EXACTFL) {
+ if (op == EXACTFL) {
- if (! is_utf8_pat) {
+ if (! utf8_pat) {
if (IN_UTF8_CTYPE_LOCALE && *pat == LATIN_SMALL_LETTER_SHARP_S)
{
folded[0] = folded[1] = 's';
}
}
- if ( ( is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
- || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
+ if ( ( utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
+ || (!utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
{
/* Multi-character folds require more context to sort out. Also
* PL_utf8_foldclosures used below doesn't handle them, so have to
use_chrtest_void = TRUE;
}
else { /* an EXACTFish node which doesn't begin with a multi-char fold */
- c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+ c1 = utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
if ( UNLIKELY(PL_in_utf8_turkic_locale)
- && OP(text_node) == EXACTFL
+ && op == EXACTFL
&& UNLIKELY( c1 == 'i' || c1 == 'I'
|| c1 == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
|| c1 == LATIN_SMALL_LETTER_DOTLESS_I))
* circumstances. If it isn't, it means the only legal
* match of c1 is itself. */
if ( c2 < 256
- && ( ( OP(text_node) == EXACTFL
+ && ( ( op == EXACTFL
&& ! IN_UTF8_CTYPE_LOCALE)
- || (( OP(text_node) == EXACTFAA
- || OP(text_node) == EXACTFAA_NO_TRIE)
+ || (( op == EXACTFAA
+ || op == EXACTFAA_NO_TRIE)
&& (isASCII(c1) || isASCII(c2)))))
{
c2 = c1;
else /* Here, c1 is <= 255 */
if ( utf8_target
&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
- && ( ! (OP(text_node) == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
- && ( ( OP(text_node) != EXACTFAA
- && OP(text_node) != EXACTFAA_NO_TRIE)
+ && ( ! (op == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
+ && ( ( op != EXACTFAA
+ && op != EXACTFAA_NO_TRIE)
|| ! isASCII(c1)))
{
/* Here, there could be something above Latin1 in the target
}
else { /* Here nothing above Latin1 can fold to the pattern
character */
- switch (OP(text_node)) {
+ switch (op) {
case EXACTFL: /* /l rules */
c2 = PL_fold_locale[c1];
case EXACTF: /* This node only generated for non-utf8
patterns */
- assert(! is_utf8_pat);
+ assert(! utf8_pat);
if (! utf8_target) { /* /d rules */
c2 = PL_fold[c1];
break;
* EXACTFAA as nothing in Latin1 folds to ASCII */
case EXACTFAA_NO_TRIE: /* This node only generated for
non-utf8 patterns */
- assert(! is_utf8_pat);
+ assert(! utf8_pat);
/* FALLTHROUGH */
case EXACTFAA:
case EXACTFUP:
NOT_REACHED; /* NOTREACHED */
default:
- Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
+ Perl_croak(aTHX_ "panic: Unexpected op %u", op);
NOT_REACHED; /* NOTREACHED */
}
}
/* Macros for regmatch(), using its internal variables */
#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
-#define NEXTCHR_IS_EOS (nextchr < 0)
+#define NEXTCHR_IS_EOS (nextbyte < 0)
#define SET_nextchr \
- nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
+ nextbyte = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
#define SET_locinput(p) \
locinput = (p); \
char *pushinput; /* where to continue after a PUSH */
char *pusheol; /* where to stop matching (loceol) after a PUSH */
U8 *pushsr0; /* save starting pos of script run */
- I32 nextchr; /* is always set to UCHARAT(locinput), or -1 at EOS */
+ PERL_INT_FAST16_T nextbyte; /* is always set to UCHARAT(locinput), or
+ NEXTCHR_EOS (a negative sentinel) at EOS */
bool result = 0; /* return value of S_regmatch */
U32 depth = 0; /* depth of backtrack stack */
st = PL_regmatch_state;
- /* Note that nextchr is a byte even in UTF */
+ /* Note that nextbyte is a byte even in UTF */
SET_nextchr;
scan = prog;
to_complement = 0;
SET_nextchr;
- assert(nextchr < 256 && (nextchr >= 0 || nextchr == NEXTCHR_EOS));
+ assert(nextbyte < 256 && (nextbyte >= 0 || nextbyte == NEXTCHR_EOS));
switch (state_num) {
case SBOL: /* /^../ and /\A../ */
NOT_REACHED; /* NOTREACHED */
case MEOL: /* /..$/m */
- if (!NEXTCHR_IS_EOS && nextchr != '\n')
+ if (!NEXTCHR_IS_EOS && nextbyte != '\n')
sayNO;
break;
case SEOL: /* /..$/ */
- if (!NEXTCHR_IS_EOS && nextchr != '\n')
+ if (!NEXTCHR_IS_EOS && nextbyte != '\n')
sayNO;
if (reginfo->strend - locinput > 1)
sayNO;
case REG_ANY: /* /./ */
if ( NEXTCHR_IS_EOS
|| locinput >= loceol
- || nextchr == '\n')
+ || nextbyte == '\n')
{
sayNO;
}
*/
if ( ! NEXTCHR_IS_EOS
&& locinput < loceol
- && ! ANYOF_BITMAP_TEST(scan, nextchr))
+ && ! ANYOF_BITMAP_TEST(scan, nextbyte))
{
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_ "%sTRIE: failed to match trie start class...%s\n",
_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (utf8_target
&& ! NEXTCHR_IS_EOS
- && UTF8_IS_ABOVE_LATIN1(nextchr)
+ && UTF8_IS_ABOVE_LATIN1(nextbyte)
&& scan->flags == EXACTL)
{
/* We only output for EXACTL, as we let the folder
if ( trie->bitmap
&& ( NEXTCHR_IS_EOS
|| locinput >= loceol
- || ! TRIE_BITMAP_TEST(trie, nextchr)))
+ || ! TRIE_BITMAP_TEST(trie, nextbyte)))
{
if (trie->states[ state ].wordnum) {
DEBUG_EXECUTE_r(
/* The target and the pattern have the same utf8ness. */
/* Inline the first character, for speed. */
if ( loceol - locinput < ln
- || UCHARAT(s) != nextchr
+ || UCHARAT(s) != nextbyte
|| (ln > 1 && memNE(s, locinput, ln)))
{
sayNO;
}
/* Neither the target nor the pattern are utf8 */
- if (UCHARAT(s) != nextchr
+ if (UCHARAT(s) != nextbyte
&& !NEXTCHR_IS_EOS
- && UCHARAT(s) != fold_array[nextchr])
+ && UCHARAT(s) != fold_array[nextbyte])
{
sayNO;
}
: isWORDCHAR_LC(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? isWORDCHAR_LC('\n')
- : isWORDCHAR_LC(nextchr);
+ : isWORDCHAR_LC(nextbyte);
}
if (to_complement ^ (b1 == b2)) {
sayNO;
: isWORDCHAR_A(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? isWORDCHAR_A('\n')
- : isWORDCHAR_A(nextchr);
+ : isWORDCHAR_A(nextbyte);
if (to_complement ^ (b1 == b2)) {
sayNO;
}
: isWORDCHAR_L1(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? 0 /* isWORDCHAR_L1('\n') */
- : isWORDCHAR_L1(nextchr);
+ : isWORDCHAR_L1(nextbyte);
match = cBOOL(b1 != b2);
break;
}
/* Use isFOO_lc() for characters within Latin1. (Note that
* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
* wouldn't be invariant) */
- if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
- if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
+ if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+ if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextbyte)))) {
sayNO;
}
/* Here is a UTF-8 variant code point below 256 and the target is
* UTF-8 */
if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
- EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+ EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
*(locinput + 1))))))
{
sayNO;
}
/* All UTF-8 variants match */
- if (! UTF8_IS_INVARIANT(nextchr)) {
+ if (! UTF8_IS_INVARIANT(nextbyte)) {
goto increment_locinput;
}
join_nposixa:
- if (! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
+ if (! (to_complement ^ cBOOL(_generic_isCC_A(nextbyte,
FLAGS(scan)))))
{
sayNO;
/* Use _generic_isCC() for characters within Latin1. (Note that
* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
* wouldn't be invariant) */
- if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
- if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
+ if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+ if (! (to_complement ^ cBOOL(_generic_isCC(nextbyte,
FLAGS(scan)))))
{
sayNO;
}
else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
if (! (to_complement
- ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+ ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
*(locinput + 1)),
FLAGS(scan)))))
{
/* Match either CR LF or '.', as all the other possibilities
* require utf8 */
locinput++; /* Match the . or CR */
- if (nextchr == '\r' /* And if it was CR, and the next is LF,
+ if (nextbyte == '\r' /* And if it was CR, and the next is LF,
match the LF */
&& locinput < loceol
&& UCHARAT(locinput) == '\n')
/* Not utf8: Inline the first character, for speed. */
if ( ! NEXTCHR_IS_EOS
&& locinput < loceol
- && UCHARAT(s) != nextchr
+ && UCHARAT(s) != nextbyte
&& ( type == REF
- || UCHARAT(s) != fold_array[nextchr]))
+ || UCHARAT(s) != fold_array[nextbyte]))
{
sayNO;
}
depth, (IV)ST.count)
);
if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
- if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
+ if (! UTF8_IS_INVARIANT(nextbyte) && utf8_target) {
/* (We can use memEQ and memNE in this file without
* having to worry about one being shorter than the
goto reenter_switch;
}
}
- else if (nextchr != ST.c1 && nextchr != ST.c2) {
+ else if (nextbyte != ST.c1 && nextbyte != ST.c2) {
/* simulate B failing */
DEBUG_OPTIMISE_r(
Perl_re_exec_indentf( aTHX_ "CURLYM Fast bail next target=0x%X c1=0x%X c2=0x%X\n",
depth,
- (int) nextchr, ST.c1, ST.c2)
+ (int) nextbyte, ST.c1, ST.c2)
);
state_num = CURLYM_B_fail;
goto reenter_switch;
increment_locinput:
assert(!NEXTCHR_IS_EOS);
if (utf8_target) {
- locinput += PL_utf8skip[nextchr];
+ locinput += PL_utf8skip[nextbyte];
/* locinput is allowed to go 1 char off the end (signifying
* EOS), but not 2+ */
if (locinput > loceol)
}
/*
-=for apidoc_section Unicode Support
+=for apidoc_section $unicode
=for apidoc isSCRIPT_RUN