This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Restrict scope/Shorten some very long macro names
[perl5.git] / regexec.c
index ca2dc4e..f3edc3a 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -118,8 +118,6 @@ static const char non_utf8_target_but_utf8_required[]
     goto target;                                                         \
 } STMT_END
 
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-
 #ifndef STATIC
 #define        STATIC  static
 #endif
@@ -4512,7 +4510,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
     UV c1 = (UV)CHRTEST_NOT_A_CP_1;
     UV c2 = (UV)CHRTEST_NOT_A_CP_2;
     bool use_chrtest_void = FALSE;
-    const bool is_utf8_pat = reginfo->is_utf8_pat;
+    const bool utf8_pat = reginfo->is_utf8_pat;
 
     /* Used when we have both utf8 input and utf8 output, to avoid converting
      * to/from code points */
@@ -4521,6 +4519,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
 
     U8 *pat = (U8*)STRING(text_node);
     U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+    const U8 op = OP(text_node);
 
     if (! isEXACTFish(OP(text_node))) {
 
@@ -4528,7 +4527,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          * character.  If both the pat and the target are UTF-8, we can just
          * copy the input to the output, avoiding finding the code point of
          * that character */
-        if (!is_utf8_pat) {
+        if (! utf8_pat) {
             assert(! isEXACT_REQ8(OP(text_node)));
             c2 = c1 = *pat;
         }
@@ -4558,9 +4557,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          * fold.  But, in such a pattern only locale-problematic characters
          * aren't folded, so we can skip this completely if the first character
          * in the node isn't one of the tricky ones */
-        if (OP(text_node) == EXACTFL) {
+        if (op == EXACTFL) {
 
-            if (! is_utf8_pat) {
+            if (! utf8_pat) {
                 if (IN_UTF8_CTYPE_LOCALE && *pat == LATIN_SMALL_LETTER_SHARP_S)
                 {
                     folded[0] = folded[1] = 's';
@@ -4595,8 +4594,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
             }
         }
 
-        if (    ( is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
-             || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
+        if (    ( utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
+             || (!utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
         {
             /* Multi-character folds require more context to sort out.  Also
              * PL_utf8_foldclosures used below doesn't handle them, so have to
@@ -4604,10 +4603,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
             use_chrtest_void = TRUE;
         }
         else { /* an EXACTFish node which doesn't begin with a multi-char fold */
-            c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+            c1 = utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
 
             if (   UNLIKELY(PL_in_utf8_turkic_locale)
-                && OP(text_node) == EXACTFL
+                && op == EXACTFL
                 && UNLIKELY(   c1 == 'i' || c1 == 'I'
                             || c1 == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
                             || c1 == LATIN_SMALL_LETTER_DOTLESS_I))
@@ -4653,10 +4652,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                      * circumstances.  If it isn't, it means the only legal
                      * match of c1 is itself. */
                     if (    c2 < 256
-                        && (   (   OP(text_node) == EXACTFL
+                        && (   (   op == EXACTFL
                                 && ! IN_UTF8_CTYPE_LOCALE)
-                            || ((     OP(text_node) == EXACTFAA
-                                   || OP(text_node) == EXACTFAA_NO_TRIE)
+                            || ((     op == EXACTFAA
+                                   || op == EXACTFAA_NO_TRIE)
                                 && (isASCII(c1) || isASCII(c2)))))
                     {
                         c2 = c1;
@@ -4666,9 +4665,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
             else /* Here, c1 is <= 255 */
                 if (   utf8_target
                     && HAS_NONLATIN1_FOLD_CLOSURE(c1)
-                    && ( ! (OP(text_node) == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
-                    && (   (   OP(text_node) != EXACTFAA
-                            && OP(text_node) != EXACTFAA_NO_TRIE)
+                    && ( ! (op == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
+                    && (   (   op != EXACTFAA
+                            && op != EXACTFAA_NO_TRIE)
                         ||   ! isASCII(c1)))
             {
                 /* Here, there could be something above Latin1 in the target
@@ -4685,7 +4684,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
             }
             else { /* Here nothing above Latin1 can fold to the pattern
                       character */
-                switch (OP(text_node)) {
+                switch (op) {
 
                     case EXACTFL:   /* /l rules */
                         c2 = PL_fold_locale[c1];
@@ -4693,7 +4692,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
 
                     case EXACTF:   /* This node only generated for non-utf8
                                     patterns */
-                        assert(! is_utf8_pat);
+                        assert(! utf8_pat);
                         if (! utf8_target) {    /* /d rules */
                             c2 = PL_fold[c1];
                             break;
@@ -4703,7 +4702,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                         * EXACTFAA as nothing in Latin1 folds to ASCII */
                     case EXACTFAA_NO_TRIE:   /* This node only generated for
                                                 non-utf8 patterns */
-                        assert(! is_utf8_pat);
+                        assert(! utf8_pat);
                         /* FALLTHROUGH */
                     case EXACTFAA:
                     case EXACTFUP:
@@ -4715,7 +4714,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                         NOT_REACHED; /* NOTREACHED */
 
                     default:
-                        Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
+                        Perl_croak(aTHX_ "panic: Unexpected op %u", op);
                         NOT_REACHED; /* NOTREACHED */
                 }
             }
@@ -5766,10 +5765,10 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
 
 /* Macros for regmatch(), using its internal variables */
 #define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
-#define NEXTCHR_IS_EOS (nextchr < 0)
+#define NEXTCHR_IS_EOS (nextbyte < 0)
 
 #define SET_nextchr \
-    nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
+    nextbyte = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
 
 #define SET_locinput(p) \
     locinput = (p);  \
@@ -6007,7 +6006,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     char *pushinput; /* where to continue after a PUSH */
     char *pusheol;   /* where to stop matching (loceol) after a PUSH */
     U8   *pushsr0;   /* save starting pos of script run */
-    I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
+    PERL_INT_FAST16_T nextbyte;   /* is always set to UCHARAT(locinput), or -1
+                                     at EOS */
 
     bool result = 0;       /* return value of S_regmatch */
     U32 depth = 0;            /* depth of backtrack stack */
@@ -6082,7 +6082,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
     st = PL_regmatch_state;
 
-    /* Note that nextchr is a byte even in UTF */
+    /* Note that nextbyte is a byte even in UTF */
     SET_nextchr;
     scan = prog;
 
@@ -6118,7 +6118,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         to_complement = 0;
 
         SET_nextchr;
-        assert(nextchr < 256 && (nextchr >= 0 || nextchr == NEXTCHR_EOS));
+        assert(nextbyte < 256 && (nextbyte >= 0 || nextbyte == NEXTCHR_EOS));
 
        switch (state_num) {
        case SBOL: /*  /^../ and /\A../  */
@@ -6154,12 +6154,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            NOT_REACHED; /* NOTREACHED */
 
        case MEOL: /* /..$/m  */
-           if (!NEXTCHR_IS_EOS && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextbyte != '\n')
                sayNO;
            break;
 
        case SEOL: /* /..$/  */
-           if (!NEXTCHR_IS_EOS && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextbyte != '\n')
                sayNO;
            if (reginfo->strend - locinput > 1)
                sayNO;
@@ -6178,7 +6178,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
        case REG_ANY: /*  /./  */
            if (   NEXTCHR_IS_EOS
                 || locinput >= loceol
-                || nextchr == '\n')
+                || nextbyte == '\n')
             {
                sayNO;
             }
@@ -6193,7 +6193,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              */
             if ( !   NEXTCHR_IS_EOS
                 &&   locinput < loceol
-                && ! ANYOF_BITMAP_TEST(scan, nextchr))
+                && ! ANYOF_BITMAP_TEST(scan, nextbyte))
             {
                 DEBUG_EXECUTE_r(
                     Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
@@ -6262,7 +6262,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
                     if (utf8_target
                         && ! NEXTCHR_IS_EOS
-                        && UTF8_IS_ABOVE_LATIN1(nextchr)
+                        && UTF8_IS_ABOVE_LATIN1(nextbyte)
                         && scan->flags == EXACTL)
                     {
                         /* We only output for EXACTL, as we let the folder
@@ -6275,7 +6275,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 if (   trie->bitmap
                     && (     NEXTCHR_IS_EOS
                         ||   locinput >= loceol
-                        || ! TRIE_BITMAP_TEST(trie, nextchr)))
+                        || ! TRIE_BITMAP_TEST(trie, nextbyte)))
                 {
                    if (trie->states[ state ].wordnum) {
                         DEBUG_EXECUTE_r(
@@ -6655,7 +6655,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 /* The target and the pattern have the same utf8ness. */
                 /* Inline the first character, for speed. */
                 if (   loceol - locinput < ln
-                    || UCHARAT(s) != nextchr
+                    || UCHARAT(s) != nextbyte
                     || (ln > 1 && memNE(s, locinput, ln)))
                 {
                     sayNO;
@@ -6762,9 +6762,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            }
 
            /* Neither the target nor the pattern are utf8 */
-           if (UCHARAT(s) != nextchr
+           if (UCHARAT(s) != nextbyte
                 && !NEXTCHR_IS_EOS
-               && UCHARAT(s) != fold_array[nextchr])
+               && UCHARAT(s) != fold_array[nextbyte])
            {
                sayNO;
            }
@@ -6809,7 +6809,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      : isWORDCHAR_LC(UCHARAT(locinput - 1));
                 b2 = (NEXTCHR_IS_EOS)
                     ? isWORDCHAR_LC('\n')
-                    : isWORDCHAR_LC(nextchr);
+                    : isWORDCHAR_LC(nextbyte);
            }
             if (to_complement ^ (b1 == b2)) {
                 sayNO;
@@ -6850,7 +6850,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  : isWORDCHAR_A(UCHARAT(locinput - 1));
             b2 = (NEXTCHR_IS_EOS)
                 ? isWORDCHAR_A('\n')
-                : isWORDCHAR_A(nextchr);
+                : isWORDCHAR_A(nextbyte);
             if (to_complement ^ (b1 == b2)) {
                 sayNO;
             }
@@ -6982,7 +6982,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                             : isWORDCHAR_L1(UCHARAT(locinput - 1));
                         b2 = (NEXTCHR_IS_EOS)
                             ? 0 /* isWORDCHAR_L1('\n') */
-                            : isWORDCHAR_L1(nextchr);
+                            : isWORDCHAR_L1(nextbyte);
                         match = cBOOL(b1 != b2);
                         break;
                     }
@@ -7216,8 +7216,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* Use isFOO_lc() for characters within Latin1.  (Note that
              * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
              * wouldn't be invariant) */
-            if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
-                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
+            if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextbyte)))) {
                     sayNO;
                 }
 
@@ -7235,7 +7235,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* Here is a UTF-8 variant code point below 256 and the target is
              * UTF-8 */
             if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
-                                            EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+                                            EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
                                             *(locinput + 1))))))
             {
                 sayNO;
@@ -7260,7 +7260,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
 
             /* All UTF-8 variants match */
-            if (! UTF8_IS_INVARIANT(nextchr)) {
+            if (! UTF8_IS_INVARIANT(nextbyte)) {
                 goto increment_locinput;
             }
 
@@ -7280,7 +7280,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
           join_nposixa:
 
-            if (! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
+            if (! (to_complement ^ cBOOL(_generic_isCC_A(nextbyte,
                                                                 FLAGS(scan)))))
             {
                 sayNO;
@@ -7304,8 +7304,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* Use _generic_isCC() for characters within Latin1.  (Note that
              * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
              * wouldn't be invariant) */
-            if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
-                if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
+            if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+                if (! (to_complement ^ cBOOL(_generic_isCC(nextbyte,
                                                            FLAGS(scan)))))
                 {
                     sayNO;
@@ -7314,7 +7314,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
                 if (! (to_complement
-                       ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+                       ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
                                                                *(locinput + 1)),
                                              FLAGS(scan)))))
                 {
@@ -7385,7 +7385,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                /* Match either CR LF  or '.', as all the other possibilities
                 * require utf8 */
                locinput++;         /* Match the . or CR */
-               if (nextchr == '\r' /* And if it was CR, and the next is LF,
+               if (nextbyte == '\r' /* And if it was CR, and the next is LF,
                                       match the LF */
                    && locinput <  loceol
                    && UCHARAT(locinput) == '\n')
@@ -7545,9 +7545,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            /* Not utf8:  Inline the first character, for speed. */
            if ( ! NEXTCHR_IS_EOS
                 && locinput < loceol
-                && UCHARAT(s) != nextchr
+                && UCHARAT(s) != nextbyte
                 && (   type == REF
-                    || UCHARAT(s) != fold_array[nextchr]))
+                    || UCHARAT(s) != fold_array[nextbyte]))
             {
                sayNO;
             }
@@ -8696,7 +8696,7 @@ NULL
                     depth, (IV)ST.count)
                );
            if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
-                if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
+                if (! UTF8_IS_INVARIANT(nextbyte) && utf8_target) {
 
                            /* (We can use memEQ and memNE in this file without
                             * having to worry about one being shorter than the
@@ -8719,12 +8719,12 @@ NULL
                         goto reenter_switch;
                     }
                 }
-                else if (nextchr != ST.c1 && nextchr != ST.c2) {
+                else if (nextbyte != ST.c1 && nextbyte != ST.c2) {
                     /* simulate B failing */
                     DEBUG_OPTIMISE_r(
                         Perl_re_exec_indentf( aTHX_  "CURLYM Fast bail next target=0x%X c1=0x%X c2=0x%X\n",
                             depth,
-                            (int) nextchr, ST.c1, ST.c2)
+                            (int) nextbyte, ST.c1, ST.c2)
                     );
                     state_num = CURLYM_B_fail;
                     goto reenter_switch;
@@ -9408,7 +9408,7 @@ NULL
           increment_locinput:
             assert(!NEXTCHR_IS_EOS);
             if (utf8_target) {
-                locinput += PL_utf8skip[nextchr];
+                locinput += PL_utf8skip[nextbyte];
                 /* locinput is allowed to go 1 char off the end (signifying
                  * EOS), but not 2+ */
                 if (locinput >  loceol)
@@ -10911,7 +10911,7 @@ Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const
 }
 
 /*
-=for apidoc_section Unicode Support
+=for apidoc_section $unicode
 
 =for apidoc isSCRIPT_RUN