This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
inline_invlist.c -> invlist_inline.h
[perl5.git] / regcomp.c
index 50a9e6c..411e3c7 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -87,7 +87,7 @@ EXTERN_C const struct regexp_engine my_reg_engine;
 #endif
 
 #include "dquote_static.c"
-#include "inline_invlist.c"
+#include "invlist_inline.h"
 #include "unicode_constants.h"
 
 #define HAS_NONLATIN1_FOLD_CLOSURE(i) \
@@ -808,9 +808,6 @@ static const scan_data_t zero_scan_data =
             if (RExC_seen & REG_GPOS_SEEN)                                  \
                 PerlIO_printf(Perl_debug_log,"REG_GPOS_SEEN ");             \
                                                                             \
-            if (RExC_seen & REG_CANY_SEEN)                                  \
-                PerlIO_printf(Perl_debug_log,"REG_CANY_SEEN ");             \
-                                                                            \
             if (RExC_seen & REG_RECURSE_SEEN)                               \
                 PerlIO_printf(Perl_debug_log,"REG_RECURSE_SEEN ");          \
                                                                             \
@@ -5069,7 +5066,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                    Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
                                                                      OP(scan));
 #endif
-               case CANY:
                case SANY:
                    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
                        ssc_match_all_cp(data->start_class);
@@ -6180,6 +6176,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 
        ENTER;
        SAVETMPS;
+       save_re_context();
        PUSHSTACKi(PERLSI_REQUIRE);
         /* G_RE_REPARSING causes the toker to collapse \\ into \ when
          * parsing qr''; normally only q'' does this. It also alters
@@ -7287,8 +7284,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                                 lookbehind */
     if (pRExC_state->num_code_blocks)
        r->extflags |= RXf_EVAL_SEEN;
-    if (RExC_seen & REG_CANY_SEEN)
-        r->intflags |= PREGf_CANY_SEEN;
     if (RExC_seen & REG_VERBARG_SEEN)
     {
        r->intflags |= PREGf_VERBARG_SEEN;
@@ -7319,7 +7314,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          * flags appropriately - Yves */
         regnode *first = ri->program + 1;
         U8 fop = OP(first);
-        regnode *next = NEXTOPER(first);
+        regnode *next = regnext(first);
         U8 nop = OP(next);
 
         if (PL_regkind[fop] == NOTHING && nop == END)
@@ -7333,13 +7328,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             r->extflags |= RXf_START_ONLY;
         else if (fop == PLUS
                  && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE
-                 && OP(regnext(first)) == END)
+                 && nop == END)
             r->extflags |= RXf_WHITE;
         else if ( r->extflags & RXf_SPLIT
                   && (fop == EXACT || fop == EXACTL)
                   && STR_LEN(first) == 1
                   && *(STRING(first)) == ' '
-                  && OP(regnext(first)) == END )
+                  && nop == END )
             r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
 
     }
@@ -7700,13 +7695,8 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
         sv_setpvn(sv, s, i);
         TAINT_set(oldtainted);
 #endif
-        if ( (rx->intflags & PREGf_CANY_SEEN)
-            ? (RXp_MATCH_UTF8(rx)
-                        && (!i || is_utf8_string((U8*)s, i)))
-            : (RXp_MATCH_UTF8(rx)) )
-        {
+        if (RXp_MATCH_UTF8(rx))
             SvUTF8_on(sv);
-        }
         else
             SvUTF8_off(sv);
         if (TAINTING_get) {
@@ -7991,7 +7981,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  * Some of the methods should always be private to the implementation, and some
  * should eventually be made public */
 
-/* The header definitions are in F<inline_invlist.c> */
+/* The header definitions are in F<invlist_inline.h> */
 
 PERL_STATIC_INLINE UV*
 S__invlist_array_init(SV* const invlist, const bool will_have_0)
@@ -11806,13 +11796,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            goto finish_meta_pat;
        case 'C':
-           ret = reg_node(pRExC_state, CANY);
-            RExC_seen |= REG_CANY_SEEN;
-           *flagp |= HASWIDTH|SIMPLE;
-            if (PASS2) {
-                ckWARNdep(RExC_parse+1, "\\C is deprecated");
-            }
-           goto finish_meta_pat;
+           vFAIL("\\C no longer supported");
        case 'X':
            ret = reg_node(pRExC_state, CLUMP);
            *flagp |= HASWIDTH;
@@ -15394,10 +15378,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     op = POSIXA;
                 }
             }
-            else if (AT_LEAST_ASCII_RESTRICTED || ! FOLD) {
+            else if (! FOLD || ASCII_FOLD_RESTRICTED) {
                 /* We can optimize A-Z or a-z, but not if they could match
-                 * something like the KELVIN SIGN under /i (/a means they
-                 * can't) */
+                 * something like the KELVIN SIGN under /i. */
                 if (prevvalue == 'A') {
                     if (value == 'Z'
 #ifdef EBCDIC
@@ -16756,7 +16739,7 @@ Perl_regdump(pTHX_ const regexp *r)
            PerlIO_printf(Perl_debug_log, "(SBOL)");
         if (r->intflags & PREGf_ANCH_GPOS)
            PerlIO_printf(Perl_debug_log, "(GPOS)");
-       PerlIO_putc(Perl_debug_log, ' ');
+       (void)PerlIO_putc(Perl_debug_log, ' ');
     }
     if (r->intflags & PREGf_GPOS_SEEN)
        PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs);
@@ -17710,6 +17693,47 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
     Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
 }
 
+/* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl_save_re_context(pTHX)
+{
+    I32 nparens = -1;
+    I32 i;
+
+    /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
+
+    if (PL_curpm) {
+       const REGEXP * const rx = PM_GETRE(PL_curpm);
+       if (rx)
+            nparens = RX_NPARENS(rx);
+    }
+
+    /* RT #124109. This is a complete hack; in the SWASHNEW case we know
+     * that PL_curpm will be null, but that utf8.pm and the modules it
+     * loads will only use $1..$3.
+     * The t/porting/re_context.t test file checks this assumption.
+     */
+    if (nparens == -1)
+        nparens = 3;
+
+    for (i = 1; i <= nparens; i++) {
+        char digits[TYPE_CHARS(long)];
+        const STRLEN len = my_snprintf(digits, sizeof(digits),
+                                       "%lu", (long)i);
+        GV *const *const gvp
+            = (GV**)hv_fetch(PL_defstash, digits, len, 0);
+
+        if (gvp) {
+            GV * const gv = *gvp;
+            if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
+                save_scalar(gv);
+        }
+    }
+}
+#endif
+
 #ifdef DEBUGGING
 
 STATIC void