This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use of strings with code points over 0xFF as arguments to "vec"
[perl5.git] / regcomp.c
index fbd5c18..36f5aff 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -131,6 +131,8 @@ struct RExC_state_t {
     char       *parse;                 /* Input-scan pointer. */
     char        *copy_start;            /* start of copy of input within
                                            constructed parse string */
+    char        *save_copy_start;       /* Provides one level of saving
+                                           and restoring 'copy_start' */
     char        *copy_start_in_input;   /* Position in input string
                                            corresponding to copy_start */
     SSize_t    whilem_seen;            /* number of WHILEM in this expr */
@@ -229,6 +231,7 @@ struct RExC_state_t {
 #define RExC_precomp   (pRExC_state->precomp)
 #define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
 #define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
 #define RExC_precomp_end (pRExC_state->precomp_end)
 #define RExC_rx_sv     (pRExC_state->rx_sv)
 #define RExC_rx                (pRExC_state->rx)
@@ -821,8 +824,13 @@ static const scan_data_t zero_scan_data = {
 } STMT_END
 
 /* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
-#define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
+    STMT_START {                                                            \
+      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
+      RExC_copy_start_in_constructed = NULL;                                \
+    } STMT_END
+#define RESTORE_WARNINGS                                                    \
+    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
 
 /* Since a warning can be generated multiple times as the input is reparsed, we
  * output it the first time we come to that point in the parse, but suppress it
@@ -7242,7 +7250,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
         const char* name;
 
         name = get_regex_charset_name(RExC_rx->extflags, &len);
-        if strEQ(name, DEPENDS_PAT_MODS) {  /* /d under UTF-8 => /u */
+        if (strEQ(name, DEPENDS_PAT_MODS)) {  /* /d under UTF-8 => /u */
             assert(RExC_utf8);
             name = UNICODE_PAT_MODS;
             len = sizeof(UNICODE_PAT_MODS) - 1;
@@ -10992,14 +11000,14 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     RExC_sawback = 1;
     ret = reganode(pRExC_state,
                    ((! FOLD)
-                     ? NREF
+                     ? REFN
                      : (ASCII_FOLD_RESTRICTED)
-                       ? NREFFA
+                       ? REFFAN
                        : (AT_LEAST_UNI_SEMANTICS)
-                         ? NREFFU
+                         ? REFFUN
                          : (LOC)
-                           ? NREFFL
-                           : NREFF),
+                           ? REFFLN
+                           : REFFN),
                     num);
     *flagp |= HASWIDTH;
 
@@ -11840,7 +11848,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         RExC_rxi->data->data[num]=(void*)sv_dat;
                         SvREFCNT_inc_simple_void_NN(sv_dat);
                     }
-                    ret = reganode(pRExC_state, NGROUPP, num);
+                    ret = reganode(pRExC_state, GROUPPN, num);
                     goto insert_if_check_paren;
                }
                else if (memBEGINs(RExC_parse,
@@ -17722,7 +17730,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                            literal
                         );
                 }
-                else if isMNEMONIC_CNTRL(value) {
+                else if (isMNEMONIC_CNTRL(value)) {
                     vWARN4(RExC_parse,
                            "\"%.*s\" is more clearly written simply as \"%s\"",
                            (int) (RExC_parse - rangebegin),
@@ -20286,7 +20294,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
             name_list= RExC_paren_name_list;
         }
         if (name_list) {
-            if ( k != REF || (OP(o) < NREF)) {
+            if ( k != REF || (OP(o) < REFN)) {
                 SV **name= av_fetch(name_list, parno, 0 );
                if (name)
                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
@@ -23054,6 +23062,14 @@ Perl_parse_uniprop_string(pTHX_
     if (could_be_user_defined) {
         CV* user_sub;
 
+        /* If the user defined property returns the empty string, it could
+         * easily be because the pattern is being compiled before the data it
+         * actually needs to compile is available.  This could be argued to be
+         * a bug in the perl code, but this is a change of behavior for Perl,
+         * so we handle it.  This means that intentionally returning nothing
+         * will not be resolved until runtime */
+        bool empty_return = FALSE;
+
         /* Here, the name could be for a user defined property, which are
          * implemented as subs. */
         user_sub = get_cvn_flags(name, name_len, 0);
@@ -23285,16 +23301,28 @@ Perl_parse_uniprop_string(pTHX_
                 prop_definition = NULL;
             }
             else {  /* G_SCALAR guarantees a single return value */
+                SV * contents = POPs;
 
                 /* The contents is supposed to be the expansion of the property
-                 * definition.  Call a function to check for valid syntax and
-                 * handle it */
-                prop_definition = handle_user_defined_property(name, name_len,
+                 * definition.  If the definition is deferrable, and we got an
+                 * empty string back, set a flag to later defer it (after clean
+                 * up below). */
+                if (      deferrable
+                    && (! SvPOK(contents) || SvCUR(contents) == 0))
+                {
+                        empty_return = TRUE;
+                }
+                else { /* Otherwise, call a function to check for valid syntax,
+                          and handle it */
+
+                    prop_definition = handle_user_defined_property(
+                                                    name, name_len,
                                                     is_utf8, to_fold, runtime,
                                                     deferrable,
-                                                    POPs, user_defined_ptr,
+                                                    contents, user_defined_ptr,
                                                     msg,
                                                     level);
+                }
             }
 
             /* Here, we have the results of the expansion.  Delete the
@@ -23306,8 +23334,9 @@ Perl_parse_uniprop_string(pTHX_
 
             S_delete_recursion_entry(aTHX_ SvPVX(key));
 
-            if (! prop_definition || is_invlist(prop_definition)) {
-
+            if (    ! empty_return
+                && (! prop_definition || is_invlist(prop_definition)))
+            {
                 /* If we got success we use the inversion list defining the
                  * property; otherwise use the error message */
                 SWITCH_TO_GLOBAL_CONTEXT;
@@ -23328,6 +23357,10 @@ Perl_parse_uniprop_string(pTHX_
             LEAVE;
             POPSTACK;
 
+            if (empty_return) {
+                goto definition_deferred;
+            }
+
             if (prop_definition) {
 
                 /* If the definition is for something not known at this time,
@@ -23403,10 +23436,12 @@ Perl_parse_uniprop_string(pTHX_
                  * NV. */
 
                 NV value;
+                SSize_t value_len = lookup_len - equals_pos;
 
                 /* Get the value */
-                if (my_atof3(lookup_name + equals_pos, &value,
-                             lookup_len - equals_pos)
+                if (   value_len <= 0
+                    || my_atof3(lookup_name + equals_pos, &value,
+                                value_len)
                           != lookup_name + lookup_len)
                 {
                     goto failed;