Document system(1, @args) under VMS in perlport.

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index c618cc1..376b697 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -110,7 +110,6 @@ typedef struct scan_frame {
      regnode *next_regnode;      /* next node to process when last is reached */
      U32 prev_recursed_depth;
      I32 stopparen;              /* what stopparen do we use */
-    U32 is_top_frame;           /* what flags do we use? */
  
      struct scan_frame *this_prev_frame; /* this previous frame */
      struct scan_frame *prev_frame;      /* previous frame */
@@ -150,7 +149,7 @@ struct RExC_state_t {
      I32                sawback;                /* Did we see \1, ...? */
      U32                seen;
      SSize_t    size;                   /* Code size. */
-    I32                npar;            /* Capture buffer count, (OPEN) plus
+    I32         npar;                   /* Capture buffer count, (OPEN) plus
                                             one. ("par" 0 is the whole
                                             pattern)*/
      I32                nestroot;               /* root parens we are in - used by
@@ -213,6 +212,7 @@ struct RExC_state_t {
      bool        seen_unfolded_sharp_s;
      bool        strict;
      bool        study_started;
+    bool        in_script_run;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -279,6 +279,7 @@ struct RExC_state_t {
  #define RExC_strict (pRExC_state->strict)
  #define RExC_study_started      (pRExC_state->study_started)
  #define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -1118,7 +1119,7 @@ PERL_STATIC_INLINE item*
  push(UV key,item* curr)
  {
      item* head;
-    Newxz(head, 1, item);
+    Newx(head, 1, item);
      head->key = key;
      head->value = 0;
      head->next = curr;
@@ -1188,7 +1189,7 @@ S_edit_distance(const UV* src,
      PERL_ARGS_ASSERT_EDIT_DISTANCE;
  
      /* intialize matrix start values */
-    Newxz(scores, ( (x + 2) * (y + 2)), UV);
+    Newx(scores, ( (x + 2) * (y + 2)), UV);
      scores[0] = score_ceil;
      scores[1 * (y + 2) + 0] = score_ceil;
      scores[0 * (y + 2) + 1] = score_ceil;
@@ -1702,6 +1703,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
              regnode_charclass_posixl temp;
              int add = 1;    /* To calculate the index of the complement */
  
+            Zero(&temp, 1, regnode_charclass_posixl);
              ANYOF_POSIXL_ZERO(&temp);
              for (i = 0; i < ANYOF_MAX; i++) {
                  assert(i % 2 != 0
@@ -2423,7 +2425,7 @@ is the recommended Unicode-aware way of saying
  } STMT_END
  
  #define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newxz( trie->states[ state ].trans.list,               \
+    Newx( trie->states[ state ].trans.list,                     \
         4, reg_trie_trans_le );                                 \
       TRIE_LIST_CUR( state ) = 1;                                \
       TRIE_LIST_LEN( state ) = 4;                                \
@@ -3618,7 +3620,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
      aho->trie=trie_offset;
      aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
      Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newxz( q, numstates, U32);
+    Newx( q, numstates, U32);
      aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
      aho->refcount = 1;
      fail = aho->fail;
@@ -4143,7 +4145,7 @@ S_unwind_scan_frames(pTHX_ const void *p)
      } while (f);
  }
  
-
+/* the return from this sub is the minimum length that could possibly match */
  STATIC SSize_t
  S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          SSize_t *minlenp, SSize_t *deltap,
@@ -4179,6 +4181,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      PERL_ARGS_ASSERT_STUDY_CHUNK;
      RExC_study_started= 1;
  
+    Zero(&data_fake, 1, scan_data_t);
  
      if ( depth == 0 ) {
          while (first_non_open && OP(first_non_open) == OPEN)
@@ -4286,6 +4289,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
              /* we suppose the run is continuous, last=next...
               * NOTE we dont use the return here! */
+            /* DEFINEP study_chunk() recursion */
              (void)study_chunk(pRExC_state, &scan, &minlen,
                                &deltanext, next, &data_fake, stopparen,
                                recursed_depth, NULL, f, depth+1);
@@ -4353,6 +4357,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         f |= SCF_WHILEM_VISITED_POS;
  
                     /* we suppose the run is continuous, last=next...*/
+                    /* recurse study_chunk() for each BRANCH in an alternation */
                     minnext = study_chunk(pRExC_state, &scan, minlenp,
                                        &deltanext, next, &data_fake, stopparen,
                                        recursed_depth, NULL, f,depth+1);
@@ -5089,6 +5094,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     f &= ~SCF_WHILEM_VISITED_POS;
  
                 /* This will finish on WHILEM, setting scan, or on NULL: */
+                /* recurse study_chunk() on loop bodies */
                 minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
                                    last, data, stopparen, recursed_depth, NULL,
                                    (mincount == 0
@@ -5251,6 +5257,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         }
  #endif
                         /* Optimize again: */
+                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
                         study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                      NULL, stopparen, recursed_depth, NULL, 0,depth+1);
                     }
@@ -5549,20 +5556,25 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      }
                      break;
  
+                case NASCII:
+                    invert = 1;
+                    /* FALLTHROUGH */
+               case ASCII:
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
+
+                    /* This can be handled as a Posix class */
+                    goto join_posix_and_ascii;
+
                  case NPOSIXA:   /* For these, we always know the exact set of
                                     what's matched */
                      invert = 1;
                      /* FALLTHROUGH */
                 case POSIXA:
-                    if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
-                    }
-                    else {
-                        _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &my_invlist);
-                    }
-                    goto join_posix;
+                    assert(FLAGS(scan) != _CC_ASCII);
+                    _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
+                                          PL_XPosix_ptrs[_CC_ASCII],
+                                          &my_invlist);
+                    goto join_posix_and_ascii;
  
                 case NPOSIXD:
                 case NPOSIXU:
@@ -5582,7 +5594,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                            &my_invlist);
                      }
  
-                  join_posix:
+                  join_posix_and_ascii:
  
                      if (flags & SCF_DO_STCLASS_AND) {
                          ssc_intersection(data->start_class, my_invlist, invert);
@@ -5641,6 +5653,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      f |= SCF_WHILEM_VISITED_POS;
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
+
+                /* recurse study_chunk() for lookahead body */
                  minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                        last, &data_fake, stopparen,
                                        recursed_depth, NULL, f, depth+1);
@@ -5731,6 +5745,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
  
+                /* positive lookahead study_chunk() recursion */
                  *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                          &deltanext, last, &data_fake,
                                          stopparen, recursed_depth, NULL,
@@ -5892,6 +5907,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          /* We go from the jump point to the branch that follows
                             it. Note this means we need the vestigal unused
                             branches even though they arent otherwise used. */
+                        /* recurse study_chunk() for TRIE */
                          minnext = study_chunk(pRExC_state, &scan, minlenp,
                              &deltanext, (regnode *)nextbranch, &data_fake,
                              stopparen, recursed_depth, NULL, f,depth+1);
@@ -5932,8 +5948,12 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      data->cur_is_floating = 1; /* float */
              }
              min += min1;
-            if (delta != SSize_t_MAX)
-                delta += max1 - min1;
+            if (delta != SSize_t_MAX) {
+                if (SSize_t_MAX - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = SSize_t_MAX;
+            }
              if (flags & SCF_DO_STCLASS_OR) {
                  ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                  if (min1) {
@@ -6907,7 +6927,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (   ! dump_len_string
              || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
          {
-            PL_dump_re_max_len = 0;
+            PL_dump_re_max_len = 60;    /* A reasonable default */
          }
  #endif
      }
@@ -7024,6 +7044,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_in_script_run = 0;
      RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
@@ -7036,7 +7057,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      });
      DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
-            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
              Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
                            PL_colors[4],PL_colors[5],s);
          });
@@ -7373,7 +7394,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         3-units-long substrs field. */
      Newx(r->substrs, 1, struct reg_substr_data);
      if (RExC_recurse_count) {
-        Newxz(RExC_recurse,RExC_recurse_count,regnode *);
+        Newx(RExC_recurse,RExC_recurse_count,regnode *);
          SAVEFREEPV(RExC_recurse);
      }
  
@@ -7572,6 +7593,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
+         * (NO top level branches)
+         */
         minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
                               scan + RExC_size, /* Up to end */
              &data, -1, 0, NULL,
@@ -7697,6 +7722,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../
+         * (patterns WITH top level branches)
+         */
         minlen = study_chunk(pRExC_state,
              &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
              SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
@@ -7755,7 +7784,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      if (RExC_seen & REG_RECURSE_SEEN ) {
          r->intflags |= PREGf_RECURSE_SEEN;
-        Newxz(r->recurse_locinput, r->nparens + 1, char *);
+        Newx(r->recurse_locinput, r->nparens + 1, char *);
      }
      if (RExC_seen & REG_GPOS_SEEN)
          r->intflags |= PREGf_GPOS_SEEN;
@@ -10650,13 +10679,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * here (if paren ==2).  The forms '(*VERB' and '(?...' disallow such
           * intervening space, as the sequence is a token, and a token should be
           * indivisible */
-        bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
+        bool has_intervening_patws = (paren == 2 || paren == 's')
+                                  && *(RExC_parse - 1) != '(';
  
          if (RExC_parse >= RExC_end) {
             vFAIL("Unmatched (");
          }
  
-        if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+        if (paren == 's') {
+
+            /* A nested script run is a no-op besides clustering */
+            if (RExC_in_script_run) {
+                paren = ':';
+                nextchar(pRExC_state);
+                ret = NULL;
+                goto parse_rest;
+            }
+            RExC_in_script_run = 1;
+
+           ret = reg_node(pRExC_state, SROPEN);
+            is_open = 1;
+        }
+        else if ( *RExC_parse == '*') { /* (*VERB:ARG) */
             char *start_verb = RExC_parse + 1;
             STRLEN verb_len;
             char *start_arg = NULL;
@@ -10680,6 +10724,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  if (RExC_parse >= RExC_end) {
                      goto unterminated_verb_pattern;
                  }
+
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                 while ( RExC_parse < RExC_end && *RExC_parse != ')' )
                      RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
@@ -10767,6 +10812,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             nextchar(pRExC_state);
             return ret;
          }
+        else if (*RExC_parse == '+') { /* (+...) */
+            RExC_parse++;
+
+            if (has_intervening_patws) {
+                /* XXX Note that a potential gotcha is that outside of /x '( +
+                 * ...)' means to match a space at least once ...   This is a
+                 * problem elsewhere too */
+                vFAIL("In '(+...)', the '(' and '+' must be adjacent");
+            }
+
+            if (! memBEGINPs(RExC_parse, (STRLEN) (RExC_end - RExC_parse),
+                             "script_run:"))
+            {
+                RExC_parse += strcspn(RExC_parse, ":)");
+                vFAIL("Unknown (+ pattern");
+            }
+            else {
+
+                /* This indicates Unicode rules. */
+                REQUIRE_UNI_RULES(flagp, NULL);
+
+                RExC_parse += sizeof("script_run:") - 1;
+
+                if (PASS2) {
+                    Perl_ck_warner_d(aTHX_
+                        packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
+                        "The script_run feature is experimental"
+                        REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+                }
+
+                ret = reg(pRExC_state, 's', &flags, depth+1);
+                if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                    return NULL;
+                }
+
+                return ret;
+            }
+        }
          else if (*RExC_parse == '?') { /* (?...) */
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
@@ -11178,8 +11262,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      ret = reganode(pRExC_state,NGROUPP,num);
                      goto insert_if_check_paren;
                 }
-               else if (RExC_end - RExC_parse >= DEFINE_len
-                        && strnEQ(RExC_parse, "DEFINE", DEFINE_len))
+               else if (memBEGINs(RExC_parse,
+                                   (STRLEN) (RExC_end - RExC_parse),
+                                   "DEFINE"))
                  {
                     ret = reganode(pRExC_state,DEFINEP,0);
                     RExC_parse += DEFINE_len;
@@ -11314,7 +11399,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  vFAIL("Unknown switch condition (?(...))");
             }
             case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth,
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
                                           oregcomp_parse);
              case 0: /* A NUL */
                 RExC_parse--; /* for vFAIL to print correctly */
@@ -11451,6 +11536,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              Set_Node_Offset(ender,RExC_parse+1); /* MJD */
              Set_Node_Length(ender,1); /* MJD */
             break;
+       case 's':
+           ender = reg_node(pRExC_state, SRCLOSE);
+            RExC_in_script_run = 0;
+           break;
         case '<':
         case ',':
         case '=':
@@ -12055,14 +12144,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    endbrace = strchr(RExC_parse, '}');
+    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
      if (! endbrace) { /* no trailing brace */
          vFAIL2("Missing right brace on \\%c{}", 'N');
      }
-    else if(!(endbrace == RExC_parse           /* nothing between the {} */
-              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
-                  && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
-                                                       error msg) */
+    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
+               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
+                                                   for a better error msg) */
+                                  (STRLEN) (RExC_end - RExC_parse),
+                                 "U+")))
      {
         RExC_parse = endbrace;  /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
@@ -12771,9 +12861,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else {
                  STRLEN length;
                  char name = *RExC_parse;
-                char * endbrace;
+                char * endbrace = NULL;
                  RExC_parse += 2;
-                endbrace = strchr(RExC_parse, '}');
+                if (RExC_parse < RExC_end) {
+                    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+                }
  
                  if (! endbrace) {
                      vFAIL2("Missing right brace on \\%c{}", name);
@@ -12794,8 +12886,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }*/
                  switch (*RExC_parse) {
                      case 'g':
-                        if (length != 1
-                            && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+                        if (    length != 1
+                            && (memNEs(RExC_parse + 1, length - 1, "cb")))
                          {
                              goto bad_bound_type;
                          }
@@ -13352,6 +13444,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_o(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13378,6 +13471,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_x(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13559,7 +13653,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * character we are appending, hence we can delay getting
                       * its representation until PASS2. */
                      if (SIZE_ONLY) {
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              const STRLEN unilen = UVCHR_SKIP(ender);
                              s += unilen;
  
@@ -13577,7 +13671,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          }
                      } else { /* PASS2 */
                        not_fold_common:
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                              len += (char *) new_s - s - 1;
                              s = (char *) new_s;
@@ -14616,7 +14710,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
           * */
          switch (name_len) {
              case 4:
-                if (memEQ(name_start, "word", 4)) {
+                if (memEQs(name_start, 4, "word")) {
                      /* this is not POSIX, this is the Perl \w */
                      class_number = ANYOF_WORDCHAR;
                  }
@@ -14627,51 +14721,51 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                   * Offset 4 gives the best switch position.  */
                  switch (name_start[4]) {
                      case 'a':
-                        if (memEQ(name_start, "alph", 4)) /* alpha */
+                        if (memBEGINs(name_start, 5, "alph")) /* alpha */
                              class_number = ANYOF_ALPHA;
                          break;
                      case 'e':
-                        if (memEQ(name_start, "spac", 4)) /* space */
+                        if (memBEGINs(name_start, 5, "spac")) /* space */
                              class_number = ANYOF_SPACE;
                          break;
                      case 'h':
-                        if (memEQ(name_start, "grap", 4)) /* graph */
+                        if (memBEGINs(name_start, 5, "grap")) /* graph */
                              class_number = ANYOF_GRAPH;
                          break;
                      case 'i':
-                        if (memEQ(name_start, "asci", 4)) /* ascii */
+                        if (memBEGINs(name_start, 5, "asci")) /* ascii */
                              class_number = ANYOF_ASCII;
                          break;
                      case 'k':
-                        if (memEQ(name_start, "blan", 4)) /* blank */
+                        if (memBEGINs(name_start, 5, "blan")) /* blank */
                              class_number = ANYOF_BLANK;
                          break;
                      case 'l':
-                        if (memEQ(name_start, "cntr", 4)) /* cntrl */
+                        if (memBEGINs(name_start, 5, "cntr")) /* cntrl */
                              class_number = ANYOF_CNTRL;
                          break;
                      case 'm':
-                        if (memEQ(name_start, "alnu", 4)) /* alnum */
+                        if (memBEGINs(name_start, 5, "alnu")) /* alnum */
                              class_number = ANYOF_ALPHANUMERIC;
                          break;
                      case 'r':
-                        if (memEQ(name_start, "lowe", 4)) /* lower */
+                        if (memBEGINs(name_start, 5, "lowe")) /* lower */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
-                        else if (memEQ(name_start, "uppe", 4)) /* upper */
+                        else if (memBEGINs(name_start, 5, "uppe")) /* upper */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
                          break;
                      case 't':
-                        if (memEQ(name_start, "digi", 4)) /* digit */
+                        if (memBEGINs(name_start, 5, "digi")) /* digit */
                              class_number = ANYOF_DIGIT;
-                        else if (memEQ(name_start, "prin", 4)) /* print */
+                        else if (memBEGINs(name_start, 5, "prin")) /* print */
                              class_number = ANYOF_PRINT;
-                        else if (memEQ(name_start, "punc", 4)) /* punct */
+                        else if (memBEGINs(name_start, 5, "punc")) /* punct */
                              class_number = ANYOF_PUNCT;
                          break;
                  }
                  break;
              case 6:
-                if (memEQ(name_start, "xdigit", 6))
+                if (memEQs(name_start, 6, "xdigit"))
                      class_number = ANYOF_XDIGIT;
                  break;
          }
@@ -14892,6 +14986,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
  
+    DEBUG_PARSE("xcls");
+
      if (in_locale) {
          set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
      }
@@ -14909,7 +15005,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
      if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
+        UV nest_depth = 0; /* how many nested (?[...]) constructs */
  
          while (RExC_parse < RExC_end) {
              SV* current = NULL;
@@ -14918,8 +15014,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                      TRUE /* Force /x */ );
  
              switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        nest_depth++, RExC_parse+=2;
                      /* FALLTHROUGH */
                  default:
                      break;
@@ -14976,9 +15073,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  }
  
                  case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (nest_depth--) break;
                          node = reganode(pRExC_state, ANYOF, 0);
                          RExC_size += ANYOF_SKIP;
                          nextchar(pRExC_state);
@@ -14990,20 +15087,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
                          return node;
                      }
-                    goto no_close;
+                    /* We output the messages even if warnings are off, because we'll fail
+                     * the very next thing, and these give a likely diagnosis for that */
+                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+                    }
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
              }
  
              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
          }
  
-      no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
          if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
              output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
      }
  
      /* Pass 2 only after this. */
@@ -15183,12 +15285,14 @@ redo_curchar:
                       * inversion list, and RExC_parse points to the trailing
                       * ']'; the next character should be the ')' */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
  
                      /* Then the ')' matching the original '(' handled by this
                       * case: statement */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
  
                      RExC_parse++;
                      RExC_flags = save_flags;
@@ -16188,6 +16292,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          do_posix_warnings ? &posix_warnings : NULL,
                          TRUE /* checking only */);
          }
+        else if (  strict && ! skip_white
+                 && (   _generic_isCC(value, _CC_VERTSPACE)
+                     || is_VERTWS_cp_high(value)))
+        {
+            vFAIL("Literal vertical space in [] is illegal except under /x");
+        }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
  
@@ -16308,7 +16418,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     vFAIL2("Empty \\%c", (U8)value);
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
-                   e = strchr(RExC_parse, '}');
+                   e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
                      if (!e) {
                          RExC_parse++;
                          vFAIL2("Missing right brace on \\%c{}", c);
@@ -16440,7 +16550,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * referred to outside it. [perl #121777] */
                          if (! has_pkg && curpkg) {
                              char* pkgname = HvNAME(curpkg);
-                            if (strNE(pkgname, "main")) {
+                            if (memNEs(pkgname, HvNAMELEN(curpkg), "main")) {
                                  char* full_name = Perl_form(aTHX_
                                                              "%s::%s",
                                                              pkgname,
@@ -16523,6 +16633,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_o(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                 PASS2,   /* warnings only in
@@ -16541,6 +16652,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_x(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                PASS2, /* Output warnings */
@@ -16966,7 +17078,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            " be some subset of \"0-9\","
                                            " \"A-Z\", or \"a-z\"");
                      }
-                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                    else if (prevvalue >= FIRST_NON_ASCII_DECIMAL_DIGIT) {
                          SSize_t index_start;
                          SSize_t index_final;
  
@@ -16974,8 +17086,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * can't do the same checks for above-ASCII ranges,
                           * except in the case of digit ones.  These should
                           * contain only digits from the same group of 10.  The
-                         * ASCII case is handled just above.  0x660 is the
-                         * first digit character beyond ASCII.  Hence here, the
+                         * ASCII case is handled just above.  Hence here, the
                           * range could be a range of digits.  First some
                           * unlikely special cases.  Grandfather in that a range
                           * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
@@ -17269,14 +17380,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  /* The actual POSIXish node for all the rest depends on the
                   * charset modifier.  The ones in the first set depend only on
                   * ASCII or, if available on this platform, also locale */
+
                  case ANYOF_ASCII:
                  case ANYOF_NASCII:
+
  #ifdef HAS_ISASCII
-                    op = (LOC) ? POSIXL : POSIXA;
-#else
-                    op = POSIXA;
+                    if (LOC) {
+                        op = POSIXL;
+                        goto join_posix;
+                    }
  #endif
-                    goto join_posix;
+                    /* (namedclass - ANYOF_ASCII) is 0 or 1.  xor'ing with
+                     * invert flips that to 1 or 0 when invert is set */
+                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
+                    break;
  
                  /* The following don't have any matches in the upper Latin1
                   * range, hence /d is equivalent to /u for them.  Making it /u
@@ -17418,6 +17535,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             TRUE /* downgradable to EXACT */
                                             );
              }
+            else {
+                *flagp |= HASWIDTH|SIMPLE;
+            }
  
              RExC_parse = (char *) cur_parse;
  
@@ -17682,6 +17802,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  if (_invlist_len(only_non_utf8_list) != 0) {
                      ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
                  }
+                SvREFCNT_dec_NN(only_non_utf8_list);
              }
              else {
                  /* Here there were no complemented posix classes.  That means
@@ -17973,25 +18094,43 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              const UV* cp_list_array = invlist_array(cp_list);
  
              /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  These run slightly faster for above-Unicode
-             * code points, so don't bother with POSIXA ones nor the 2 that
-             * have no above-Unicode matches.  We can avoid these checks unless
-             * the ANYOF matches at least as high as the lowest POSIX one
-             * (which was manually found to be \v.  The actual code point may
-             * increase in later Unicode releases, if a higher code point is
-             * assigned to be \v, but this code will never break.  It would
-             * just mean we could execute the checks for posix optimizations
-             * unnecessarily) */
-
-            if (cp_list_array[cp_list_len-1] > 0x2029) {
+             * the POSIX classes.  First try ASCII */
+
+            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                op = ASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                op = NASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (cp_list_array[cp_list_len-1] >= 0x2029) {
+
+                /* Then try the other POSIX classes.  The POSIXA ones are about
+                 * the same speed as ANYOF ops, but the ones that have
+                 * above-Latin1 code point matches are somewhat faster than
+                 * ANYOF.  So optimize those, but don't bother with the POSIXA
+                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
+                 * this ANYOF node has a lower highest possible matching code
+                 * point than any of the XPosix ones, we know that it can't
+                 * possibly be the same as any of them, so we can avoid
+                 * executing this code.  The 0x2029 above for the lowest max
+                 * was determined by manual inspection of the classes, and
+                 * comes from \v.  Suppose Unicode in a later version adds a
+                 * higher code point to \v.  All that means is that this code
+                 * can be executed unnecessarily.  It will still give the
+                 * correct answer. */
+
                  for (posix_class = 0;
                       posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
                       posix_class++)
                  {
                      int try_inverted;
-                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+
+                    if (posix_class == _CC_CNTRL) {
                          continue;
                      }
+
                      for (try_inverted = 0; try_inverted < 2; try_inverted++) {
  
                          /* Check if matches normal or inverted */
@@ -18981,7 +19120,7 @@ Perl_regdump(pTHX_ const regexp *r)
              RE_PV_QUOTED_DECL(s, 0, dsv,
                              SvPVX_const(r->substrs->data[i].substr),
                              RE_SV_DUMPLEN(r->substrs->data[i].substr),
-                            30);
+                            PL_dump_re_max_len);
              Perl_re_printf( aTHX_
                            "%s %s%s at %" IVdf "..%" UVuf " ",
                            i ? "floating" : "anchored",
@@ -19131,7 +19270,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          * is a crude hack but it may be the best for now since
          * we have no flag "this EXACTish node was UTF-8"
          * --jhi */
-       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+       pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+                  PL_colors[0], PL_colors[1],
                   PERL_PV_ESCAPE_UNI_DETECT |
                   PERL_PV_ESCAPE_NONASCII   |
                   PERL_PV_PRETTY_ELLIPSES   |
@@ -19355,7 +19495,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              SV* contents;
  
              /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len)
+            const STRLEN dump_len = (PL_dump_re_max_len > 256)
                                      ? PL_dump_re_max_len
                                      : 256;
  
@@ -19482,7 +19622,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                       PL_colors[5],PL_colors[0],
                       s,
                       PL_colors[1],
-                     (strlen(s) > 60 ? "..." : ""));
+                     (strlen(s) > PL_dump_re_max_len ? "..." : ""));
         } );
  
      /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -19633,7 +19773,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
      drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
      SvREFCNT_inc_void(drx->qr_anoncv);
      if (srx->recurse_locinput)
-        Newxz(drx->recurse_locinput,srx->nparens + 1,char *);
+        Newx(drx->recurse_locinput,srx->nparens + 1,char *);
  
      return dsv;
  }
@@ -19667,7 +19807,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
         {
             SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
-                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
+                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
              Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
                  PL_colors[4],PL_colors[5],s);
          }
@@ -19843,7 +19983,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
      RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
      ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
      if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+        Newx(ret->recurse_locinput,r->nparens + 1,char *);
  
      if (ret->pprivate)
         RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -20296,9 +20436,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  #else
          format = "\\x%02" UVXf "-\\x%02" UVXf;
  #endif
-        GCC_DIAG_IGNORE(-Wformat-nonliteral);
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE;
+        GCC_DIAG_RESTORE_STMT;
          break;
      }
  }
@@ -20731,7 +20871,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         /* While that wasn't END last time... */
         NODE_ALIGN(node);
         op = OP(node);
-       if (op == CLOSE || op == WHILEM)
+       if (op == CLOSE || op == SRCLOSE || op == WHILEM)
             indent--;
         next = regnext((regnode *)node);
  
@@ -20798,7 +20938,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                      indent+3,
                      elem_ptr
                      ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), 60,
+                                SvCUR(*elem_ptr), PL_dump_re_max_len,
                                  PL_colors[0], PL_colors[1],
                                  (SvUTF8(*elem_ptr)
                                   ? PERL_PV_ESCAPE_UNI
@@ -20855,7 +20995,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             node = NEXTOPER(node);
             node += regarglen[(U8)op];
         }
-       if (op == CURLYX || op == OPEN)
+       if (op == CURLYX || op == OPEN || op == SROPEN)
             indent++;
      }
      CLEAR_OPTSTART;