Fix stray > in L<perlapi/sv_usepvn_flags>

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 6dcd99a..2c13b3b 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -1645,6 +1645,7 @@ Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
      case SVt_PVGV:
         if (!isGV_with_GP(sv))
             break;
+        /* FALLTHROUGH */
      case SVt_PVAV:
      case SVt_PVHV:
      case SVt_PVCV:
@@ -1758,6 +1759,7 @@ Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
      case SVt_PVGV:
         if (!isGV_with_GP(sv))
             break;
+        /* FALLTHROUGH */
      case SVt_PVAV:
      case SVt_PVHV:
      case SVt_PVCV:
@@ -3139,7 +3141,7 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
                      DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
                      STORE_LC_NUMERIC_SET_TO_NEEDED();
  
-                    local_radix = PL_numeric_local && PL_numeric_radix_sv;
+                    local_radix = PL_numeric_underlying && PL_numeric_radix_sv;
                      if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
                          size += SvCUR(PL_numeric_radix_sv) - 1;
                          s = SvGROW_mutable(sv, size);
@@ -3399,11 +3401,7 @@ if all the bytes are invariant in UTF-8.
  If C<flags> has C<SV_GMAGIC> bit set,
  will C<mg_get> on C<sv> if appropriate, else not.
  
-If C<flags> has C<SV_FORCE_UTF8_UPGRADE> set, this function assumes that the PV
-will expand when converted to UTF-8, and skips the extra work of checking for
-that.  Typically this flag is used by a routine that has already parsed the
-string and found such characters, and passes this information on so that the
-work doesn't have to be repeated.
+The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored.
  
  Returns the number of bytes in the converted string.
  
@@ -3424,22 +3422,10 @@ Returns the number of bytes in the converted string (not including the spares).
  
  =cut
  
-(One might think that the calling routine could pass in the position of the
-first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't
-have to be found again.  But that is not the case, because typically when the
-caller is likely to use this flag, it won't be calling this routine unless it
-finds something that won't fit into a byte.  Otherwise it tries to not upgrade
-and just use bytes.  But some things that do fit into a byte are variants in
-utf8, and the caller may not have been keeping track of these.)
-
  If the routine itself changes the string, it adds a trailing C<NUL>.  Such a
  C<NUL> isn't guaranteed due to having other routines do the work in some input
  cases, or if the input is already flagged as being in utf8.
  
-The speed of this could perhaps be improved for many cases if someone wanted to
-write a fast function that counts the number of variant characters in a string,
-especially if it could return the position of the first one.
-
  */
  
  STRLEN
@@ -3482,183 +3468,96 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr
         /* This function could be much more efficient if we
          * had a FLAG in SVs to signal if there are any variant
          * chars in the PV.  Given that there isn't such a flag
-        * make the loop as fast as possible (although there are certainly ways
-        * to speed this up, eg. through vectorization) */
+        * make the loop as fast as possible. */
         U8 * s = (U8 *) SvPVX_const(sv);
-       U8 * e = (U8 *) SvEND(sv);
         U8 *t = s;
-       STRLEN two_byte_count;
         
-       if (flags & SV_FORCE_UTF8_UPGRADE) {
-            two_byte_count = 0;
-        }
-        else {
-            if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
+        if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
  
-                /* utf8 conversion not needed because all are invariants.  Mark
-                 * as UTF-8 even if no variant - saves scanning loop */
-                SvUTF8_on(sv);
-                if (extra) SvGROW(sv, SvCUR(sv) + extra);
-                return SvCUR(sv);
-            }
-
-            /* Here, there is at least one variant, and t points to the first
-             * one */
-            two_byte_count = 1;
+            /* utf8 conversion not needed because all are invariants.  Mark
+             * as UTF-8 even if no variant - saves scanning loop */
+            SvUTF8_on(sv);
+            if (extra) SvGROW(sv, SvCUR(sv) + extra);
+            return SvCUR(sv);
          }
  
-        /* Note that the incoming SV may not have a trailing '\0', as certain
-         * code in pp_formline can send us partially built SVs.
+        /* Here, there is at least one variant (t points to the first one), so
+         * the string should be converted to utf8.  Everything from 's' to
+         * 't - 1' will occupy only 1 byte each on output.
           *
-        * Here, the string should be converted to utf8, either because of an
-         * input flag (which causes two_byte_count to be set to 0), or because
-         * a character that requires 2 bytes was found (two_byte_count = 1).  t
-         * points either to the beginning of the string (if we didn't examine
-         * anything), or to the first variant.  In either case, everything from
-         * s to t - 1 will occupy only 1 byte each on output.
+         * Note that the incoming SV may not have a trailing '\0', as certain
+         * code in pp_formline can send us partially built SVs.
          *
          * There are two main ways to convert.  One is to create a new string
          * and go through the input starting from the beginning, appending each
-        * converted value onto the new string as we go along.  It's probably
-        * best to allocate enough space in the string for the worst possible
-        * case rather than possibly running out of space and having to
-        * reallocate and then copy what we've done so far.  Since everything
-        * from s to t - 1 is invariant, the destination can be initialized
-        * with these using a fast memory copy
-        *
-         * The other way is to figure out exactly how big the string should be,
-        * by parsing the entire input.  Then you don't have to make it big
-        * enough to handle the worst possible case, and more importantly, if
-        * the string you already have is large enough, you don't have to
-        * allocate a new string, you can copy the last character in the input
-        * string to the final position(s) that will be occupied by the
-        * converted string and go backwards, stopping at t, since everything
-        * before that is invariant.
+         * converted value onto the new string as we go along.  Going this
+         * route, it's probably best to initially allocate enough space in the
+         * string rather than possibly running out of space and having to
+         * reallocate and then copy what we've done so far.  Since everything
+         * from 's' to 't - 1' is invariant, the destination can be initialized
+         * with these using a fast memory copy.  To be sure to allocate enough
+         * space, one could use the worst case scenario, where every remaining
+         * byte expands to two under UTF-8, or one could parse it and count
+         * exactly how many do expand.
          *
-        * There are advantages and disadvantages to each method.
+         * The other way is to unconditionally parse the remainder of the
+         * string to figure out exactly how big the expanded string will be,
+         * growing if needed.  Then start at the end of the string and place
+         * the character there at the end of the unfilled space in the expanded
+         * one, working backwards until reaching 't'.
          *
-        * In the first method, we can allocate a new string, do the memory
-        * copy from the s to t - 1, and then proceed through the rest of the
-        * string byte-by-byte.
-        *
-        * In the second method, we proceed through the rest of the input
-        * string just calculating how big the converted string will be.  Then
-        * there are two cases:
-        *  1)  if the string has enough extra space to handle the converted
-        *      value.  We go backwards through the string, converting until we
-        *      get to the position we are at now, and then stop.  If this
-        *      position is far enough along in the string, this method is
-         *     faster than the first method above.  If the memory copy were
-         *     the same speed as the byte-by-byte loop, that position would be
-         *     about half-way, as at the half-way mark, parsing to the end and
-         *     back is one complete string's parse, the same amount as
-         *     starting over and going all the way through.  Actually, it
-         *     would be somewhat less than half-way, as it's faster to just
-         *     count bytes than to also copy, and we don't have the overhead
-         *     of allocating a new string, changing the scalar to use it, and
-         *     freeing the existing one.  But if the memory copy is fast, the
-         *     break-even point is somewhere after half way.  The counting
-         *     loop could be sped up by vectorization, etc, to move the
-         *     break-even point further towards the beginning.
-        *  2)  if the string doesn't have enough space to handle the converted
-        *      value.  A new string will have to be allocated, and one might
-        *      as well, given that, start from the beginning doing the first
-        *      method.  We've spent extra time parsing the string and in
-        *      exchange all we've gotten is that we know precisely how big to
-        *      make the new one.  Perl is more optimized for time than space,
-        *      so this case is a loser.
-        * So what I've decided to do is not use the 2nd method unless it is
-        * guaranteed that a new string won't have to be allocated, assuming
-        * the worst case.  I also decided not to put any more conditions on it
-        * than this, for now.  It seems likely that, since the worst case is
-        * twice as big as the unknown portion of the string (plus 1), we won't
-        * be guaranteed enough space, causing us to go to the first method,
-        * unless the string is short, or the first variant character is near
-        * the end of it.  In either of these cases, it seems best to use the
-        * 2nd method.  The only circumstance I can think of where this would
-        * be really slower is if the string had once had much more data in it
-        * than it does now, but there is still a substantial amount in it  */
+         * The problem with assuming the worst case scenario is that for very
+         * long strings, we could allocate much more memory than actually
+         * needed, which can create performance problems.  If we have to parse
+         * anyway, the second method is the winner as it may avoid an extra
+         * copy.  The code used to use the first method under some
+         * circumstances, but now that there is faster variant counting on
+         * ASCII platforms, the second method is used exclusively, eliminating
+         * some code that no longer has to be maintained. */
  
         {
-           STRLEN invariant_head = t - s;
-           STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
-           if (SvLEN(sv) < size) {
-
-               /* Here, have decided to allocate a new string */
-
-               U8 *dst;
-               U8 *d;
-
-               Newx(dst, size, U8);
-
-               /* If no known invariants at the beginning of the input string,
-                * set so starts from there.  Otherwise, can use memory copy to
-                * get up to where we are now, and then start from here */
-
-               if (invariant_head == 0) {
-                   d = dst;
-               } else {
-                   Copy(s, dst, invariant_head, char);
-                   d = dst + invariant_head;
-               }
-
-               while (t < e) {
-                    append_utf8_from_native_byte(*t, &d);
-                    t++;
-               }
-               *d = '\0';
-               SvPV_free(sv); /* No longer using pre-existing string */
-               SvPV_set(sv, (char*)dst);
-               SvCUR_set(sv, d - dst);
-               SvLEN_set(sv, size);
-           } else {
-
-               /* Here, have decided to get the exact size of the string.
-                * Currently this happens only when we know that there is
-                * guaranteed enough space to fit the converted string, so
-                * don't have to worry about growing.  If two_byte_count is 0,
-                * then t points to the first byte of the string which hasn't
-                * been examined yet.  Otherwise two_byte_count is 1, and t
-                * points to the first byte in the string that will expand to
-                * two.  Depending on this, start examining at t or 1 after t.
-                * */
-
-               U8 *d = t + two_byte_count;
-
-
-               /* Count up the remaining bytes that expand to two */
-
-               while (d < e) {
-                   const U8 chr = *d++;
-                   if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++;
-               }
-
-               /* The string will expand by just the number of bytes that
-                * occupy two positions.  But we are one afterwards because of
-                * the increment just above.  This is the place to put the
-                * trailing NUL, and to set the length before we decrement */
-
-               d += two_byte_count;
-               SvCUR_set(sv, d - s);
-               *d-- = '\0';
+            /* Count the total number of variants there are.  We can start
+             * just beyond the first one, which is known to be at 't' */
+            const Size_t invariant_length = t - s;
+            U8 * e = (U8 *) SvEND(sv);
+
+            /* The length of the left overs, plus 1. */
+            const Size_t remaining_length_p1 = e - t;
+
+            /* We expand by 1 for the variant at 't' and one for each remaining
+             * variant (we start looking at 't+1') */
+            Size_t expansion = 1 + variant_under_utf8_count(t + 1, e);
+
+            /* +1 = trailing NUL */
+            Size_t need = SvCUR(sv) + expansion + extra + 1;
+            U8 * d;
+
+            /* Grow if needed */
+            if (SvLEN(sv) < need) {
+                t = invariant_length + (U8*) SvGROW(sv, need);
+                e = t + remaining_length_p1;
+            }
+            SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion);
  
+            /* Set the NUL at the end */
+            d = (U8 *) SvEND(sv);
+            *d-- = '\0';
  
-               /* Having decremented d, it points to the position to put the
-                * very last byte of the expanded string.  Go backwards through
-                * the string, copying and expanding as we go, stopping when we
-                * get to the part that is invariant the rest of the way down */
+            /* Having decremented d, it points to the position to put the
+             * very last byte of the expanded string.  Go backwards through
+             * the string, copying and expanding as we go, stopping when we
+             * get to the part that is invariant the rest of the way down */
  
-               e--;
-               while (e >= t) {
-                   if (NATIVE_BYTE_IS_INVARIANT(*e)) {
-                       *d-- = *e;
-                   } else {
-                       *d-- = UTF8_EIGHT_BIT_LO(*e);
-                       *d-- = UTF8_EIGHT_BIT_HI(*e);
-                   }
-                    e--;
-               }
-           }
+            e--;
+            while (e >= t) {
+                if (NATIVE_BYTE_IS_INVARIANT(*e)) {
+                    *d-- = *e;
+                } else {
+                    *d-- = UTF8_EIGHT_BIT_LO(*e);
+                    *d-- = UTF8_EIGHT_BIT_HI(*e);
+                }
+                e--;
+            }
  
             if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
                 /* Update pos. We do it at the end rather than during
@@ -3677,7 +3576,6 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr
         }
      }
  
-    /* Mark as UTF-8 even if no variant - saves scanning loop */
      SvUTF8_on(sv);
      return SvCUR(sv);
  }
@@ -3782,7 +3680,7 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv)
      PERL_ARGS_ASSERT_SV_UTF8_DECODE;
  
      if (SvPOKp(sv)) {
-        const U8 *start, *c;
+        const U8 *start, *c, *first_variant;
  
         /* The octets may have got themselves encoded - get them back as
          * bytes
@@ -3794,9 +3692,9 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv)
           * we want to make sure everything inside is valid utf8 first.
           */
          c = start = (const U8 *) SvPVX_const(sv);
-       if (!is_utf8_string(c, SvCUR(sv)))
-           return FALSE;
-        if (! is_utf8_invariant_string(c, SvCUR(sv))) {
+        if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) {
+            if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c)))
+                return FALSE;
              SvUTF8_on(sv);
          }
         if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
@@ -3917,15 +3815,14 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype)
         glob to begin with. */
      if(dtype == SVt_PVGV) {
          const char * const name = GvNAME((const GV *)dstr);
-        if(
-            strEQ(name,"ISA")
+        const STRLEN len = GvNAMELEN(dstr);
+        if(memEQs(name, len, "ISA")
           /* The stash may have been detached from the symbol table, so
              check its name. */
           && GvSTASH(dstr) && HvENAME(GvSTASH(dstr))
          )
              mro_changes = 2;
          else {
-            const STRLEN len = GvNAMELEN(dstr);
              if ((len > 1 && name[len-2] == ':' && name[len-1] == ':')
               || (len == 1 && name[0] == ':')) {
                  mro_changes = 3;
@@ -4138,7 +4035,7 @@ Perl_gv_setref(pTHX_ SV *const dstr, SV *const sstr)
         }
         else if (
             stype == SVt_PVAV && sref != dref
-        && strEQ(GvNAME((GV*)dstr), "ISA")
+        && memEQs(GvNAME((GV*)dstr), GvNAMELEN((GV*)dstr), "ISA")
          /* The stash may have been detached from the symbol table, so
             check its name before doing anything. */
          && GvSTASH(dstr) && HvENAME(GvSTASH(dstr))
@@ -4694,11 +4591,13 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
              ) {
              /* Either it's a shared hash key, or it's suitable for
                 copy-on-write.  */
+#ifdef DEBUGGING
              if (DEBUG_C_TEST) {
                  PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n");
                  sv_dump(sstr);
                  sv_dump(dstr);
              }
+#endif
  #ifdef PERL_ANY_COW
              if (!(sflags & SVf_IsCOW)) {
                      SvIsCOW_on(sstr);
@@ -4872,7 +4771,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr)
  #endif
  
      PERL_ARGS_ASSERT_SV_SETSV_COW;
-
+#ifdef DEBUGGING
      if (DEBUG_C_TEST) {
         PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n",
                       (void*)sstr, (void*)dstr);
@@ -4880,7 +4779,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr)
         if (dstr)
                     sv_dump(dstr);
      }
-
+#endif
      if (dstr) {
         if (SvTHINKFIRST(dstr))
             sv_force_normal_flags(dstr, SV_COW_DROP_PV);
@@ -4927,9 +4826,10 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr)
         SvUTF8_on(dstr);
      SvLEN_set(dstr, len);
      SvCUR_set(dstr, cur);
-    if (DEBUG_C_TEST) {
-       sv_dump(dstr);
-    }
+#ifdef DEBUGGING
+    if (DEBUG_C_TEST)
+               sv_dump(dstr);
+#endif
      return dstr;
  }
  #endif
@@ -5135,7 +5035,7 @@ giving it to C<sv_usepvn>, and neither should any pointers from "behind"
  that pointer (e.g. ptr + 1) be used.
  
  If S<C<flags & SV_SMAGIC>> is true, will call C<SvSETMAGIC>.  If
-S<C<flags> & SV_HAS_TRAILING_NUL>> is true, then C<ptr[len]> must be C<NUL>,
+S<C<flags & SV_HAS_TRAILING_NUL>> is true, then C<ptr[len]> must be C<NUL>,
  and the realloc
  will be skipped (i.e. the buffer is actually at least 1 byte longer than
  C<len>, and already meets the requirements for storing in C<SvPVX>).
@@ -5215,12 +5115,14 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags)
         const STRLEN len = SvLEN(sv);
         const STRLEN cur = SvCUR(sv);
  
+#ifdef DEBUGGING
          if (DEBUG_C_TEST) {
                  PerlIO_printf(Perl_debug_log,
                                "Copy on write: Force normal %ld\n",
                                (long) flags);
                  sv_dump(sv);
          }
+#endif
          SvIsCOW_off(sv);
  # ifdef PERL_COPY_ON_WRITE
         if (len) {
@@ -5260,9 +5162,10 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags)
             } else {
                 unshare_hek(SvSHARED_HEK_FROM_PV(pvx));
             }
-            if (DEBUG_C_TEST) {
+#ifdef DEBUGGING
+            if (DEBUG_C_TEST)
                  sv_dump(sv);
-            }
+#endif
         }
  #else
             const char * const pvx = SvPVX_const(sv);
@@ -6805,10 +6708,12 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
                      && !(IoFLAGS(sv) & IOf_FAKE_DIRP)))
             {
                 if (SvIsCOW(sv)) {
+#ifdef DEBUGGING
                     if (DEBUG_C_TEST) {
                         PerlIO_printf(Perl_debug_log, "Copy on write: clear\n");
                         sv_dump(sv);
                     }
+#endif
                     if (SvLEN(sv)) {
                         if (CowREFCNT(sv)) {
                             sv_buf_to_rw(sv);
@@ -9002,7 +8907,7 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv)
      if (flags & SVp_NOK) {
         const NV was = SvNVX(sv);
         if (LIKELY(!Perl_isinfnan(was)) &&
-            NV_OVERFLOWS_INTEGERS_AT &&
+            NV_OVERFLOWS_INTEGERS_AT != 0.0 &&
             was >= NV_OVERFLOWS_INTEGERS_AT) {
             /* diag_listed_as: Lost precision when %s %f by 1 */
             Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION),
@@ -9185,7 +9090,7 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv)
         {
             const NV was = SvNVX(sv);
             if (LIKELY(!Perl_isinfnan(was)) &&
-                NV_OVERFLOWS_INTEGERS_AT &&
+                NV_OVERFLOWS_INTEGERS_AT != 0.0 &&
                 was <= -NV_OVERFLOWS_INTEGERS_AT) {
                 /* diag_listed_as: Lost precision when %s %f by 1 */
                 Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION),
@@ -11632,8 +11537,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
      /* In this case there is an implicit bit,
       * and therefore the exponent is shifted by one. */
      exponent--;
-#  else
-#    ifdef NV_X86_80_BIT
+#  elif defined(NV_X86_80_BIT)
      if (subnormal) {
          /* The subnormals of the x86-80 have a base exponent of -16382,
           * (while the physical exponent bits are zero) but the frexp()
@@ -11647,7 +11551,6 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
      } else {
          exponent -= 4;
      }
-#    endif
      /* TBD: other non-implicit-bit platforms than the x86-80. */
  #  endif
  #endif
@@ -11704,6 +11607,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
               * the top non-zero nybble. */
              for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { }
              assert(n < 4);
+            assert(vlnz);
              vlnz[1] = 0;
              for (vshr = vlnz; vshr >= vfnz; vshr--) {
                vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n);
@@ -11910,11 +11814,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      STRLEN origlen;
      Size_t svix = 0;
      static const char nullstr[] = "(null)";
-    SV *argsv = NULL;
      bool has_utf8 = DO_UTF8(sv);    /* has the result utf8? */
      const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */
      /* Times 4: a decimal digit takes more than 3 binary digits.
-     * NV_DIG: mantissa takes than many decimal digits.
+     * NV_DIG: mantissa takes that many decimal digits.
       * Plus 32: Playing safe. */
      char ebuf[IV_DIG * 4 + NV_DIG + 32];
      bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
@@ -12015,6 +11918,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         Size_t efix      = 0;         /* explicit format parameter index */
         const Size_t osvix  = svix;   /* original index in case of bad fmt */
  
+       SV *argsv        = NULL;
         bool is_utf8     = FALSE;     /* is this item utf8?   */
          bool arg_missing = FALSE;     /* give "Missing argument" warning */
         char esignbuf[4];             /* holds sign prefix, e.g. "-0x" */
@@ -12327,9 +12231,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'V':
         case 'z':
         case 't':
-#ifdef I_STDINT
          case 'j':
-#endif
             intsize = *q++;
             break;
         }
@@ -12368,7 +12270,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             if (args) {
                 eptr = va_arg(*args, char*);
                 if (eptr)
-                   elen = strlen(eptr);
+                    if (has_precis)
+                        elen = my_strnlen(eptr, precis);
+                    else
+                        elen = strlen(eptr);
                 else {
                     eptr = (char *)nullstr;
                     elen = sizeof nullstr - 1;
@@ -12649,9 +12554,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                          case 't':  iv = va_arg(*args, ptrdiff_t);  break;
  #endif
                          default:   iv = va_arg(*args, int);        break;
-#ifdef I_STDINT
-                        case 'j':  iv = va_arg(*args, intmax_t);   break;
-#endif
+                        case 'j':  iv = va_arg(*args, PERL_INTMAX_T); break;
                          case 'q':
  #if IVSIZE >= 8
                                     iv = va_arg(*args, Quad_t);     break;
@@ -12706,9 +12609,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                                     * uptrdiff_t, so oh well */
                          case 't': uv = va_arg(*args, ptrdiff_t);     break;
  #endif
-#ifdef I_STDINT
-                        case 'j': uv = va_arg(*args, uintmax_t);     break;
-#endif
+                        case 'j': uv = va_arg(*args, PERL_UINTMAX_T); break;
                          default:  uv = va_arg(*args, unsigned);      break;
                          case 'q':
  #if IVSIZE >= 8
@@ -13179,7 +13080,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
                  /* hopefully the above makes ptr a very constrained format
                   * that is safe to use, even though it's not literal */
-                GCC_DIAG_IGNORE(-Wformat-nonliteral);
+                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
  #ifdef USE_QUADMATH
                  {
                      const char* qfmt = quadmath_format_single(ptr);
@@ -13200,9 +13101,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                          ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv)
                          : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv));
  #else
-                elen = my_sprintf(PL_efloatbuf, ptr, fv);
+                elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv);
  #endif
-                GCC_DIAG_RESTORE;
+                GCC_DIAG_RESTORE_STMT;
             }
  
             eptr = PL_efloatbuf;
@@ -13248,9 +13149,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #ifdef HAS_PTRDIFF_T
                      case 't':  *(va_arg(*args, ptrdiff_t*)) = i; break;
  #endif
-#ifdef I_STDINT
-                    case 'j':  *(va_arg(*args, intmax_t*))  = i; break;
-#endif
+                    case 'j':  *(va_arg(*args, PERL_INTMAX_T*)) = i; break;
                      case 'q':
  #if IVSIZE >= 8
                                 *(va_arg(*args, Quad_t*))    = i; break;
@@ -13493,13 +13392,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
      Newxz(parser, 1, yy_parser);
      ptr_table_store(PL_ptr_table, proto, parser);
  
-    /* XXX these not yet duped */
-    parser->old_parser = NULL;
-    parser->stack = NULL;
-    parser->ps = NULL;
-    parser->stack_max1 = 0;
-    /* XXX parser->stack->state = 0; */
-
      /* XXX eventually, just Copy() most of the parser struct ? */
  
      parser->lex_brackets = proto->lex_brackets;
@@ -13541,7 +13433,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
      parser->sig_optelems= proto->sig_optelems;
      parser->sig_slurpy  = proto->sig_slurpy;
      parser->recheck_utf8_validity = proto->recheck_utf8_validity;
-    parser->linestr    = sv_dup_inc(proto->linestr, param);
  
      {
         char * const ols = SvPVX(proto->linestr);
@@ -14283,6 +14174,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
                 else
                     LvTARG(dstr) = sv_dup_inc(LvTARG(dstr), param);
                 if (isREGEXP(sstr)) goto duprex;
+               /* FALLTHROUGH */
             case SVt_PVGV:
                 /* non-GP case already handled above */
                 if(isGV_with_GP(sstr)) {
@@ -14336,7 +14228,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
                     SSize_t items = AvFILLp((const AV *)sstr) + 1;
  
                     src_ary = AvARRAY((const AV *)sstr);
-                   Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
+                   Newx(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
                     ptr_table_store(PL_ptr_table, src_ary, dst_ary);
                     AvARRAY(MUTABLE_AV(dstr)) = dst_ary;
                     AvALLOC((const AV *)dstr) = dst_ary;
@@ -14668,7 +14560,7 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param)
         return nsi;
  
      /* create anew and remember what it is */
-    Newxz(nsi, 1, PERL_SI);
+    Newx(nsi, 1, PERL_SI);
      ptr_table_store(PL_ptr_table, si, nsi);
  
      nsi->si_stack      = av_dup_inc(si->si_stack, param);
@@ -14679,6 +14571,9 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param)
      nsi->si_prev       = si_dup(si->si_prev, param);
      nsi->si_next       = si_dup(si->si_next, param);
      nsi->si_markoff    = si->si_markoff;
+#if defined DEBUGGING && !defined DEBUGGING_RE_ONLY
+    nsi->si_stack_hwm   = 0;
+#endif
  
      return nsi;
  }
@@ -14760,7 +14655,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
  
      PERL_ARGS_ASSERT_SS_DUP;
  
-    Newxz(nss, max, ANY);
+    Newx(nss, max, ANY);
  
      while (ix > 0) {
         const UV uv = POPUV(ss,ix);
@@ -14963,8 +14858,8 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
         case SAVEt_AELEM:               /* array element */
             sv = (const SV *)POPPTR(ss,ix);
             TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param));
-           i = POPINT(ss,ix);
-           TOPINT(nss,ix) = i;
+           iv = POPIV(ss,ix);
+           TOPIV(nss,ix) = iv;
             av = (const AV *)POPPTR(ss,ix);
             TOPPTR(nss,ix) = av_dup_inc(av, param);
             break;
@@ -15279,8 +15174,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_filemode                = proto_perl->Ifilemode;
      PL_lastfd          = proto_perl->Ilastfd;
      PL_oldname         = proto_perl->Ioldname;         /* XXX not quite right */
-    PL_Argv            = NULL;
-    PL_Cmd             = NULL;
      PL_gensym          = proto_perl->Igensym;
  
      PL_laststatval     = proto_perl->Ilaststatval;
@@ -15326,7 +15219,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
  #ifdef USE_LOCALE_NUMERIC
      PL_numeric_standard        = proto_perl->Inumeric_standard;
-    PL_numeric_local   = proto_perl->Inumeric_local;
+    PL_numeric_underlying      = proto_perl->Inumeric_underlying;
  #endif /* !USE_LOCALE_NUMERIC */
  
      /* Did the locale setup indicate UTF-8? */
@@ -15681,6 +15574,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      }
      PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param);
      PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param);
+    PL_SCX_invlist = sv_dup_inc(proto_perl->ISCX_invlist, param);
      PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param);
      PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
      PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
@@ -15723,7 +15617,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
         /* next PUSHMARK() sets *(PL_markstack_ptr+1) */
         i = proto_perl->Imarkstack_max - proto_perl->Imarkstack;
-       Newxz(PL_markstack, i, I32);
+       Newx(PL_markstack, i, I32);
         PL_markstack_max        = PL_markstack + (proto_perl->Imarkstack_max
                                                   - proto_perl->Imarkstack);
         PL_markstack_ptr        = PL_markstack + (proto_perl->Imarkstack_ptr
@@ -15733,11 +15627,11 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
         /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix]
          * NOTE: unlike the others! */
-       Newxz(PL_scopestack, PL_scopestack_max, I32);
+       Newx(PL_scopestack, PL_scopestack_max, I32);
         Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32);
  
  #ifdef DEBUGGING
-       Newxz(PL_scopestack_name, PL_scopestack_max, const char *);
+       Newx(PL_scopestack_name, PL_scopestack_max, const char *);
         Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *);
  #endif
          /* reset stack AV to correct length before its duped via
@@ -16908,6 +16802,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv,
         /* def-ness of rval pos() is independent of the def-ness of its arg */
         if ( !(obase->op_flags & OPf_MOD))
             break;
+        /* FALLTHROUGH */
  
      case OP_SCHOMP:
      case OP_CHOMP:
@@ -16983,6 +16878,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv)
      if (PL_op) {
         desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded
                 ? "join or string"
+                : PL_op->op_type == OP_MULTICONCAT
+                    && (PL_op->op_private & OPpMULTICONCAT_FAKE)
+                ? "sprintf"
                 : OP_DESC(PL_op);
         if (uninit_sv && PL_curpad) {
             varname = find_uninit_var(PL_op, uninit_sv, 0, &desc);
@@ -16996,7 +16894,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv)
          desc = "sort";
  
      /* PL_warn_uninit_sv is constant */
-    GCC_DIAG_IGNORE(-Wformat-nonliteral);
+    GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
      if (desc)
          /* diag_listed_as: Use of uninitialized value%s */
          Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv,
@@ -17005,7 +16903,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv)
      else
          Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit,
                  "", "", "");
-    GCC_DIAG_RESTORE;
+    GCC_DIAG_RESTORE_STMT;
  }
  
  /*