Use general locale mutex for numeric operations

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index bea9415..172a1fa 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -8420,7 +8420,7 @@ Perl_sv_collxfrm_flags(pTHX_ SV *const sv, STRLEN *const nxp, const I32 flags)
              Safefree(mg->mg_ptr);
  
          s = SvPV_flags_const(sv, len, flags);
-        if ((xf = _mem_collxfrm(s, len, &xlen, cBOOL(SvUTF8(sv))))) {
+       if ((xf = mem_collxfrm_(s, len, &xlen, cBOOL(SvUTF8(sv))))) {
              if (! mg) {
                  mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm,
                                   0, 0);
@@ -9554,6 +9554,30 @@ Perl_newSVpvn(pTHX_ const char *const buffer, const STRLEN len)
  }
  
  /*
+=for apidoc newSVhek_mortal
+
+Creates a new mortal SV from the hash key structure.  It will generate
+scalars that point to the shared string table where possible.  Returns
+a new (undefined) SV if C<hek> is NULL.
+
+This is more efficient than using C<sv_2mortal(newSVhek( ... ))>.
+
+=cut
+*/
+
+SV *
+Perl_newSVhek_mortal(pTHX_ const HEK *const hek)
+{
+    SV * const sv = newSVhek(hek); /* hek may be NULL; documented to give an undef SV */
+    assert(sv);                    /* an SV is expected back even for a NULL hek */
+    assert(!SvIMMORTAL(sv));       /* a freshly made SV must not be an immortal one */
+
+    PUSH_EXTEND_MORTAL__SV_C(sv);  /* NOTE(review): presumably pushes sv on the temps stack, growing it if needed -- confirm against the macro */
+    SvTEMP_on(sv);                 /* flag the SV as a temporary, i.e. mortal */
+    return sv;
+}
+
+/*
  =for apidoc newSVhek
  
  Creates a new SV from the hash key structure.  It will generate scalars that
@@ -9878,39 +9902,6 @@ Perl_newSV_false(pTHX)
      return sv;
  }
  
-/*
-=for apidoc newRV_noinc
-
-Creates an RV wrapper for an SV.  The reference count for the original
-SV is B<not> incremented.
-
-=cut
-*/
-
-SV *
-Perl_newRV_noinc(pTHX_ SV *const tmpRef)
-{
-    SV *sv;
-
-    PERL_ARGS_ASSERT_NEWRV_NOINC;
-
-    new_SV(sv);
-
-    /* We're starting from SVt_FIRST, so provided that's
-     * actual 0, we don't have to unset any SV type flags
-     * to promote to SVt_IV. */
-    STATIC_ASSERT_STMT(SVt_FIRST == 0);
-
-    SET_SVANY_FOR_BODYLESS_IV(sv);
-    SvFLAGS(sv) |= SVt_IV;
-
-    SvTEMP_off(tmpRef);
-
-    sv_setrv_noinc(sv, tmpRef);
-
-    return sv;
-}
-
  /* newRV_inc is the official function name to use now.
   * newRV_inc is in fact #defined to newRV in sv.h
   */
@@ -9985,7 +9976,7 @@ Perl_sv_resetpvn(pTHX_ const char *s, STRLEN len, HV * const stash)
  
      if (!s) {          /* reset ?? searches */
          MAGIC * const mg = mg_find((const SV *)stash, PERL_MAGIC_symtab);
-        if (mg) {
+        if (mg && mg->mg_len) {
              const U32 count = mg->mg_len / sizeof(PMOP**);
              PMOP **pmp = (PMOP**) mg->mg_ptr;
              PMOP *const *const end = pmp + count;
@@ -10308,7 +10299,7 @@ Perl_sv_pvbyten_force(pTHX_ SV *const sv, STRLEN *const lp)
      PERL_ARGS_ASSERT_SV_PVBYTEN_FORCE;
  
      sv_pvn_force(sv,lp);
-    sv_utf8_downgrade(sv,0);
+    (void)sv_utf8_downgrade(sv,0);
      *lp = SvCUR(sv);
      return SvPVX(sv);
  }
@@ -12036,6 +12027,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #endif
      /* we never change this unless USE_LOCALE_NUMERIC */
      bool in_lc_numeric = FALSE;
+    SV *tmp_sv = NULL;
  
      PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
      PERL_UNUSED_ARG(maybe_tainted);
@@ -12141,6 +12133,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
          char c;                       /* the actual format ('d', s' etc) */
  
+        bool escape_it   = FALSE;     /* if this is a string should we quote and escape it? */
+
  
          /* echo everything up to the next format specification */
          for (q = fmtstart; q < patend && *q != '%'; ++q)
@@ -12514,6 +12508,21 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
              }
  
          string:
+            if (escape_it) {
+                U32 flags = PERL_PV_PRETTY_QUOTEDPREFIX;
+                if (is_utf8)
+                    flags |= PERL_PV_ESCAPE_UNI;
+
+                if (!tmp_sv) {
+                    /* "blah"... where blah might be made up
+                     * of characters like \x{1234} */
+                    tmp_sv = newSV(1 + (PERL_QUOTEDPREFIX_LEN * 8) + 1 + 3);
+                    sv_2mortal(tmp_sv);
+                }
+                pv_pretty(tmp_sv, eptr, elen, PERL_QUOTEDPREFIX_LEN,
+                            NULL, NULL, flags);
+                eptr = SvPV_const(tmp_sv, elen);
+            }
              if (has_precis && precis < elen)
                  elen = precis;
              break;
@@ -12522,7 +12531,34 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
          case 'p':
  
-            /* %p extensions:
+            /* BEGIN NOTE
+             *
+             * We want to extend the C level sprintf format API with
+             * custom formats for specific types (eg SV*) and behavior.
+             * However some C compilers are "sprintf aware" and will
+             * emit compile-time diagnostics when an illegal sprintf format
+             * is encountered, so we can't just add new format letters.
+             *
+             * However it turns out the length argument to the %p format
+             * is more or less useless (the size of a pointer does not
+             * change over time) and is not really used in the C level
+             * code. Accordingly we can map our special behavior to
+             * specific "length" options to the %p format. We hide these
+             * mappings behind defines anyway, so nobody needs to know
+             * that HEKf is actually %2p. This keeps the C compiler
+             * happy while allowing us to add new formats.
+             *
+             * Note the existing logic for which number is used for what
+             * is tortuous. All negative values are used for SVf, and
+             * non-negative values have arbitrary meanings with no
+             * structure to them. This may change in the future.
+             *
+             * NEVER use the raw %p values directly. Always use the define
+             * as the underlying mapping may change in the future.
+             *
+             * END NOTE
+             *
+             * %p extensions:
               *
               * "%...p" is normally treated like "%...x", except that the
               * number to print is the SV's address (or a pointer address
@@ -12532,23 +12568,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
               * extensions. These are currently:
               *
               * %-p       (SVf)  Like %s, but gets the string from an SV*
-             *                  arg rather than a char* arg.
+             *                  arg rather than a char* arg. Use C<SVfARG()>
+             *                  to set up the argument properly.
               *                  (This was previously %_).
               *
-             * %-<num>p         Ditto but like %.<num>s (i.e. num is max width)
+             * %-<num>p         Ditto but like %.<num>s (i.e. num is max
+             *                  width); there is no escaped and quoted
+             *                  version of this.
+             *
+             * %1p       (PVf_QUOTEDPREFIX). Like raw %s, but it is escaped
+             *                  and quoted.
+             *
+             * %5p       (SVf_QUOTEDPREFIX) Like SVf, but length restricted,
+             *                  escaped and quoted with pv_pretty. Intended
+             *                  for error messages.
               *
               * %2p       (HEKf) Like %s, but using the key string in a HEK
+             * %7p       (HEKf_QUOTEDPREFIX) ... but escaped and quoted.
               *
               * %3p       (HEKf256) Ditto but like %.256s
+             * %8p       (HEKf256_QUOTEDPREFIX) ... but escaped and quoted
               *
               * %d%lu%4p  (UTF8f) A utf8 string. Consumes 3 args:
               *                       (cBOOL(utf8), len, string_buf).
               *                   It's handled by the "case 'd'" branch
               *                   rather than here.
+             * %d%lu%9p  (UTF8f_QUOTEDPREFIX) .. but escaped and quoted.
+             *
               *
-             * %<num>p   where num is 1 or > 4: reserved for future
+             * %<num>p   where num is > 9: reserved for future
               *           extensions. Warns, but then is treated as a
               *           general %p (print hex address) format.
+             *
+             * NOTE: If you add a new magic %p value you will
+             * need to update F<t/porting/diag.t> to be aware of it
+             * in addition to adding the various defines, etc. Do not
+             * forget to add it to F<pod/perlguts.pod> as well.
               */
  
              if (   args
@@ -12560,10 +12615,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  && q[-2] != '*'
                  && q[-2] != '$'
              ) {
-                if (left) {                    /* %-p (SVf), %-NNNp */
-                    if (width) {
+                if (left || width == 5) {                /* %-p (SVf), %-NNNp, %5p */
+                    if (left && width) {
                          precis = width;
                          has_precis = TRUE;
+                    } else if (width == 5) {
+                        escape_it = TRUE;
                      }
                      argsv = MUTABLE_SV(va_arg(*args, void*));
                      eptr = SvPV_const(argsv, elen);
@@ -12572,7 +12629,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      width = 0;
                      goto string;
                  }
-                else if (width == 2 || width == 3) {   /* HEKf, HEKf256 */
+                else if (width == 2 || width == 3 ||
+                         width == 7 || width == 8)
+                {        /* HEKf, HEKf256, HEKf_QUOTEDPREFIX, HEKf256_QUOTEDPREFIX */
                      HEK * const hek = va_arg(*args, HEK *);
                      eptr = HEK_KEY(hek);
                      elen = HEK_LEN(hek);
@@ -12582,10 +12641,20 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                          precis = 256;
                          has_precis = TRUE;
                      }
+                    if (width > 5)
+                        escape_it = TRUE;
+                    width = 0;
+                    goto string;
+                }
+                else if (width == 1) {
+                    eptr = va_arg(*args,char *);
+                    elen = strlen(eptr);
+                    escape_it = TRUE;
                      width = 0;
                      goto string;
                  }
                  else if (width) {
+                    /* note width=4 or width=9 is handled under %d */
                      Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
                           "internal %%<num>p might conflict with future printf extensions");
                  }
@@ -12626,7 +12695,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
          case 'd':
              /* probably just a plain %d, but it might be the start of the
               * special UTF8f format, which usually looks something like
-             * "%d%lu%4p" (the lu may vary by platform)
+             * "%d%lu%4p" (the lu may vary by platform) or
+             * "%d%lu%9p" for an escaped version.
               */
              assert((UTF8f)[0] == 'd');
              assert((UTF8f)[1] == '%');
@@ -12635,10 +12705,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                   && q == fmtstart + 1 /* plain %d, not %....d */
                   && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
                   && *q == '%'
-                 && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 3))
+                 && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 5)
+                 && q[sizeof(UTF8f)-3] == 'p'
+                 && (q[sizeof(UTF8f)-4] == '4' ||
+                     q[sizeof(UTF8f)-4] == '9'))
              {
                  /* The argument has already gone through cBOOL, so the cast
                     is safe. */
+                if (q[sizeof(UTF8f)-4] == '9')
+                    escape_it = TRUE;
                  is_utf8 = (bool)va_arg(*args, int);
                  elen = va_arg(*args, UV);
                  /* if utf8 length is larger than 0x7ffff..., then it might
@@ -14642,6 +14717,9 @@ S_sv_dup_common(pTHX_ const SV *const ssv, CLONE_PARAMS *const param)
                  } else if (CvCONST(dsv)) {
                      CvXSUBANY(dsv).any_ptr =
                          sv_dup_inc((const SV *)CvXSUBANY(dsv).any_ptr, param);
+                } else if (CvREFCOUNTED_ANYSV(dsv)) {
+                    CvXSUBANY(dsv).any_sv =
+                        sv_dup_inc((const SV *)CvXSUBANY(dsv).any_sv, param);
                  }
                  assert(!CvSLABBED(dsv));
                  if (CvDYNFILE(dsv)) CvFILE(dsv) = SAVEPV(CvFILE(dsv));
@@ -15357,6 +15435,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_savestack_max = -1;
      PL_sig_pending = 0;
      PL_parser = NULL;
+    PL_eval_begin_nest_depth = proto_perl->Ieval_begin_nest_depth;
      Zero(&PL_debug_pad, 1, struct perl_debug_pad);
      Zero(&PL_padname_undef, 1, PADNAME);
      Zero(&PL_padname_const, 1, PADNAME);
@@ -15509,6 +15588,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_collxfrm_base   = proto_perl->Icollxfrm_base;
      PL_collxfrm_mult   = proto_perl->Icollxfrm_mult;
      PL_strxfrm_max_cp   = proto_perl->Istrxfrm_max_cp;
+    PL_strxfrm_is_behaved = proto_perl->Istrxfrm_is_behaved;
+    PL_strxfrm_NUL_replacement = proto_perl->Istrxfrm_NUL_replacement;
  #endif /* USE_LOCALE_COLLATE */
  
  #ifdef USE_LOCALE_NUMERIC
@@ -15519,11 +15600,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
      /* Did the locale setup indicate UTF-8? */
      PL_utf8locale      = proto_perl->Iutf8locale;
-    PL_in_utf8_CTYPE_locale = proto_perl->Iin_utf8_CTYPE_locale;
-    PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale;
-    my_strlcpy(PL_locale_utf8ness, proto_perl->Ilocale_utf8ness, sizeof(PL_locale_utf8ness));
-#if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE)
-    PL_lc_numeric_mutex_depth = 0;
+
+#ifdef USE_LOCALE_THREADS
+    assert(PL_locale_mutex_depth <= 0);
+    PL_locale_mutex_depth = 0;
  #endif
      /* Unicode features (see perlrun/-C) */
      PL_unicode         = proto_perl->Iunicode;
@@ -15553,6 +15633,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
      PL_srand_called    = proto_perl->Isrand_called;
      Copy(&(proto_perl->Irandom_state), &PL_random_state, 1, PL_RANDOM_STATE_TYPE);
+    PL_srand_override   = proto_perl->Isrand_override;
+    PL_srand_override_next = proto_perl->Isrand_override_next;
  
      if (flags & CLONEf_COPY_STACKS) {
          /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */
@@ -15825,31 +15907,38 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
  
      PL_subname         = sv_dup_inc(proto_perl->Isubname, param);
  
-#if   defined(USE_POSIX_2008_LOCALE)      \
- &&   defined(USE_THREAD_SAFE_LOCALE)     \
- && ! defined(HAS_QUERYLOCALE)
+#ifdef USE_PL_CURLOCALES
      for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) {
-        PL_curlocales[i] = savepv("."); /* An illegal value */
+        PL_curlocales[i] = SAVEPV(proto_perl->Icurlocales[i]);
      }
  #endif
  #ifdef USE_LOCALE_CTYPE
      Copy(proto_perl->Ifold_locale, PL_fold_locale, 256, U8);
      /* Should we warn if uses locale? */
+    PL_ctype_name      = SAVEPV(proto_perl->Ictype_name);
      PL_warn_locale      = sv_dup_inc(proto_perl->Iwarn_locale, param);
+    PL_utf8locale             = proto_perl->Iutf8locale;
+    PL_in_utf8_CTYPE_locale   = proto_perl->Iin_utf8_CTYPE_locale;
+    PL_in_utf8_turkic_locale  = proto_perl->Iin_utf8_turkic_locale;
  #endif
  
  #ifdef USE_LOCALE_COLLATE
+    PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale;
      PL_collation_name  = SAVEPV(proto_perl->Icollation_name);
  #endif /* USE_LOCALE_COLLATE */
  
  #ifdef USE_LOCALE_NUMERIC
      PL_numeric_name    = SAVEPV(proto_perl->Inumeric_name);
      PL_numeric_radix_sv        = sv_dup_inc(proto_perl->Inumeric_radix_sv, param);
+    PL_underlying_radix_sv = sv_dup_inc(proto_perl->Iunderlying_radix_sv, param);
  
  #  if defined(USE_POSIX_2008_LOCALE)
      PL_underlying_numeric_obj = NULL;
  #  endif
  #endif /* !USE_LOCALE_NUMERIC */
+#if defined(USE_POSIX_2008_LOCALE)
+    PL_scratch_locale_obj = NULL;
+#endif
  
  #ifdef HAS_MBRLEN
      PL_mbrlen_ps = proto_perl->Imbrlen_ps;
@@ -15867,6 +15956,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_setlocale_buf = NULL;
      PL_setlocale_bufsize = 0;
  
+    PL_stdize_locale_buf = NULL;
+    PL_stdize_locale_bufsize = 0;
+
      /* Unicode inversion lists */
  
      PL_AboveLatin1            = sv_dup_inc(proto_perl->IAboveLatin1, param);
@@ -16386,7 +16478,7 @@ S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val)
          HE *entry;
          for (entry = array[i]; entry; entry = HeNEXT(entry)) {
              if (HeVAL(entry) == val)
-                return sv_2mortal(newSVhek(HeKEY_hek(entry)));
+                return newSVhek_mortal(HeKEY_hek(entry));
          }
      }
      return NULL;