X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d1589979b3edf55f4440d041d4566c478a3860f6..407fecf1ecbd5b45621badd1485c91ddf95256e1:/sv.c diff --git a/sv.c b/sv.c index 0616850..2123cf4 100644 --- a/sv.c +++ b/sv.c @@ -131,6 +131,7 @@ static const char S_destroy[] = "DESTROY"; /* ============================================================================ =head1 Allocation and deallocation of SVs. + An SV (or AV, HV, etc.) is allocated in two parts: the head (struct sv, av, hv...) contains type and reference count information, and for many types, a pointer to the body (struct xrv, xpv, xpviv...), which @@ -769,7 +770,7 @@ Perl_sv_free_arenas(pTHX) /* Here are mid-level routines that manage the allocation of bodies out - of the various arenas. There are 5 kinds of arenas: + of the various arenas. There are 4 kinds of arenas: 1. SV-head arenas, which are discussed and handled above 2. regular body arenas @@ -782,7 +783,7 @@ Perl_sv_free_arenas(pTHX) unused block of them is wasteful. Also, several svtypes dont have bodies; the data fits into the sv-head itself. The arena-root pointer thus has a few unused root-pointers (which may be hijacked - later for arena types 4,5) + later for arena type 4) 3 differs from 2 as an optimization; some body types have several unused fields in the front of the structure (which are kept in-place @@ -791,11 +792,6 @@ Perl_sv_free_arenas(pTHX) are decremented to point at the unused 'ghost' memory, knowing that the pointers are used with offsets to the real memory. - -=head1 SV-Body Allocation - -=cut - Allocation of SV-bodies is similar to SV-heads, differing as follows; the allocation mechanism is used for many body types, so is somewhat more complicated, it uses arena-sets, and has no need for still-live @@ -1075,10 +1071,10 @@ Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, #if defined(DEBUGGING) && defined(PERL_GLOBAL_STRUCT) dVAR; #endif -#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE) +#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT) static bool done_sanity_check; - /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global + /* PERL_GLOBAL_STRUCT cannot coexist with global * variables like done_sanity_check. */ if (!done_sanity_check) { unsigned int i = SVt_LAST; @@ -2667,13 +2663,17 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) if (SvTYPE(sv) < SVt_NV) { /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ sv_upgrade(sv, SVt_NV); + CLANG_DIAG_IGNORE_STMT(-Wthread-safety); DEBUG_c({ - STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " num(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_LC_NUMERIC_UNDERLYING(); + RESTORE_LC_NUMERIC(); }); + CLANG_DIAG_RESTORE_STMT; + } else if (SvTYPE(sv) < SVt_PVNV) sv_upgrade(sv, SVt_PVNV); @@ -2808,12 +2808,15 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) and ideally should be fixed. */ return 0.0; } + CLANG_DIAG_IGNORE_STMT(-Wthread-safety); DEBUG_c({ - STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_LC_NUMERIC_UNDERLYING(); + RESTORE_LC_NUMERIC(); }); + CLANG_DIAG_RESTORE_STMT; return SvNVX(sv); } @@ -2843,6 +2846,34 @@ Perl_sv_2num(pTHX_ SV *const sv) return sv_2mortal(newSVuv(PTR2UV(SvRV(sv)))); } +/* int2str_table: lookup table containing string representations of all + * two digit numbers. For example, int2str_table.arr[0] is "00" and + * int2str_table.arr[12*2] is "12". + * + * We are going to read two bytes at a time, so we have to ensure that + * the array is aligned to a 2 byte boundary. That's why it was made a + * union with a dummy U16 member. */ +static const union { + char arr[200]; + U16 dummy; +} int2str_table = {{ + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', + '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', + '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', + '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', + '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', + '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', + '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', + '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', + '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', + '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', + '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', + '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', + '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', + '9', '8', '9', '9' +}}; + /* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or * UV as a string towards the end of buf, and return pointers to start and * end of it. @@ -2850,29 +2881,48 @@ Perl_sv_2num(pTHX_ SV *const sv) * We assume that buf is at least TYPE_CHARS(UV) long. */ -static char * +PERL_STATIC_INLINE char * S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob) { char *ptr = buf + TYPE_CHARS(UV); char * const ebuf = ptr; int sign; + U16 *word_ptr, *word_table; PERL_ARGS_ASSERT_UIV_2BUF; - if (is_uv) + /* ptr has to be properly aligned, because we will cast it to U16* */ + assert(PTR2nat(ptr) % 2 == 0); + /* we are going to read/write two bytes at a time */ + word_ptr = (U16*)ptr; + word_table = (U16*)int2str_table.arr; + + if (UNLIKELY(is_uv)) sign = 0; else if (iv >= 0) { uv = iv; sign = 0; } else { - uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); + uv = -(UV)iv; sign = 1; } - do { - *--ptr = '0' + (char)(uv % 10); - } while (uv /= 10); + + while (uv > 99) { + *--word_ptr = word_table[uv % 100]; + uv /= 100; + } + ptr = (char*)word_ptr; + + if (uv < 10) + *--ptr = (char)uv + '0'; + else { + *--word_ptr = word_table[uv]; + ptr = (char*)word_ptr; + } + if (sign) - *--ptr = '-'; + *--ptr = '-'; + *peob = ebuf; return ptr; } @@ -3080,13 +3130,18 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) /* I'm assuming that if both IV and NV are equally valid then converting the IV is going to be more efficient */ const U32 isUIOK = SvIsUV(sv); - char buf[TYPE_CHARS(UV)]; + /* The purpose of this union is to ensure that arr is aligned on + a 2 byte boundary, because that is what uiv_2buf() requires */ + union { + char arr[TYPE_CHARS(UV)]; + U16 dummy; + } buf; char *ebuf, *ptr; STRLEN len; if (SvTYPE(sv) < SVt_PVIV) sv_upgrade(sv, SVt_PVIV); - ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf); + ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf); len = ebuf - ptr; /* inlined from sv_setpvn */ s = SvGROW_mutable(sv, len + 1); @@ -3141,7 +3196,7 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) DECLARATION_FOR_LC_NUMERIC_MANIPULATION; STORE_LC_NUMERIC_SET_TO_NEEDED(); - local_radix = PL_numeric_local && PL_numeric_radix_sv; + local_radix = _NOT_IN_NUMERIC_STANDARD; if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { size += SvCUR(PL_numeric_radix_sv) - 1; s = SvGROW_mutable(sv, size); @@ -3401,11 +3456,7 @@ if all the bytes are invariant in UTF-8. If C has C bit set, will C on C if appropriate, else not. -If C has C set, this function assumes that the PV -will expand when converted to UTF-8, and skips the extra work of checking for -that. Typically this flag is used by a routine that has already parsed the -string and found such characters, and passes this information on so that the -work doesn't have to be repeated. +The C flag is now ignored. Returns the number of bytes in the converted string. @@ -3426,22 +3477,10 @@ Returns the number of bytes in the converted string (not including the spares). =cut -(One might think that the calling routine could pass in the position of the -first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't -have to be found again. But that is not the case, because typically when the -caller is likely to use this flag, it won't be calling this routine unless it -finds something that won't fit into a byte. Otherwise it tries to not upgrade -and just use bytes. But some things that do fit into a byte are variants in -utf8, and the caller may not have been keeping track of these.) - If the routine itself changes the string, it adds a trailing C. Such a C isn't guaranteed due to having other routines do the work in some input cases, or if the input is already flagged as being in utf8. -The speed of this could perhaps be improved for many cases if someone wanted to -write a fast function that counts the number of variant characters in a string, -especially if it could return the position of the first one. - */ STRLEN @@ -3479,188 +3518,102 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr } if (SvCUR(sv) == 0) { - if (extra) SvGROW(sv, extra); + if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing + byte */ } else { /* Assume Latin-1/EBCDIC */ /* This function could be much more efficient if we * had a FLAG in SVs to signal if there are any variant * chars in the PV. Given that there isn't such a flag - * make the loop as fast as possible (although there are certainly ways - * to speed this up, eg. through vectorization) */ + * make the loop as fast as possible. */ U8 * s = (U8 *) SvPVX_const(sv); - U8 * e = (U8 *) SvEND(sv); U8 *t = s; - STRLEN two_byte_count; - if (flags & SV_FORCE_UTF8_UPGRADE) { - two_byte_count = 0; - } - else { - if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { - - /* utf8 conversion not needed because all are invariants. Mark - * as UTF-8 even if no variant - saves scanning loop */ - SvUTF8_on(sv); - if (extra) SvGROW(sv, SvCUR(sv) + extra); - return SvCUR(sv); - } + if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { - /* Here, there is at least one variant, and t points to the first - * one */ - two_byte_count = 1; + /* utf8 conversion not needed because all are invariants. Mark + * as UTF-8 even if no variant - saves scanning loop */ + SvUTF8_on(sv); + if (extra) SvGROW(sv, SvCUR(sv) + extra); + return SvCUR(sv); } - /* Note that the incoming SV may not have a trailing '\0', as certain - * code in pp_formline can send us partially built SVs. + /* Here, there is at least one variant (t points to the first one), so + * the string should be converted to utf8. Everything from 's' to + * 't - 1' will occupy only 1 byte each on output. * - * Here, the string should be converted to utf8, either because of an - * input flag (which causes two_byte_count to be set to 0), or because - * a character that requires 2 bytes was found (two_byte_count = 1). t - * points either to the beginning of the string (if we didn't examine - * anything), or to the first variant. In either case, everything from - * s to t - 1 will occupy only 1 byte each on output. + * Note that the incoming SV may not have a trailing '\0', as certain + * code in pp_formline can send us partially built SVs. * * There are two main ways to convert. One is to create a new string * and go through the input starting from the beginning, appending each - * converted value onto the new string as we go along. It's probably - * best to allocate enough space in the string for the worst possible - * case rather than possibly running out of space and having to - * reallocate and then copy what we've done so far. Since everything - * from s to t - 1 is invariant, the destination can be initialized - * with these using a fast memory copy - * - * The other way is to figure out exactly how big the string should be, - * by parsing the entire input. Then you don't have to make it big - * enough to handle the worst possible case, and more importantly, if - * the string you already have is large enough, you don't have to - * allocate a new string, you can copy the last character in the input - * string to the final position(s) that will be occupied by the - * converted string and go backwards, stopping at t, since everything - * before that is invariant. - * - * There are advantages and disadvantages to each method. + * converted value onto the new string as we go along. Going this + * route, it's probably best to initially allocate enough space in the + * string rather than possibly running out of space and having to + * reallocate and then copy what we've done so far. Since everything + * from 's' to 't - 1' is invariant, the destination can be initialized + * with these using a fast memory copy. To be sure to allocate enough + * space, one could use the worst case scenario, where every remaining + * byte expands to two under UTF-8, or one could parse it and count + * exactly how many do expand. * - * In the first method, we can allocate a new string, do the memory - * copy from the s to t - 1, and then proceed through the rest of the - * string byte-by-byte. + * The other way is to unconditionally parse the remainder of the + * string to figure out exactly how big the expanded string will be, + * growing if needed. Then start at the end of the string and place + * the character there at the end of the unfilled space in the expanded + * one, working backwards until reaching 't'. * - * In the second method, we proceed through the rest of the input - * string just calculating how big the converted string will be. Then - * there are two cases: - * 1) if the string has enough extra space to handle the converted - * value. We go backwards through the string, converting until we - * get to the position we are at now, and then stop. If this - * position is far enough along in the string, this method is - * faster than the first method above. If the memory copy were - * the same speed as the byte-by-byte loop, that position would be - * about half-way, as at the half-way mark, parsing to the end and - * back is one complete string's parse, the same amount as - * starting over and going all the way through. Actually, it - * would be somewhat less than half-way, as it's faster to just - * count bytes than to also copy, and we don't have the overhead - * of allocating a new string, changing the scalar to use it, and - * freeing the existing one. But if the memory copy is fast, the - * break-even point is somewhere after half way. The counting - * loop could be sped up by vectorization, etc, to move the - * break-even point further towards the beginning. - * 2) if the string doesn't have enough space to handle the converted - * value. A new string will have to be allocated, and one might - * as well, given that, start from the beginning doing the first - * method. We've spent extra time parsing the string and in - * exchange all we've gotten is that we know precisely how big to - * make the new one. Perl is more optimized for time than space, - * so this case is a loser. - * So what I've decided to do is not use the 2nd method unless it is - * guaranteed that a new string won't have to be allocated, assuming - * the worst case. I also decided not to put any more conditions on it - * than this, for now. It seems likely that, since the worst case is - * twice as big as the unknown portion of the string (plus 1), we won't - * be guaranteed enough space, causing us to go to the first method, - * unless the string is short, or the first variant character is near - * the end of it. In either of these cases, it seems best to use the - * 2nd method. The only circumstance I can think of where this would - * be really slower is if the string had once had much more data in it - * than it does now, but there is still a substantial amount in it */ + * The problem with assuming the worst case scenario is that for very + * long strings, we could allocate much more memory than actually + * needed, which can create performance problems. If we have to parse + * anyway, the second method is the winner as it may avoid an extra + * copy. The code used to use the first method under some + * circumstances, but now that there is faster variant counting on + * ASCII platforms, the second method is used exclusively, eliminating + * some code that no longer has to be maintained. */ { - STRLEN invariant_head = t - s; - STRLEN size = invariant_head + (e - t) * 2 + 1 + extra; - if (SvLEN(sv) < size) { - - /* Here, have decided to allocate a new string */ - - U8 *dst; - U8 *d; - - Newx(dst, size, U8); - - /* If no known invariants at the beginning of the input string, - * set so starts from there. Otherwise, can use memory copy to - * get up to where we are now, and then start from here */ - - if (invariant_head == 0) { - d = dst; - } else { - Copy(s, dst, invariant_head, char); - d = dst + invariant_head; - } - - while (t < e) { - append_utf8_from_native_byte(*t, &d); - t++; - } - *d = '\0'; - SvPV_free(sv); /* No longer using pre-existing string */ - SvPV_set(sv, (char*)dst); - SvCUR_set(sv, d - dst); - SvLEN_set(sv, size); - } else { - - /* Here, have decided to get the exact size of the string. - * Currently this happens only when we know that there is - * guaranteed enough space to fit the converted string, so - * don't have to worry about growing. If two_byte_count is 0, - * then t points to the first byte of the string which hasn't - * been examined yet. Otherwise two_byte_count is 1, and t - * points to the first byte in the string that will expand to - * two. Depending on this, start examining at t or 1 after t. - * */ - - U8 *d = t + two_byte_count; - - - /* Count up the remaining bytes that expand to two */ - - while (d < e) { - const U8 chr = *d++; - if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++; - } - - /* The string will expand by just the number of bytes that - * occupy two positions. But we are one afterwards because of - * the increment just above. This is the place to put the - * trailing NUL, and to set the length before we decrement */ - - d += two_byte_count; - SvCUR_set(sv, d - s); - *d-- = '\0'; + /* Count the total number of variants there are. We can start + * just beyond the first one, which is known to be at 't' */ + const Size_t invariant_length = t - s; + U8 * e = (U8 *) SvEND(sv); + + /* The length of the left overs, plus 1. */ + const Size_t remaining_length_p1 = e - t; + + /* We expand by 1 for the variant at 't' and one for each remaining + * variant (we start looking at 't+1') */ + Size_t expansion = 1 + variant_under_utf8_count(t + 1, e); + + /* +1 = trailing NUL */ + Size_t need = SvCUR(sv) + expansion + extra + 1; + U8 * d; + + /* Grow if needed */ + if (SvLEN(sv) < need) { + t = invariant_length + (U8*) SvGROW(sv, need); + e = t + remaining_length_p1; + } + SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion); + /* Set the NUL at the end */ + d = (U8 *) SvEND(sv); + *d-- = '\0'; - /* Having decremented d, it points to the position to put the - * very last byte of the expanded string. Go backwards through - * the string, copying and expanding as we go, stopping when we - * get to the part that is invariant the rest of the way down */ + /* Having decremented d, it points to the position to put the + * very last byte of the expanded string. Go backwards through + * the string, copying and expanding as we go, stopping when we + * get to the part that is invariant the rest of the way down */ - e--; - while (e >= t) { - if (NATIVE_BYTE_IS_INVARIANT(*e)) { - *d-- = *e; - } else { - *d-- = UTF8_EIGHT_BIT_LO(*e); - *d-- = UTF8_EIGHT_BIT_HI(*e); - } - e--; - } - } + e--; + while (e >= t) { + if (NATIVE_BYTE_IS_INVARIANT(*e)) { + *d-- = *e; + } else { + *d-- = UTF8_EIGHT_BIT_LO(*e); + *d-- = UTF8_EIGHT_BIT_HI(*e); + } + e--; + } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { /* Update pos. We do it at the end rather than during @@ -3679,7 +3632,6 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr } } - /* Mark as UTF-8 even if no variant - saves scanning loop */ SvUTF8_on(sv); return SvCUR(sv); } @@ -3784,7 +3736,7 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) PERL_ARGS_ASSERT_SV_UTF8_DECODE; if (SvPOKp(sv)) { - const U8 *start, *c; + const U8 *start, *c, *first_variant; /* The octets may have got themselves encoded - get them back as * bytes @@ -3796,9 +3748,9 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) * we want to make sure everything inside is valid utf8 first. */ c = start = (const U8 *) SvPVX_const(sv); - if (!is_utf8_string(c, SvCUR(sv))) - return FALSE; - if (! is_utf8_invariant_string(c, SvCUR(sv))) { + if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) { + if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c))) + return FALSE; SvUTF8_on(sv); } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { @@ -3919,15 +3871,14 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) glob to begin with. */ if(dtype == SVt_PVGV) { const char * const name = GvNAME((const GV *)dstr); - if( - strEQ(name,"ISA") + const STRLEN len = GvNAMELEN(dstr); + if(memEQs(name, len, "ISA") /* The stash may have been detached from the symbol table, so check its name. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) ) mro_changes = 2; else { - const STRLEN len = GvNAMELEN(dstr); if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') || (len == 1 && name[0] == ':')) { mro_changes = 3; @@ -4140,7 +4091,7 @@ Perl_gv_setref(pTHX_ SV *const dstr, SV *const sstr) } else if ( stype == SVt_PVAV && sref != dref - && strEQ(GvNAME((GV*)dstr), "ISA") + && memEQs(GvNAME((GV*)dstr), GvNAMELEN((GV*)dstr), "ISA") /* The stash may have been detached from the symbol table, so check its name before doing anything. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) @@ -4433,6 +4384,10 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) if (dtype < SVt_PVNV) sv_upgrade(dstr, SVt_PVNV); break; + + case SVt_INVLIST: + invlist_clone(sstr, dstr); + break; default: { const char * const type = sv_reftype(sstr,0); @@ -4450,7 +4405,6 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) sv_upgrade(dstr, SVt_REGEXP); break; - case SVt_INVLIST: case SVt_PVLV: case SVt_PVGV: case SVt_PVMG: @@ -5140,7 +5094,7 @@ giving it to C, and neither should any pointers from "behind" that pointer (e.g. ptr + 1) be used. If S> is true, will call C. If -S & SV_HAS_TRAILING_NUL>> is true, then C must be C, +S> is true, then C must be C, and the realloc will be skipped (i.e. the buffer is actually at least 1 byte longer than C, and already meets the requirements for storing in C). @@ -5263,9 +5217,8 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) SvCUR_set(sv, cur); *SvEND(sv) = '\0'; } - if (len) { - } else { - unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); + if (! len) { + unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } #ifdef DEBUGGING if (DEBUG_C_TEST) @@ -6370,8 +6323,10 @@ Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av) /* =for apidoc sv_insert -Inserts a string at the specified offset/length within the SV. Similar to -the Perl C function. Handles get magic. +Inserts and/or replaces a string at the specified offset/length within the SV. +Similar to the Perl C function, with C bytes starting at +C replacing C bytes of the string in C starting at +C. Handles get magic. =for apidoc sv_insert_flags @@ -7905,8 +7860,6 @@ Perl_sv_eq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) STRLEN cur1; const char *pv2; STRLEN cur2; - I32 eq = 0; - SV* svrecode = NULL; if (!sv1) { pv1 = ""; @@ -7946,11 +7899,9 @@ Perl_sv_eq_flags(pTHX_ SV *sv1, SV *sv2, const U32 flags) } if (cur1 == cur2) - eq = (pv1 == pv2) || memEQ(pv1, pv2, cur1); - - SvREFCNT_dec(svrecode); - - return eq; + return (pv1 == pv2) || memEQ(pv1, pv2, cur1); + else + return 0; } /* @@ -8585,18 +8536,19 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) * null assign is a placeholder. */ rslast = rslen ? rsptr[rslen - 1] : '\0'; - if (rspara) { /* have to do this both before and after */ - do { /* to make sure file boundaries work right */ - if (PerlIO_eof(fp)) - return 0; - i = PerlIO_getc(fp); - if (i != '\n') { - if (i == -1) - return 0; - PerlIO_ungetc(fp,i); - break; - } - } while (i != EOF); + if (rspara) { /* have to do this both before and after */ + /* to make sure file boundaries work right */ + while (1) { + if (PerlIO_eof(fp)) + return 0; + i = PerlIO_getc(fp); + if (i != '\n') { + if (i == -1) + return 0; + PerlIO_ungetc(fp,i); + break; + } + } } /* See if we know enough about I/O mechanism to cheat it ! */ @@ -9798,11 +9750,15 @@ Perl_newRV(pTHX_ SV *const sv) Creates a new SV which is an exact duplicate of the original SV. (Uses C.) +=for apidoc newSVsv_nomg + +Like C but does not process get magic. + =cut */ SV * -Perl_newSVsv(pTHX_ SV *const old) +Perl_newSVsv_flags(pTHX_ SV *const old, I32 flags) { SV *sv; @@ -9813,11 +9769,10 @@ Perl_newSVsv(pTHX_ SV *const old) return NULL; } /* Do this here, otherwise we leak the new SV if this croaks. */ - SvGETMAGIC(old); + if (flags & SV_GMAGIC) + SvGETMAGIC(old); new_SV(sv); - /* SV_NOSTEAL prevents TEMP buffers being, well, stolen, and saves games - with SvTEMP_off and SvTEMP_on round a call to sv_setsv. */ - sv_setsv_flags(sv, old, SV_NOSTEAL); + sv_setsv_flags(sv, old, flags & ~SV_GMAGIC); return sv; } @@ -10723,9 +10678,14 @@ Does not handle 'set' magic. See C>. void Perl_sv_setpviv(pTHX_ SV *const sv, const IV iv) { - char buf[TYPE_CHARS(UV)]; + /* The purpose of this union is to ensure that arr is aligned on + a 2 byte boundary, because that is what uiv_2buf() requires */ + union { + char arr[TYPE_CHARS(UV)]; + U16 dummy; + } buf; char *ebuf; - char * const ptr = uiv_2buf(buf, iv, 0, 0, &ebuf); + char * const ptr = uiv_2buf(buf.arr, iv, 0, 0, &ebuf); PERL_ARGS_ASSERT_SV_SETPVIV; @@ -11162,12 +11122,15 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) assert(!Perl_isinfnan(nv)); if (neg) nv = -nv; - if (nv < UV_MAX) { + if (nv != 0.0 && nv < UV_MAX) { char *p = endbuf; - nv += 0.5; uv = (UV)nv; - if (uv & 1 && uv == nv) - uv--; /* Round to even */ + if (uv != nv) { + nv += 0.5; + uv = (UV)nv; + if (uv & 1 && uv == nv) + uv--; /* Round to even */ + } do { const unsigned dig = uv % 10; *--p = '0' + dig; @@ -11712,6 +11675,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, * the top non-zero nybble. */ for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } assert(n < 4); + assert(vlnz); vlnz[1] = 0; for (vshr = vlnz; vshr >= vfnz; vshr--) { vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); @@ -11812,7 +11776,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, #ifndef USE_LOCALE_NUMERIC *p++ = '.'; #else - if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { + if (IN_LC(LC_NUMERIC)) { STRLEN n; const char* r = SvPV(PL_numeric_radix_sv, n); Copy(r, p, n, char); @@ -11918,11 +11882,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p STRLEN origlen; Size_t svix = 0; static const char nullstr[] = "(null)"; - SV *argsv = NULL; bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ /* Times 4: a decimal digit takes more than 3 binary digits. - * NV_DIG: mantissa takes than many decimal digits. + * NV_DIG: mantissa takes that many decimal digits. * Plus 32: Playing safe. */ char ebuf[IV_DIG * 4 + NV_DIG + 32]; bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ @@ -12023,6 +11986,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p Size_t efix = 0; /* explicit format parameter index */ const Size_t osvix = svix; /* original index in case of bad fmt */ + SV *argsv = NULL; bool is_utf8 = FALSE; /* is this item utf8? */ bool arg_missing = FALSE; /* give "Missing argument" warning */ char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */ @@ -12335,9 +12299,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'V': case 'z': case 't': -#ifdef I_STDINT case 'j': -#endif intsize = *q++; break; } @@ -12660,9 +12622,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 't': iv = va_arg(*args, ptrdiff_t); break; #endif default: iv = va_arg(*args, int); break; -#ifdef I_STDINT - case 'j': iv = va_arg(*args, intmax_t); break; -#endif + case 'j': iv = va_arg(*args, PERL_INTMAX_T); break; case 'q': #if IVSIZE >= 8 iv = va_arg(*args, Quad_t); break; @@ -12697,7 +12657,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p esignbuf[esignlen++] = plus; } else { - uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); + uv = -(UV)iv; esignbuf[esignlen++] = '-'; } } @@ -12717,9 +12677,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * uptrdiff_t, so oh well */ case 't': uv = va_arg(*args, ptrdiff_t); break; #endif -#ifdef I_STDINT - case 'j': uv = va_arg(*args, uintmax_t); break; -#endif + case 'j': uv = va_arg(*args, PERL_UINTMAX_T); break; default: uv = va_arg(*args, unsigned); break; case 'q': #if IVSIZE >= 8 @@ -13010,7 +12968,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p lc_numeric_set = TRUE; } - if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { + if (IN_LC(LC_NUMERIC)) { /* this can't wrap unless PL_numeric_radix_sv is a string * consuming virtually all the 32-bit or 64-bit address * space @@ -13190,7 +13148,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* hopefully the above makes ptr a very constrained format * that is safe to use, even though it's not literal */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); #ifdef USE_QUADMATH { const char* qfmt = quadmath_format_single(ptr); @@ -13213,7 +13171,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #else elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv); #endif - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } eptr = PL_efloatbuf; @@ -13259,9 +13217,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #ifdef HAS_PTRDIFF_T case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; #endif -#ifdef I_STDINT - case 'j': *(va_arg(*args, intmax_t*)) = i; break; -#endif + case 'j': *(va_arg(*args, PERL_INTMAX_T*)) = i; break; case 'q': #if IVSIZE >= 8 *(va_arg(*args, Quad_t*)) = i; break; @@ -13436,8 +13392,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p SvTAINT(sv); - RESTORE_LC_NUMERIC(); /* Done outside loop, so don't have to save/restore - each iteration. */ +#ifdef USE_LOCALE_NUMERIC + + if (lc_numeric_set) { + RESTORE_LC_NUMERIC(); /* Done outside loop, so don't have to + save/restore each iteration. */ + } + +#endif + } /* ========================================================================= @@ -13504,13 +13467,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) Newxz(parser, 1, yy_parser); ptr_table_store(PL_ptr_table, proto, parser); - /* XXX these not yet duped */ - parser->old_parser = NULL; - parser->stack = NULL; - parser->ps = NULL; - parser->stack_max1 = 0; - /* XXX parser->stack->state = 0; */ - /* XXX eventually, just Copy() most of the parser struct ? */ parser->lex_brackets = proto->lex_brackets; @@ -13552,7 +13508,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->sig_optelems= proto->sig_optelems; parser->sig_slurpy = proto->sig_slurpy; parser->recheck_utf8_validity = proto->recheck_utf8_validity; - parser->linestr = sv_dup_inc(proto->linestr, param); { char * const ols = SvPVX(proto->linestr); @@ -14348,7 +14303,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) SSize_t items = AvFILLp((const AV *)sstr) + 1; src_ary = AvARRAY((const AV *)sstr); - Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*); + Newx(dst_ary, AvMAX((const AV *)sstr)+1, SV*); ptr_table_store(PL_ptr_table, src_ary, dst_ary); AvARRAY(MUTABLE_AV(dstr)) = dst_ary; AvALLOC((const AV *)dstr) = dst_ary; @@ -14680,7 +14635,7 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) return nsi; /* create anew and remember what it is */ - Newxz(nsi, 1, PERL_SI); + Newx(nsi, 1, PERL_SI); ptr_table_store(PL_ptr_table, si, nsi); nsi->si_stack = av_dup_inc(si->si_stack, param); @@ -14691,6 +14646,9 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) nsi->si_prev = si_dup(si->si_prev, param); nsi->si_next = si_dup(si->si_next, param); nsi->si_markoff = si->si_markoff; +#if defined DEBUGGING && !defined DEBUGGING_RE_ONLY + nsi->si_stack_hwm = 0; +#endif return nsi; } @@ -14772,7 +14730,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) PERL_ARGS_ASSERT_SS_DUP; - Newxz(nss, max, ANY); + Newx(nss, max, ANY); while (ix > 0) { const UV uv = POPUV(ss,ix); @@ -14975,8 +14933,8 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) case SAVEt_AELEM: /* array element */ sv = (const SV *)POPPTR(ss,ix); TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); - i = POPINT(ss,ix); - TOPINT(nss,ix) = i; + iv = POPIV(ss,ix); + TOPIV(nss,ix) = iv; av = (const AV *)POPPTR(ss,ix); TOPPTR(nss,ix) = av_dup_inc(av, param); break; @@ -15291,8 +15249,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_filemode = proto_perl->Ifilemode; PL_lastfd = proto_perl->Ilastfd; PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ - PL_Argv = NULL; - PL_Cmd = NULL; PL_gensym = proto_perl->Igensym; PL_laststatval = proto_perl->Ilaststatval; @@ -15338,13 +15294,18 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_LOCALE_NUMERIC PL_numeric_standard = proto_perl->Inumeric_standard; - PL_numeric_local = proto_perl->Inumeric_local; + PL_numeric_underlying = proto_perl->Inumeric_underlying; + PL_numeric_underlying_is_standard = proto_perl->Inumeric_underlying_is_standard; #endif /* !USE_LOCALE_NUMERIC */ /* Did the locale setup indicate UTF-8? */ PL_utf8locale = proto_perl->Iutf8locale; PL_in_utf8_CTYPE_locale = proto_perl->Iin_utf8_CTYPE_locale; PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale; + my_strlcpy(PL_locale_utf8ness, proto_perl->Ilocale_utf8ness, sizeof(PL_locale_utf8ness)); +#if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE) + PL_lc_numeric_mutex_depth = 0; +#endif /* Unicode features (see perlrun/-C) */ PL_unicode = proto_perl->Iunicode; @@ -15617,16 +15578,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, if (PL_my_cxt_size) { Newx(PL_my_cxt_list, PL_my_cxt_size, void *); Copy(proto_perl->Imy_cxt_list, PL_my_cxt_list, PL_my_cxt_size, void *); -#ifdef PERL_GLOBAL_STRUCT_PRIVATE - Newx(PL_my_cxt_keys, PL_my_cxt_size, const char *); - Copy(proto_perl->Imy_cxt_keys, PL_my_cxt_keys, PL_my_cxt_size, char *); -#endif } else { PL_my_cxt_list = (void**)NULL; -#ifdef PERL_GLOBAL_STRUCT_PRIVATE - PL_my_cxt_keys = (const char**)NULL; -#endif } PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param); PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param); @@ -15658,6 +15612,13 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_subname = sv_dup_inc(proto_perl->Isubname, param); +#if defined(USE_POSIX_2008_LOCALE) \ + && defined(USE_THREAD_SAFE_LOCALE) \ + && ! defined(HAS_QUERYLOCALE) + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { + PL_curlocales[i] = savepv("."); /* An illegal value */ + } +#endif #ifdef USE_LOCALE_CTYPE /* Should we warn if uses locale? */ PL_warn_locale = sv_dup_inc(proto_perl->Iwarn_locale, param); @@ -15670,45 +15631,20 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_LOCALE_NUMERIC PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); + +# if defined(HAS_POSIX_2008_LOCALE) + PL_underlying_numeric_obj = NULL; +# endif #endif /* !USE_LOCALE_NUMERIC */ PL_langinfo_buf = NULL; PL_langinfo_bufsize = 0; - /* Unicode inversion lists */ - PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); - PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); - PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param); - PL_InBitmap = sv_dup_inc(proto_perl->IInBitmap, param); - - PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param); - PL_HasMultiCharFold = sv_dup_inc(proto_perl->IHasMultiCharFold, param); + PL_setlocale_buf = NULL; + PL_setlocale_bufsize = 0; /* utf8 character class swashes */ - for (i = 0; i < POSIX_SWASH_COUNT; i++) { - PL_utf8_swash_ptrs[i] = sv_dup_inc(proto_perl->Iutf8_swash_ptrs[i], param); - } - for (i = 0; i < POSIX_CC_COUNT; i++) { - PL_XPosix_ptrs[i] = sv_dup_inc(proto_perl->IXPosix_ptrs[i], param); - } - PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param); - PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param); - PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param); PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); - PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); - PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); - PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); - PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); - PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); - PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); - PL_utf8_xidstart = sv_dup_inc(proto_perl->Iutf8_xidstart, param); - PL_utf8_perl_idstart = sv_dup_inc(proto_perl->Iutf8_perl_idstart, param); - PL_utf8_perl_idcont = sv_dup_inc(proto_perl->Iutf8_perl_idcont, param); - PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); - PL_utf8_xidcont = sv_dup_inc(proto_perl->Iutf8_xidcont, param); - PL_utf8_foldable = sv_dup_inc(proto_perl->Iutf8_foldable, param); - PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param); - PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param); if (proto_perl->Ipsig_pend) { Newxz(PL_psig_pend, SIG_SIZE, int); @@ -15735,7 +15671,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ i = proto_perl->Imarkstack_max - proto_perl->Imarkstack; - Newxz(PL_markstack, i, I32); + Newx(PL_markstack, i, I32); PL_markstack_max = PL_markstack + (proto_perl->Imarkstack_max - proto_perl->Imarkstack); PL_markstack_ptr = PL_markstack + (proto_perl->Imarkstack_ptr @@ -15745,11 +15681,11 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] * NOTE: unlike the others! */ - Newxz(PL_scopestack, PL_scopestack_max, I32); + Newx(PL_scopestack, PL_scopestack_max, I32); Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32); #ifdef DEBUGGING - Newxz(PL_scopestack_name, PL_scopestack_max, const char *); + Newx(PL_scopestack_name, PL_scopestack_max, const char *); Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *); #endif /* reset stack AV to correct length before its duped via @@ -15808,7 +15744,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param); PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param); - PL_utf8_foldclosures = hv_dup_inc(proto_perl->Iutf8_foldclosures, param); /* Call the ->CLONE method, if it exists, for each of the stashes identified by sv_dup() above. @@ -15957,6 +15892,8 @@ Perl_clone_params_new(PerlInterpreter *const from, PerlInterpreter *const to) void Perl_init_constants(pTHX) { + dVAR; + SvREFCNT(&PL_sv_undef) = SvREFCNT_IMMORTAL; SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVf_PROTECT|SVt_NULL; SvANY(&PL_sv_undef) = NULL; @@ -16996,6 +16933,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) if (PL_op) { desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded ? "join or string" + : PL_op->op_type == OP_MULTICONCAT + && (PL_op->op_private & OPpMULTICONCAT_FAKE) + ? "sprintf" : OP_DESC(PL_op); if (uninit_sv && PL_curpad) { varname = find_uninit_var(PL_op, uninit_sv, 0, &desc); @@ -17009,7 +16949,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) desc = "sort"; /* PL_warn_uninit_sv is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); if (desc) /* diag_listed_as: Use of uninitialized value%s */ Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv, @@ -17018,7 +16958,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) else Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, "", "", ""); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } /*