X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/890ed176d1ab1cf305ce9634210857cfed305fff..ddeae0f14c58a5f1101e1a8c75be3b67f60cf6fd:/sv.c?ds=sidebyside diff --git a/sv.c b/sv.c index e522a47..d82e354 100644 --- a/sv.c +++ b/sv.c @@ -1,6 +1,7 @@ /* sv.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -28,10 +29,6 @@ #define SV_COW_NEXT_SV_SET(current,next) SvUVX(current) = PTR2UV(next) /* This is a pessimistic view. Scalar must be purely a read-write PV to copy- on-write. */ -#define CAN_COW_MASK (SVs_OBJECT|SVs_GMG|SVs_SMG|SVs_RMG|SVf_IOK|SVf_NOK| \ - SVf_POK|SVf_ROK|SVp_IOK|SVp_NOK|SVp_POK|SVf_FAKE| \ - SVf_OOK|SVf_BREAK|SVf_READONLY|SVf_AMAGIC) -#define CAN_COW_FLAGS (SVp_POK|SVf_POK) #endif /* ============================================================================ @@ -1566,8 +1563,6 @@ Perl_sv_grow(pTHX_ register SV *sv, register STRLEN newlen) { register char *s; - - #ifdef HAS_64K_LIMIT if (newlen >= 0x10000) { PerlIO_printf(Perl_debug_log, @@ -2279,7 +2274,7 @@ Perl_sv_2iv(pTHX_ register SV *sv) this NV is in the preserved range, therefore: */ if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) < (UV)IV_MAX)) { - Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs(SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); } } else { /* IN_UV NOT_INT @@ -2566,7 +2561,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) this NV is in the preserved range, therefore: */ if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) < (UV)IV_MAX)) { - Perl_croak(aTHX_ "sv_2uv assumed (U_V(fabs(SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + Perl_croak(aTHX_ "sv_2uv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); } } else sv_2iuv_non_preserve (sv, numtype); @@ -2895,6 +2890,16 @@ uiv_2buf(char *buf, IV iv, UV uv, int is_uv, char **peob) return ptr; } +/* sv_2pv() is now a macro using Perl_sv_2pv_flags(); + * this function provided for binary compatibility only + */ + +char * +Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) +{ + return sv_2pv_flags(sv, lp, SV_GMAGIC); +} + /* =for apidoc sv_2pv_flags @@ -2972,7 +2977,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) case SVt_PVMG: if ( ((SvFLAGS(sv) & (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) - == (SVs_OBJECT|SVs_RMG)) + == (SVs_OBJECT|SVs_SMG)) && (mg = mg_find(sv, PERL_MAGIC_qr))) { regexp *re = (regexp *)mg->mg_obj; @@ -3064,7 +3069,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) s = "REF"; else s = "SCALAR"; break; - case SVt_PVLV: s = "LVALUE"; break; + case SVt_PVLV: s = SvROK(sv) ? "REF":"LVALUE"; break; case SVt_PVAV: s = "ARRAY"; break; case SVt_PVHV: s = "HASH"; break; case SVt_PVCV: s = "CODE"; break; @@ -3075,7 +3080,10 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) } tsv = NEWSV(0,0); if (SvOBJECT(sv)) - Perl_sv_setpvf(aTHX_ tsv, "%s=%s", HvNAME(SvSTASH(sv)), s); + if (HvNAME(SvSTASH(sv))) + Perl_sv_setpvf(aTHX_ tsv, "%s=%s", HvNAME(SvSTASH(sv)), s); + else + Perl_sv_setpvf(aTHX_ tsv, "__ANON__=%s", s); else sv_setpv(tsv, s); Perl_sv_catpvf(aTHX_ tsv, "(0x%"UVxf")", PTR2UV(sv)); @@ -3346,6 +3354,17 @@ Perl_sv_2bool(pTHX_ register SV *sv) } } +/* sv_utf8_upgrade() is now a macro using sv_utf8_upgrade_flags(); + * this function provided for binary compatibility only + */ + + +STRLEN +Perl_sv_utf8_upgrade(pTHX_ register SV *sv) +{ + return sv_utf8_upgrade_flags(sv, SV_GMAGIC); +} + /* =for apidoc sv_utf8_upgrade @@ -3395,7 +3414,7 @@ Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) sv_force_normal_flags(sv, 0); } - if (PL_encoding) + if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) sv_recode_to_utf8(sv, PL_encoding); else { /* Assume Latin-1/EBCDIC */ /* This function could be much more efficient if we @@ -3528,6 +3547,16 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv) return TRUE; } +/* sv_setsv() is now a macro using Perl_sv_setsv_flags(); + * this function provided for binary compatibility only + */ + +void +Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) +{ + sv_setsv_flags(dstr, sstr, SV_GMAGIC); +} + /* =for apidoc sv_setsv @@ -3658,8 +3687,16 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) goto glob_assign; } break; - case SVt_PV: case SVt_PVFM: +#ifdef PERL_COPY_ON_WRITE + if ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS) { + if (dtype < SVt_PVIV) + sv_upgrade(dstr, SVt_PVIV); + break; + } + /* Fall through */ +#endif + case SVt_PV: if (dtype < SVt_PV) sv_upgrade(dstr, SVt_PV); break; @@ -3944,8 +3981,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) /* Either it's a shared hash key, or it's suitable for copy-on-write or we can swipe the string. */ if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, - "Copy on write: sstr --> dstr\n"); + PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n"); sv_dump(sstr); sv_dump(dstr); } @@ -3982,6 +4018,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) /* making another shared SV. */ STRLEN cur = SvCUR(sstr); STRLEN len = SvLEN(sstr); + assert (SvTYPE(dstr) >= SVt_PVIV); if (len) { /* SvIsCOW_normal */ /* splice us in between source and next-after-source. */ @@ -4098,6 +4135,77 @@ Perl_sv_setsv_mg(pTHX_ SV *dstr, register SV *sstr) SvSETMAGIC(dstr); } +#ifdef PERL_COPY_ON_WRITE +SV * +Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) +{ + STRLEN cur = SvCUR(sstr); + STRLEN len = SvLEN(sstr); + register char *new_pv; + + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", + sstr, dstr); + sv_dump(sstr); + if (dstr) + sv_dump(dstr); + } + + if (dstr) { + if (SvTHINKFIRST(dstr)) + sv_force_normal_flags(dstr, SV_COW_DROP_PV); + else if (SvPVX(dstr)) + Safefree(SvPVX(dstr)); + } + else + new_SV(dstr); + SvUPGRADE (dstr, SVt_PVIV); + + assert (SvPOK(sstr)); + assert (SvPOKp(sstr)); + assert (!SvIOK(sstr)); + assert (!SvIOKp(sstr)); + assert (!SvNOK(sstr)); + assert (!SvNOKp(sstr)); + + if (SvIsCOW(sstr)) { + + if (SvLEN(sstr) == 0) { + /* source is a COW shared hash key. */ + UV hash = SvUVX(sstr); + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Fast copy on write: Sharing hash\n")); + SvUVX(dstr) = hash; + new_pv = sharepvn(SvPVX(sstr), (SvUTF8(sstr)?-cur:cur), hash); + goto common_exit; + } + SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr)); + } else { + assert ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS); + SvUPGRADE (sstr, SVt_PVIV); + SvREADONLY_on(sstr); + SvFAKE_on(sstr); + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Fast copy on write: Converting sstr to COW\n")); + SV_COW_NEXT_SV_SET(dstr, sstr); + } + SV_COW_NEXT_SV_SET(sstr, dstr); + new_pv = SvPVX(sstr); + + common_exit: + SvPV_set(dstr, new_pv); + SvFLAGS(dstr) = (SVt_PVIV|SVf_POK|SVp_POK|SVf_FAKE|SVf_READONLY); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); + SvLEN(dstr) = len; + SvCUR(dstr) = cur; + if (DEBUG_C_TEST) { + sv_dump(dstr); + } + return dstr; +} +#endif + /* =for apidoc sv_setpvn @@ -4299,7 +4407,7 @@ an xpvmg; if we're a copy-on-write scalar, this is the on-write time when we do the copy, and is also used locally. If C is set then a copy-on-write scalar drops its PV buffer (if any) and becomes SvPOK_off rather than making a copy. (Used where this scalar is about to be -set to some other value. In addtion, the C parameter gets passed to +set to some other value.) In addition, the C parameter gets passed to C when unrefing. C calls this function with flags set to 0. @@ -4353,11 +4461,11 @@ Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags) char *pvx = SvPVX(sv); STRLEN len = SvCUR(sv); U32 hash = SvUVX(sv); + SvFAKE_off(sv); + SvREADONLY_off(sv); SvGROW(sv, len + 1); Move(pvx,SvPVX(sv),len,char); *SvEND(sv) = '\0'; - SvFAKE_off(sv); - SvREADONLY_off(sv); unsharepvn(pvx, SvUTF8(sv) ? -(I32)len : len, hash); } else if (PL_curcop != &PL_compiling) @@ -4417,9 +4525,12 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) *SvEND(sv) = '\0'; } SvIVX(sv) = 0; - SvFLAGS(sv) |= SVf_OOK; + /* Same SvOOK_on but SvOOK_on does a SvIOK_off + and we do that anyway inside the SvNIOK_off + */ + SvFLAGS(sv) |= SVf_OOK; } - SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK|SVp_IOK|SVp_NOK|SVf_IVisUV); + SvNIOK_off(sv); delta = ptr - SvPVX(sv); SvLEN(sv) -= delta; SvCUR(sv) -= delta; @@ -4427,6 +4538,16 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) SvIVX(sv) += delta; } +/* sv_catpvn() is now a macro using Perl_sv_catpvn_flags(); + * this function provided for binary compatibility only + */ + +void +Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen) +{ + sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC); +} + /* =for apidoc sv_catpvn @@ -4479,6 +4600,16 @@ Perl_sv_catpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRL SvSETMAGIC(sv); } +/* sv_catsv() is now a macro using Perl_sv_catsv_flags(); + * this function provided for binary compatibility only + */ + +void +Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) +{ + sv_catsv_flags(dstr, sstr, SV_GMAGIC); +} + /* =for apidoc sv_catsv @@ -4824,6 +4955,9 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam case PERL_MAGIC_vstring: vtable = 0; break; + case PERL_MAGIC_utf8: + vtable = &PL_vtbl_utf8; + break; case PERL_MAGIC_substr: vtable = &PL_vtbl_substr; break; @@ -4893,6 +5027,8 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) Safefree(mg->mg_ptr); else if (mg->mg_len == HEf_SVKEY) SvREFCNT_dec((SV*)mg->mg_ptr); + else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr) + Safefree(mg->mg_ptr); } if (mg->mg_flags & MGf_REFCOUNTED) SvREFCNT_dec(mg->mg_obj); @@ -4956,7 +5092,19 @@ S_sv_add_backref(pTHX_ SV *tsv, SV *sv) sv_magic(tsv, (SV*)av, PERL_MAGIC_backref, NULL, 0); SvREFCNT_dec(av); /* for sv_magic */ } - av_push(av,sv); + if (AvFILLp(av) >= AvMAX(av)) { + SV **svp = AvARRAY(av); + I32 i = AvFILLp(av); + while (i >= 0) { + if (svp[i] == &PL_sv_undef) { + svp[i] = sv; /* reuse the slot */ + return; + } + i--; + } + av_extend(av, AvFILLp(av)+1); + } + AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */ } /* delete a back-reference to ourselves from the backref magic associated @@ -5164,34 +5312,37 @@ Perl_sv_clear(pTHX_ register SV *sv) if (PL_defstash) { /* Still have a symbol table? */ dSP; CV* destructor; - SV tmpref; - Zero(&tmpref, 1, SV); - sv_upgrade(&tmpref, SVt_RV); - SvROK_on(&tmpref); - SvREADONLY_on(&tmpref); /* DESTROY() could be naughty */ - SvREFCNT(&tmpref) = 1; + do { stash = SvSTASH(sv); destructor = StashHANDLER(stash,DESTROY); if (destructor) { + SV* tmpref = newRV(sv); + SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ ENTER; PUSHSTACKi(PERLSI_DESTROY); - SvRV(&tmpref) = SvREFCNT_inc(sv); EXTEND(SP, 2); PUSHMARK(SP); - PUSHs(&tmpref); + PUSHs(tmpref); PUTBACK; call_sv((SV*)destructor, G_DISCARD|G_EVAL|G_KEEPERR|G_VOID); - SvREFCNT(sv)--; + + POPSTACK; SPAGAIN; LEAVE; + if(SvREFCNT(tmpref) < 2) { + /* tmpref is not kept alive! */ + SvREFCNT(sv)--; + SvRV(tmpref) = 0; + SvROK_off(tmpref); + } + SvREFCNT_dec(tmpref); } } while (SvOBJECT(sv) && SvSTASH(sv) != stash); - del_XRV(SvANY(&tmpref)); if (SvREFCNT(sv)) { if (PL_in_clean_objs) @@ -5245,7 +5396,13 @@ Perl_sv_clear(pTHX_ register SV *sv) av_undef((AV*)sv); break; case SVt_PVLV: - SvREFCNT_dec(LvTARG(sv)); + if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */ + SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv))); + HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh; + PL_hv_fetch_ent_mh = (HE*)LvTARG(sv); + } + else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ + SvREFCNT_dec(LvTARG(sv)); goto freescalar; case SVt_PVGV: gp_free((GV*)sv); @@ -5378,7 +5535,7 @@ SV * Perl_sv_newref(pTHX_ SV *sv) { if (sv) - ATOMIC_INC(SvREFCNT(sv)); + (SvREFCNT(sv))++; return sv; } @@ -5396,8 +5553,6 @@ Normally called via a wrapper macro C. void Perl_sv_free(pTHX_ SV *sv) { - int refcount_is_zero; - if (!sv) return; if (SvREFCNT(sv) == 0) { @@ -5416,9 +5571,14 @@ Perl_sv_free(pTHX_ SV *sv) Perl_warner(aTHX_ packWARN(WARN_INTERNAL), "Attempt to free unreferenced scalar"); return; } - ATOMIC_DEC_AND_TEST(refcount_is_zero, SvREFCNT(sv)); - if (!refcount_is_zero) + if (--(SvREFCNT(sv)) > 0) return; + Perl_sv_free2(aTHX_ sv); +} + +void +Perl_sv_free2(pTHX_ SV *sv) +{ #ifdef DEBUGGING if (SvTEMP(sv)) { if (ckWARN_d(WARN_DEBUGGING)) @@ -5471,6 +5631,13 @@ UTF8 bytes as a single character. Handles magic and type coercion. =cut */ +/* + * The length is cached in PERL_UTF8_magic, in the mg_len field. Also the + * mg_ptr is used, by sv_pos_u2b(), see the comments of S_utf8_mg_pos_init(). + * (Note that the mg_len is not the length of the mg_ptr field.) + * + */ + STRLEN Perl_sv_len_utf8(pTHX_ register SV *sv) { @@ -5481,13 +5648,158 @@ Perl_sv_len_utf8(pTHX_ register SV *sv) return mg_length(sv); else { - STRLEN len; + STRLEN len, ulen; U8 *s = (U8*)SvPV(sv, len); + MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0; - return Perl_utf8_length(aTHX_ s, s + len); + if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) + ulen = mg->mg_len; + else { + ulen = Perl_utf8_length(aTHX_ s, s + len); + if (!mg && !SvREADONLY(sv)) { + sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); + mg = mg_find(sv, PERL_MAGIC_utf8); + assert(mg); + } + if (mg) + mg->mg_len = ulen; + } + return ulen; } } +/* S_utf8_mg_pos_init() is used to initialize the mg_ptr field of + * a PERL_UTF8_magic. The mg_ptr is used to store the mapping + * between UTF-8 and byte offsets. There are two (substr offset and substr + * length, the i offset, PERL_MAGIC_UTF8_CACHESIZE) times two (UTF-8 offset + * and byte offset) cache positions. + * + * The mg_len field is used by sv_len_utf8(), see its comments. + * Note that the mg_len is not the length of the mg_ptr field. + * + */ +STATIC bool +S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, U8 *s, U8 *start) +{ + bool found = FALSE; + + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + if (!*mgp) { + sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); + *mgp = mg_find(sv, PERL_MAGIC_utf8); + } + assert(*mgp); + + if ((*mgp)->mg_ptr) + *cachep = (STRLEN *) (*mgp)->mg_ptr; + else { + Newz(0, *cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); + (*mgp)->mg_ptr = (char *) *cachep; + } + assert(*cachep); + + (*cachep)[i] = *offsetp; + (*cachep)[i+1] = s - start; + found = TRUE; + } + + return found; +} + +/* + * S_utf8_mg_pos() is used to query and update mg_ptr field of + * a PERL_UTF8_magic. The mg_ptr is used to store the mapping + * between UTF-8 and byte offsets. See also the comments of + * S_utf8_mg_pos_init(). + * + */ +STATIC bool +S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, U8 **sp, U8 *start, U8 *send) +{ + bool found = FALSE; + + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + if (!*mgp) + *mgp = mg_find(sv, PERL_MAGIC_utf8); + if (*mgp && (*mgp)->mg_ptr) { + *cachep = (STRLEN *) (*mgp)->mg_ptr; + if ((*cachep)[i] == (STRLEN)uoff) /* An exact match. */ + found = TRUE; + else { /* We will skip to the right spot. */ + STRLEN forw = 0; + STRLEN backw = 0; + U8* p = NULL; + + /* The assumption is that going backward is half + * the speed of going forward (that's where the + * 2 * backw in the below comes from). (The real + * figure of course depends on the UTF-8 data.) */ + + if ((*cachep)[i] > (STRLEN)uoff) { + forw = uoff; + backw = (*cachep)[i] - (STRLEN)uoff; + + if (forw < 2 * backw) + p = start; + else + p = start + (*cachep)[i+1]; + } + /* Try this only for the substr offset (i == 0), + * not for the substr length (i == 2). */ + else if (i == 0) { /* (*cachep)[i] < uoff */ + STRLEN ulen = sv_len_utf8(sv); + + if ((STRLEN)uoff < ulen) { + forw = (STRLEN)uoff - (*cachep)[i]; + backw = ulen - (STRLEN)uoff; + + if (forw < 2 * backw) + p = start + (*cachep)[i+1]; + else + p = send; + } + + /* If the string is not long enough for uoff, + * we could extend it, but not at this low a level. */ + } + + if (p) { + if (forw < 2 * backw) { + while (forw--) + p += UTF8SKIP(p); + } + else { + while (backw--) { + p--; + while (UTF8_IS_CONTINUATION(*p)) + p--; + } + } + + /* Update the cache. */ + (*cachep)[i] = (STRLEN)uoff; + (*cachep)[i+1] = p - start; + + found = TRUE; + } + } + if (found) { /* Setup the return values. */ + *offsetp = (*cachep)[i+1]; + *sp = start + *offsetp; + if (*sp >= send) { + *sp = send; + *offsetp = send - start; + } + else if (*sp < start) { + *sp = start; + *offsetp = 0; + } + } + } + } + return found; +} + /* =for apidoc sv_pos_u2b @@ -5500,33 +5812,67 @@ type coercion. =cut */ +/* + * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential + * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * byte offsets. See also the comments of S_utf8_mg_pos(). + * + */ + void Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp) { U8 *start; U8 *s; - U8 *send; - I32 uoffset = *offsetp; STRLEN len; + STRLEN *cache = 0; + STRLEN boffset = 0; if (!sv) return; start = s = (U8*)SvPV(sv, len); - send = s + len; - while (s < send && uoffset--) - s += UTF8SKIP(s); - if (s >= send) - s = send; - *offsetp = s - start; - if (lenp) { - I32 ulen = *lenp; - start = s; - while (s < send && ulen--) - s += UTF8SKIP(s); - if (s >= send) - s = send; - *lenp = s - start; + if (len) { + I32 uoffset = *offsetp; + U8 *send = s + len; + MAGIC *mg = 0; + bool found = FALSE; + + if (utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send)) + found = TRUE; + if (!found && uoffset > 0) { + while (s < send && uoffset--) + s += UTF8SKIP(s); + if (s >= send) + s = send; + if (utf8_mg_pos_init(sv, &mg, &cache, 0, offsetp, s, start)) + boffset = cache[1]; + *offsetp = s - start; + } + if (lenp) { + found = FALSE; + start = s; + if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp + *offsetp, &s, start, send)) { + *lenp -= boffset; + found = TRUE; + } + if (!found && *lenp > 0) { + I32 ulen = *lenp; + if (ulen > 0) + while (s < send && ulen--) + s += UTF8SKIP(s); + if (s >= send) + s = send; + if (utf8_mg_pos_init(sv, &mg, &cache, 2, lenp, s, start)) + cache[2] += *offsetp; + } + *lenp = s - start; + } + } + else { + *offsetp = 0; + if (lenp) + *lenp = 0; } return; } @@ -5541,11 +5887,17 @@ Handles magic and type coercion. =cut */ +/* + * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential + * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * byte offsets. See also the comments of S_utf8_mg_pos(). + * + */ + void -Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) +Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp) { - U8 *s; - U8 *send; + U8* s; STRLEN len; if (!sv) @@ -5554,22 +5906,93 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) s = (U8*)SvPV(sv, len); if ((I32)len < *offsetp) Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); - send = s + *offsetp; - len = 0; - while (s < send) { - STRLEN n = 1; - /* Call utf8n_to_uvchr() to validate the sequence - * (unless a simple non-UTF character) */ - if (!UTF8_IS_INVARIANT(*s)) - utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); - if (n > 0) { - s += n; - len++; + else { + U8* send = s + *offsetp; + MAGIC* mg = NULL; + STRLEN *cache = NULL; + + len = 0; + + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + mg = mg_find(sv, PERL_MAGIC_utf8); + if (mg && mg->mg_ptr) { + cache = (STRLEN *) mg->mg_ptr; + if (cache[1] == *offsetp) { + /* An exact match. */ + *offsetp = cache[0]; + + return; + } + else if (cache[1] < *offsetp) { + /* We already know part of the way. */ + len = cache[0]; + s += cache[1]; + /* Let the below loop do the rest. */ + } + else { /* cache[1] > *offsetp */ + /* We already know all of the way, now we may + * be able to walk back. The same assumption + * is made as in S_utf8_mg_pos(), namely that + * walking backward is twice slower than + * walking forward. */ + STRLEN forw = *offsetp; + STRLEN backw = cache[1] - *offsetp; + + if (!(forw < 2 * backw)) { + U8 *p = s + cache[1]; + STRLEN ubackw = 0; + + cache[1] -= backw; + + while (backw--) { + p--; + while (UTF8_IS_CONTINUATION(*p)) + p--; + ubackw++; + } + + cache[0] -= ubackw; + + return; + } + } + } } - else - break; + + while (s < send) { + STRLEN n = 1; + + /* Call utf8n_to_uvchr() to validate the sequence + * (unless a simple non-UTF character) */ + if (!UTF8_IS_INVARIANT(*s)) + utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); + if (n > 0) { + s += n; + len++; + } + else + break; + } + + if (!SvREADONLY(sv)) { + if (!mg) { + sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); + mg = mg_find(sv, PERL_MAGIC_utf8); + } + assert(mg); + + if (!mg->mg_ptr) { + Newz(0, cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); + mg->mg_ptr = (char *) cache; + } + assert(cache); + + cache[0] = len; + cache[1] = *offsetp; + } + + *offsetp = len; } - *offsetp = len; return; } @@ -5886,8 +6309,10 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) register I32 cnt; I32 i = 0; I32 rspara = 0; + I32 recsize; - SV_CHECK_THINKFIRST_COW_DROP(sv); + if (SvTHINKFIRST(sv)) + sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV); /* XXX. If you make this PVIV, then copy on write can copy scalars read from <>. However, perlbench says it's slower, because the existing swipe code @@ -5897,39 +6322,71 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) SvSCREAM_off(sv); + if (append) { + if (PerlIO_isutf8(fp)) { + if (!SvUTF8(sv)) { + sv_utf8_upgrade_nomg(sv); + sv_pos_u2b(sv,&append,0); + } + } else if (SvUTF8(sv)) { + SV *tsv = NEWSV(0,0); + sv_gets(tsv, fp, 0); + sv_utf8_upgrade_nomg(tsv); + SvCUR_set(sv,append); + sv_catsv(sv,tsv); + sv_free(tsv); + goto return_string_or_null; + } + } + + SvPOK_only(sv); + if (PerlIO_isutf8(fp)) + SvUTF8_on(sv); + if (PL_curcop == &PL_compiling) { /* we always read code in line mode */ rsptr = "\n"; rslen = 1; } else if (RsSNARF(PL_rs)) { + /* If it is a regular disk file use size from stat() as estimate + of amount we are going to read - may result in malloc-ing + more memory than we realy need if layers bellow reduce + size we read (e.g. CRLF or a gzip layer) + */ + Stat_t st; + if (!PerlLIO_fstat(PerlIO_fileno(fp), &st) && S_ISREG(st.st_mode)) { + Off_t offset = PerlIO_tell(fp); + if (offset != (Off_t) -1 && st.st_size + append > offset) { + (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1)); + } + } rsptr = NULL; rslen = 0; } else if (RsRECORD(PL_rs)) { - I32 recsize, bytesread; + I32 bytesread; char *buffer; /* Grab the size of the record we're getting */ recsize = SvIV(SvRV(PL_rs)); - (void)SvPOK_only(sv); /* Validate pointer */ - buffer = SvGROW(sv, (STRLEN)(recsize + 1)); + buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append; /* Go yank in */ #ifdef VMS /* VMS wants read instead of fread, because fread doesn't respect */ /* RMS record boundaries. This is not necessarily a good thing to be */ - /* doing, but we've got no other real choice */ + /* doing, but we've got no other real choice - except avoid stdio + as implementation - perhaps write a :vms layer ? + */ bytesread = PerlLIO_read(PerlIO_fileno(fp), buffer, recsize); #else bytesread = PerlIO_read(fp, buffer, recsize); #endif - SvCUR_set(sv, bytesread); + if (bytesread < 0) + bytesread = 0; + SvCUR_set(sv, bytesread += append); buffer[bytesread] = '\0'; - if (PerlIO_isutf8(fp)) - SvUTF8_on(sv); - else - SvUTF8_off(sv); - return(SvCUR(sv) ? SvPVX(sv) : Nullch); + goto return_string_or_null; } else if (RsPARA(PL_rs)) { rsptr = "\n\n"; @@ -5998,9 +6455,13 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) /* Here is some breathtakingly efficient cheating */ cnt = PerlIO_get_cnt(fp); /* get count into register */ - (void)SvPOK_only(sv); /* validate pointer */ - if ((I32)(SvLEN(sv) - append) <= cnt + 1) { /* make sure we have the room */ - if (cnt > 80 && (I32)SvLEN(sv) > append) { + /* make sure we have the room */ + if ((I32)(SvLEN(sv) - append) <= cnt + 1) { + /* Not room for all of it + if we are looking for a separator and room for some + */ + if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) { + /* just process what we have room for */ shortbuffered = cnt - SvLEN(sv) + append + 1; cnt -= shortbuffered; } @@ -6010,7 +6471,7 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1)))); } } - else + else shortbuffered = 0; bp = (STDCHAR*)SvPVX(sv) + append; /* move these two too to registers */ ptr = (STDCHAR*)PerlIO_get_ptr(fp); @@ -6178,11 +6639,7 @@ screamer2: } } - if (PerlIO_isutf8(fp)) - SvUTF8_on(sv); - else - SvUTF8_off(sv); - +return_string_or_null: return (SvCUR(sv) - append) ? SvPVX(sv) : Nullch; } @@ -6911,7 +7368,7 @@ Perl_sv_2io(pTHX_ SV *sv) else io = 0; if (!io) - Perl_croak(aTHX_ "Bad filehandle: %s", SvPV(sv,n_a)); + Perl_croak(aTHX_ "Bad filehandle: %"SVf, sv); break; } return io; @@ -6992,7 +7449,8 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref) Nullop); LEAVE; if (!GvCVu(gv)) - Perl_croak(aTHX_ "Unable to create sub named \"%s\"", SvPV(sv,n_a)); + Perl_croak(aTHX_ "Unable to create sub named \"%"SVf"\"", + sv); } return GvCVu(gv); } @@ -7091,6 +7549,21 @@ Perl_sv_nv(pTHX_ register SV *sv) return sv_2nv(sv); } +/* sv_pv() is now a macro using SvPV_nolen(); + * this function provided for binary compatibility only + */ + +char * +Perl_sv_pv(pTHX_ SV *sv) +{ + STRLEN n_a; + + if (SvPOK(sv)) + return SvPVX(sv); + + return sv_2pv(sv, &n_a); +} + /* =for apidoc sv_pv @@ -7125,6 +7598,16 @@ Perl_sv_pvn_nomg(pTHX_ register SV *sv, STRLEN *lp) return sv_2pv_flags(sv, lp, 0); } +/* sv_pvn_force() is now a macro using Perl_sv_pvn_force_flags(); + * this function provided for binary compatibility only + */ + +char * +Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) +{ + return sv_pvn_force_flags(sv, lp, SV_GMAGIC); +} + /* =for apidoc sv_pvn_force @@ -7183,6 +7666,17 @@ Perl_sv_pvn_force_flags(pTHX_ SV *sv, STRLEN *lp, I32 flags) return SvPVX(sv); } +/* sv_pvbyte () is now a macro using Perl_sv_2pv_flags(); + * this function provided for binary compatibility only + */ + +char * +Perl_sv_pvbyte(pTHX_ SV *sv) +{ + sv_utf8_downgrade(sv,0); + return sv_pv(sv); +} + /* =for apidoc sv_pvbyte @@ -7221,6 +7715,17 @@ Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) return sv_pvn_force(sv,lp); } +/* sv_pvutf8 () is now a macro using Perl_sv_2pv_flags(); + * this function provided for binary compatibility only + */ + +char * +Perl_sv_pvutf8(pTHX_ SV *sv) +{ + sv_utf8_upgrade(sv); + return sv_pv(sv); +} + /* =for apidoc sv_pvutf8 @@ -7271,7 +7776,10 @@ char * Perl_sv_reftype(pTHX_ SV *sv, int ob) { if (ob && SvOBJECT(sv)) { - return HvNAME(SvSTASH(sv)); + if (HvNAME(SvSTASH(sv))) + return HvNAME(SvSTASH(sv)); + else + return "__ANON__"; } else { switch (SvTYPE(sv)) { @@ -7290,7 +7798,7 @@ Perl_sv_reftype(pTHX_ SV *sv, int ob) return "REF"; else return "SCALAR"; - case SVt_PVLV: return "LVALUE"; + case SVt_PVLV: return SvROK(sv) ? "REF" : "LVALUE"; case SVt_PVAV: return "ARRAY"; case SVt_PVHV: return "HASH"; case SVt_PVCV: return "CODE"; @@ -7349,6 +7857,8 @@ Perl_sv_isa(pTHX_ SV *sv, const char *name) sv = (SV*)SvRV(sv); if (!SvOBJECT(sv)) return 0; + if (!HvNAME(SvSTASH(sv))) + return 0; return strEQ(HvNAME(SvSTASH(sv)), name); } @@ -7615,7 +8125,9 @@ Perl_sv_unref_flags(pTHX_ SV *sv, U32 flags) } SvRV(sv) = 0; SvROK_off(sv); - if (SvREFCNT(rv) != 1 || SvREADONLY(rv) || (flags & SV_IMMEDIATE_UNREF)) + /* You can't have a || SvREADONLY(rv) here, as $a = $$a, where $a was + assigned to as BEGIN {$a = \"Foo"} will fail. */ + if (SvREFCNT(rv) != 1 || (flags & SV_IMMEDIATE_UNREF)) SvREFCNT_dec(rv); else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ sv_2mortal(rv); /* Schedule for freeing later */ @@ -7686,6 +8198,44 @@ Perl_sv_tainted(pTHX_ SV *sv) return FALSE; } +/* +=for apidoc sv_setpviv + +Copies an integer into the given SV, also updating its string value. +Does not handle 'set' magic. See C. + +=cut +*/ + +void +Perl_sv_setpviv(pTHX_ SV *sv, IV iv) +{ + char buf[TYPE_CHARS(UV)]; + char *ebuf; + char *ptr = uiv_2buf(buf, iv, 0, 0, &ebuf); + + sv_setpvn(sv, ptr, ebuf - ptr); +} + +/* +=for apidoc sv_setpviv_mg + +Like C, but also handles 'set' magic. + +=cut +*/ + +void +Perl_sv_setpviv_mg(pTHX_ SV *sv, IV iv) +{ + char buf[TYPE_CHARS(UV)]; + char *ebuf; + char *ptr = uiv_2buf(buf, iv, 0, 0, &ebuf); + + sv_setpvn(sv, ptr, ebuf - ptr); + SvSETMAGIC(sv); +} + #if defined(PERL_IMPLICIT_CONTEXT) /* pTHX_ magic can't cope with varargs, so this is a no-context @@ -7919,7 +8469,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV I32 svix = 0; static char nullstr[] = "(null)"; SV *argsv = Nullsv; - bool has_utf8 = FALSE; /* has the result utf8? */ + bool has_utf8; /* has the result utf8? */ + bool pat_utf8; /* the pattern is in utf8? */ + SV *nsv = Nullsv; + + has_utf8 = pat_utf8 = DO_UTF8(sv); /* no matter what, this is a string now */ (void)SvPV_force(sv, origlen); @@ -7970,6 +8524,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN zeros = 0; bool has_precis = FALSE; STRLEN precis = 0; + I32 osvix = svix; bool is_utf8 = FALSE; /* is this item utf8? */ #ifdef HAS_LDBL_SPRINTF_BUG /* This is to try to fix a bug with irix/nonstop-ux/powerux and @@ -8020,7 +8575,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { - sv_catpvn(sv, p, q - p); + if (has_utf8 && !pat_utf8) + sv_catpvn_utf8_upgrade(sv, p, q - p, nsv); + else + sv_catpvn(sv, p, q - p); p = q; } if (q++ >= patend) @@ -8031,6 +8589,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV \d+\$ explicit format parameter index [-+ 0#]+ flags v|\*(\d+\$)?v vector with optional (optionally specified) arg + 0 flag (as above): repeated to allow "v02" \d+|\*(\d+\$)? width using optional (optionally specified) arg \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg [hlqLV] size @@ -8096,6 +8655,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } if (!asterisk) + if( *q == '0' ) + fill = *q++; EXPECT_NUMBER(q, width); if (vectorize) { @@ -8523,6 +9084,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV intsize = 'q'; #endif break; +/* [perl #20339] - we should accept and ignore %lf rather than die */ + case 'l': + /* FALL THROUGH */ default: #if defined(USE_LONG_DOUBLE) intsize = args ? 0 : 'q'; @@ -8535,8 +9099,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* FALL THROUGH */ #endif case 'h': - /* FALL THROUGH */ - case 'l': goto unknown; } @@ -8714,12 +9276,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV default: unknown: - vectorize = FALSE; if (!args && ckWARN(WARN_PRINTF) && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)) { SV *msg = sv_newmortal(); - Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %s: ", - (PL_op->op_type == OP_PRTF) ? "printf" : "sprintf"); + Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", + (PL_op->op_type == OP_PRTF) ? "" : "s"); if (c) { if (isPRINT(c)) Perl_sv_catpvf(aTHX_ msg, @@ -8746,6 +9307,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV p += elen; *p = '\0'; SvCUR(sv) = p - SvPVX(sv); + svix = osvix; continue; /* not "break" */ } @@ -8950,6 +9512,9 @@ Perl_re_dup(pTHX_ REGEXP *r, CLONE_PARAMS *param) ret->subbeg = SAVEPV(r->subbeg); else ret->subbeg = Nullch; +#ifdef PERL_COPY_ON_WRITE + ret->saved_copy = Nullsv; +#endif ptr_table_store(PL_ptr_table, r, ret); return ret; @@ -9318,9 +9883,20 @@ Perl_rvpv_dup(pTHX_ SV *dstr, SV *sstr, CLONE_PARAMS* param) /* Special case - not normally malloced for some reason */ if (SvREADONLY(sstr) && SvFAKE(sstr)) { /* A "shared" PV - clone it as unshared string */ - SvFAKE_off(dstr); - SvREADONLY_off(dstr); - SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvCUR(sstr)); + if(SvPADTMP(sstr)) { + /* However, some of them live in the pad + and they should not have these flags + turned off */ + + SvPVX(dstr) = sharepvn(SvPVX(sstr), SvCUR(sstr), + SvUVX(sstr)); + SvUVX(dstr) = SvUVX(sstr); + } else { + + SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvCUR(sstr)); + SvFAKE_off(dstr); + SvREADONLY_off(dstr); + } } else { /* Some other special case - random pointer */ @@ -9346,6 +9922,18 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) if (dstr) return dstr; + if(param->flags & CLONEf_JOIN_IN) { + /** We are joining here so we don't want do clone + something that is bad **/ + + if(SvTYPE(sstr) == SVt_PVHV && + HvNAME(sstr)) { + /** don't clone stashes if they already exist **/ + HV* old_stash = gv_stashpv(HvNAME(sstr),0); + return (SV*) old_stash; + } + } + /* create anew and remember what it is */ new_SV(dstr); ptr_table_store(PL_ptr_table, sstr, dstr); @@ -9432,7 +10020,12 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) Perl_rvpv_dup(aTHX_ dstr, sstr, param); LvTARGOFF(dstr) = LvTARGOFF(sstr); /* XXX sometimes holds PMOP* when DEBUGGING */ LvTARGLEN(dstr) = LvTARGLEN(sstr); - LvTARG(dstr) = sv_dup_inc(LvTARG(sstr), param); + if (LvTYPE(sstr) == 't') /* for tie: unrefcnted fake (SV**) */ + LvTARG(dstr) = dstr; + else if (LvTYPE(sstr) == 'T') /* for tie: fake HE */ + LvTARG(dstr) = (SV*)he_dup((HE*)LvTARG(sstr), 0, param); + else + LvTARG(dstr) = sv_dup_inc(LvTARG(sstr), param); LvTYPE(dstr) = LvTYPE(sstr); break; case SVt_PVGV: @@ -9487,12 +10080,21 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) IoPAGE(dstr) = IoPAGE(sstr); IoPAGE_LEN(dstr) = IoPAGE_LEN(sstr); IoLINES_LEFT(dstr) = IoLINES_LEFT(sstr); + if(IoFLAGS(sstr) & IOf_FAKE_DIRP) { + /* I have no idea why fake dirp (rsfps) + should be treaded differently but otherwise + we end up with leaks -- sky*/ + IoTOP_GV(dstr) = gv_dup_inc(IoTOP_GV(sstr), param); + IoFMT_GV(dstr) = gv_dup_inc(IoFMT_GV(sstr), param); + IoBOTTOM_GV(dstr) = gv_dup_inc(IoBOTTOM_GV(sstr), param); + } else { + IoTOP_GV(dstr) = gv_dup(IoTOP_GV(sstr), param); + IoFMT_GV(dstr) = gv_dup(IoFMT_GV(sstr), param); + IoBOTTOM_GV(dstr) = gv_dup(IoBOTTOM_GV(sstr), param); + } IoTOP_NAME(dstr) = SAVEPV(IoTOP_NAME(sstr)); - IoTOP_GV(dstr) = gv_dup(IoTOP_GV(sstr), param); IoFMT_NAME(dstr) = SAVEPV(IoFMT_NAME(sstr)); - IoFMT_GV(dstr) = gv_dup(IoFMT_GV(sstr), param); IoBOTTOM_NAME(dstr) = SAVEPV(IoBOTTOM_NAME(sstr)); - IoBOTTOM_GV(dstr) = gv_dup(IoBOTTOM_GV(sstr), param); IoSUBPROCESS(dstr) = IoSUBPROCESS(sstr); IoTYPE(dstr) = IoTYPE(sstr); IoFLAGS(dstr) = IoFLAGS(sstr); @@ -10364,6 +10966,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_DBsingle = sv_dup(proto_perl->IDBsingle, param); PL_DBtrace = sv_dup(proto_perl->IDBtrace, param); PL_DBsignal = sv_dup(proto_perl->IDBsignal, param); + PL_DBassertion = sv_dup(proto_perl->IDBassertion, param); PL_lineary = av_dup(proto_perl->Ilineary, param); PL_dbargs = av_dup(proto_perl->Idbargs, param); @@ -10391,11 +10994,13 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* internal state */ PL_tainting = proto_perl->Itainting; + PL_taint_warn = proto_perl->Itaint_warn; PL_maxo = proto_perl->Imaxo; if (proto_perl->Iop_mask) PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo); else PL_op_mask = Nullch; + /* PL_asserting = proto_perl->Iasserting; */ /* current interpreter roots */ PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param); @@ -10467,7 +11072,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_origalen = proto_perl->Iorigalen; PL_pidstatus = newHV(); /* XXX flag for cloning? */ PL_osname = SAVEPV(proto_perl->Iosname); - PL_sh_path = proto_perl->Ish_path; /* XXX never deallocated */ + PL_sh_path_compat = proto_perl->Ish_path_compat; /* XXX never deallocated */ PL_sighandlerp = proto_perl->Isighandlerp; @@ -10599,6 +11204,40 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); + /* Did the locale setup indicate UTF-8? */ + PL_utf8locale = proto_perl->Iutf8locale; + /* Unicode features (see perlrun/-C) */ + PL_unicode = proto_perl->Iunicode; + + /* Pre-5.8 signals control */ + PL_signals = proto_perl->Isignals; + + /* times() ticks per second */ + PL_clocktick = proto_perl->Iclocktick; + + /* Recursion stopper for PerlIO_find_layer */ + PL_in_load_module = proto_perl->Iin_load_module; + + /* sort() routine */ + PL_sort_RealCmp = proto_perl->Isort_RealCmp; + + /* Not really needed/useful since the reenrant_retint is "volatile", + * but do it for consistency's sake. */ + PL_reentrant_retint = proto_perl->Ireentrant_retint; + + /* Hooks to shared SVs and locks. */ + PL_sharehook = proto_perl->Isharehook; + PL_lockhook = proto_perl->Ilockhook; + PL_unlockhook = proto_perl->Iunlockhook; + PL_threadhook = proto_perl->Ithreadhook; + + PL_runops_std = proto_perl->Irunops_std; + PL_runops_dbg = proto_perl->Irunops_dbg; + +#ifdef THREADS_HAVE_PIDS + PL_ppid = proto_perl->Ippid; +#endif + /* swatch cache */ PL_last_swash_hv = Nullhv; /* reinits on demand */ PL_last_swash_klen = 0; @@ -10740,9 +11379,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_protect = proto_perl->Tprotect; #endif PL_errors = sv_dup_inc(proto_perl->Terrors, param); - PL_av_fetch_sv = Nullsv; - PL_hv_fetch_sv = Nullsv; - Zero(&PL_hv_fetch_ent_mh, 1, HE); /* XXX */ + PL_hv_fetch_ent_mh = Nullhe; PL_modcount = proto_perl->Tmodcount; PL_lastgotoprobe = Nullop; PL_dumpindent = proto_perl->Tdumpindent; @@ -10799,6 +11436,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_reg_curpm = (PMOP*)NULL; PL_reg_oldsaved = Nullch; PL_reg_oldsavedlen = 0; +#ifdef PERL_COPY_ON_WRITE + PL_nrs = Nullsv; +#endif PL_reg_maxiter = 0; PL_reg_leftiter = 0; PL_reg_poscache = Nullch; @@ -10817,6 +11457,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* Pluggable optimizer */ PL_peepp = proto_perl->Tpeepp; + PL_stashcache = newHV(); + if (!(flags & CLONEf_KEEP_PTR_TABLE)) { ptr_table_free(PL_ptr_table); PL_ptr_table = NULL; @@ -10869,14 +11511,14 @@ The PV of the sv is returned. char * Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) { - if (SvPOK(sv) && !DO_UTF8(sv) && SvROK(encoding)) { - int vary = FALSE; + if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) { SV *uni; STRLEN len; char *s; dSP; ENTER; SAVETMPS; + save_re_context(); PUSHMARK(sp); EXTEND(SP, 3); XPUSHs(encoding); @@ -10897,13 +11539,6 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) uni = POPs; PUTBACK; s = SvPV(uni, len); - { - U8 *t = (U8 *)s, *e = (U8 *)s + len; - while (t < e) { - if ((vary = !UTF8_IS_INVARIANT(*t++))) - break; - } - } if (s != SvPVX(sv)) { SvGROW(sv, len + 1); Move(s, SvPVX(sv), len, char); @@ -10912,12 +11547,55 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) } FREETMPS; LEAVE; - if (vary) - SvUTF8_on(sv); SvUTF8_on(sv); } return SvPVX(sv); } +/* +=for apidoc sv_cat_decode + +The encoding is assumed to be an Encode object, the PV of the ssv is +assumed to be octets in that encoding and decoding the input starts +from the position which (PV + *offset) pointed to. The dsv will be +concatenated the decoded UTF-8 string from ssv. Decoding will terminate +when the string tstr appears in decoding output or the input ends on +the PV of the ssv. The value which the offset points will be modified +to the last input position on the ssv. +Returns TRUE if the terminator was found, else returns FALSE. + +=cut */ + +bool +Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, + SV *ssv, int *offset, char *tstr, int tlen) +{ + bool ret = FALSE; + if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding) && offset) { + SV *offsv; + dSP; + ENTER; + SAVETMPS; + save_re_context(); + PUSHMARK(sp); + EXTEND(SP, 6); + XPUSHs(encoding); + XPUSHs(dsv); + XPUSHs(ssv); + XPUSHs(offsv = sv_2mortal(newSViv(*offset))); + XPUSHs(sv_2mortal(newSVpvn(tstr, tlen))); + PUTBACK; + call_method("cat_decode", G_SCALAR); + SPAGAIN; + ret = SvTRUE(TOPs); + *offset = SvIV(offsv); + PUTBACK; + FREETMPS; + LEAVE; + } + else + Perl_croak(aTHX_ "Invalid argument to sv_cat_decode"); + return ret; +}