sv_force_normal_flags(sv, 0);
}
- if (PL_encoding)
+ if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING))
sv_recode_to_utf8(sv, PL_encoding);
else { /* Assume Latin-1/EBCDIC */
/* This function could be much more efficient if we
switch (SvTYPE(sref)) {
case SVt_PVAV:
if (intro)
- SAVESPTR(GvAV(dstr));
+ SAVEGENERICSV(GvAV(dstr));
else
dref = (SV*)GvAV(dstr);
GvAV(dstr) = (AV*)sref;
break;
case SVt_PVHV:
if (intro)
- SAVESPTR(GvHV(dstr));
+ SAVEGENERICSV(GvHV(dstr));
else
dref = (SV*)GvHV(dstr);
GvHV(dstr) = (HV*)sref;
GvCVGEN(dstr) = 0; /* Switch off cacheness. */
PL_sub_generation++;
}
- SAVESPTR(GvCV(dstr));
+ SAVEGENERICSV(GvCV(dstr));
}
else
dref = (SV*)GvCV(dstr);
break;
case SVt_PVIO:
if (intro)
- SAVESPTR(GvIOp(dstr));
+ SAVEGENERICSV(GvIOp(dstr));
else
dref = (SV*)GvIOp(dstr);
GvIOp(dstr) = (IO*)sref;
break;
case SVt_PVFM:
if (intro)
- SAVESPTR(GvFORM(dstr));
+ SAVEGENERICSV(GvFORM(dstr));
else
dref = (SV*)GvFORM(dstr);
GvFORM(dstr) = (CV*)sref;
break;
default:
if (intro)
- SAVESPTR(GvSV(dstr));
+ SAVEGENERICSV(GvSV(dstr));
else
dref = (SV*)GvSV(dstr);
GvSV(dstr) = sref;
}
if (dref)
SvREFCNT_dec(dref);
- if (intro)
- SAVEFREESV(sref);
if (SvTAINTED(sstr))
SvTAINT(dstr);
return;
case PERL_MAGIC_vstring:
vtable = 0;
break;
+ case PERL_MAGIC_utf8:
+ vtable = &PL_vtbl_utf8;
+ break;
case PERL_MAGIC_substr:
vtable = &PL_vtbl_substr;
break;
Safefree(mg->mg_ptr);
else if (mg->mg_len == HEf_SVKEY)
SvREFCNT_dec((SV*)mg->mg_ptr);
+ else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr)
+ Safefree(mg->mg_ptr);
}
if (mg->mg_flags & MGf_REFCOUNTED)
SvREFCNT_dec(mg->mg_obj);
=cut
*/
+/*
+ * The length is cached in the PERL_MAGIC_utf8 magic, in the mg_len field.
+ * The mg_ptr field is also used, by sv_pos_u2b(); see the comments of
+ * S_utf8_mg_pos_init().  (Note that mg_len is not the length of the
+ * buffer that mg_ptr points to.)
+ *
+ */
+
STRLEN
Perl_sv_len_utf8(pTHX_ register SV *sv)
{
return mg_length(sv);
else
{
- STRLEN len;
+ STRLEN len, ulen;
U8 *s = (U8*)SvPV(sv, len);
+ MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
+
+ if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0))
+ ulen = mg->mg_len;
+ else {
+ ulen = Perl_utf8_length(aTHX_ s, s + len);
+ if (!mg && !SvREADONLY(sv)) {
+ sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
+ mg = mg_find(sv, PERL_MAGIC_utf8);
+ assert(mg);
+ }
+ if (mg)
+ mg->mg_len = ulen;
+ }
+ return ulen;
+ }
+}
+
+/* S_utf8_mg_pos_init() records one (UTF-8 offset, byte offset) pair in
+ * the position cache that hangs off the mg_ptr field of the sv's
+ * PERL_MAGIC_utf8 magic, allocating the cache first if it does not
+ * exist yet.
+ *
+ * The cache is an array of PERL_MAGIC_UTF8_CACHESIZE * 2 STRLENs.
+ * Entries come in pairs: slot i holds a UTF-8 (character) offset and
+ * slot i+1 the matching byte offset.  sv_pos_u2b() passes i == 0 for
+ * the substr offset and i == 2 for the substr length.
+ *
+ *   sv       the scalar whose cache is updated
+ *   mgp      in/out: the PERL_MAGIC_utf8 MAGIC; if *mgp is NULL the
+ *            magic is attached with sv_magic() and *mgp is filled in
+ *   cachep   out: set to point at the cache array
+ *   i        index of the UTF-8 offset slot of the pair to write
+ *   offsetp  *offsetp is stored as the UTF-8 offset
+ *   s, start s - start is stored as the byte offset
+ *
+ * Returns TRUE if the pair was stored, FALSE if nothing was done
+ * (the sv is not magical, or it is read-only).
+ *
+ * The mg_len field is used by sv_len_utf8(), see its comments.
+ * Note that the mg_len is not the length of the mg_ptr field.
+ *
+ */
+STATIC bool
+S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, U8 *s, U8 *start)
+{
+ bool found = FALSE;
+
+ /* Only touch the cache of writable, already-magical scalars. */
+ if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
+ if (!*mgp) {
+ /* No magic handed in: attach it and look it up. */
+ sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
+ *mgp = mg_find(sv, PERL_MAGIC_utf8);
+ }
+ assert(*mgp);
+
+ /* Reuse the existing cache array, or allocate a fresh one and
+ * hand ownership to the magic via mg_ptr. */
+ if ((*mgp)->mg_ptr)
+ *cachep = (STRLEN *) (*mgp)->mg_ptr;
+ else {
+ Newz(0, *cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
+ (*mgp)->mg_ptr = (char *) *cachep;
+ }
+ assert(*cachep);
- return Perl_utf8_length(aTHX_ s, s + len);
+ /* Store the pair: character offset, then byte offset. */
+ (*cachep)[i] = *offsetp;
+ (*cachep)[i+1] = s - start;
+ found = TRUE;
}
+
+ return found;
}
/*
+ * S_utf8_mg_pos() queries, and on a near miss updates, the position
+ * cache kept in the mg_ptr field of the sv's PERL_MAGIC_utf8 magic.
+ * The cache maps UTF-8 (character) offsets to byte offsets; see also
+ * the comments of S_utf8_mg_pos_init() for its layout.
+ *
+ *   sv       the scalar being examined
+ *   mgp      in/out: the PERL_MAGIC_utf8 MAGIC; looked up with
+ *            mg_find() when *mgp is NULL
+ *   cachep   out: set to the cache array when one exists
+ *   i        index of the UTF-8 offset slot of the pair to use
+ *            (slot i+1 holds the matching byte offset)
+ *   offsetp  out: on success, the byte offset found
+ *   uoff     the UTF-8 offset being looked for
+ *   sp       out: on success, start + the byte offset, clamped to
+ *            the range start..send
+ *   start    first byte of the string buffer
+ *   send     one past the last byte of the string buffer
+ *
+ * Returns TRUE when a position was produced (either an exact cache hit
+ * or one reached by walking from a known position), FALSE otherwise.
+ * Nothing is done for non-magical or read-only scalars, or when no
+ * cache has been allocated yet.
+ *
+ */
+STATIC bool
+S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, U8 **sp, U8 *start, U8 *send)
+{
+ bool found = FALSE;
+
+ if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
+ if (!*mgp)
+ *mgp = mg_find(sv, PERL_MAGIC_utf8);
+ if (*mgp && (*mgp)->mg_ptr) {
+ *cachep = (STRLEN *) (*mgp)->mg_ptr;
+ if ((*cachep)[i] == uoff) /* An exact match. */
+ found = TRUE;
+ else { /* We will skip to the right spot. */
+ /* forw/backw are the number of characters to step when
+ * walking forward resp. backward; p is the byte position
+ * the walk starts from (NULL means "no walk possible"). */
+ STRLEN forw = 0;
+ STRLEN backw = 0;
+ U8* p = NULL;
+
+ /* The assumption is that going backward is half
+ * the speed of going forward (that's where the
+ * 2 * backw in the below comes from). (The real
+ * figure of course depends on the UTF-8 data.) */
+
+ if ((*cachep)[i] > uoff) {
+ /* Target lies before the cached position: either walk
+ * forward from the start of the string or backward
+ * from the cached position. */
+ forw = uoff;
+ backw = (*cachep)[i] - uoff;
+
+ if (forw < 2 * backw)
+ p = start;
+ else
+ p = start + (*cachep)[i+1];
+ }
+ /* Try this only for the substr offset (i == 0),
+ * not for the substr length (i == 2). */
+ else if (i == 0) { /* (*cachep)[i] < uoff */
+ STRLEN ulen = sv_len_utf8(sv);
+
+ /* Target lies after the cached position: either walk
+ * forward from the cached position or backward from
+ * the end of the string. */
+ if (uoff < ulen) {
+ forw = uoff - (*cachep)[i];
+ backw = ulen - uoff;
+
+ if (forw < 2 * backw)
+ p = start + (*cachep)[i+1];
+ else
+ p = send;
+ }
+
+ /* If the string is not long enough for uoff,
+ * we could extend it, but not at this low a level. */
+ }
+
+ if (p) {
+ /* Same test as was used to choose p above, so the walk
+ * direction matches the chosen starting point. */
+ if (forw < 2 * backw) {
+ while (forw--)
+ p += UTF8SKIP(p);
+ }
+ else {
+ /* Step back one character at a time, skipping over
+ * continuation bytes to reach each start byte. */
+ while (backw--) {
+ p--;
+ while (UTF8_IS_CONTINUATION(*p))
+ p--;
+ }
+ }
+
+ /* Update the cache. */
+ (*cachep)[i] = uoff;
+ (*cachep)[i+1] = p - start;
+
+ found = TRUE;
+ }
+ }
+ if (found) { /* Setup the return values. */
+ *offsetp = (*cachep)[i+1];
+ *sp = start + *offsetp;
+ /* Clamp the returned pointer and offset to the buffer. */
+ if (*sp >= send) {
+ *sp = send;
+ *offsetp = send - start;
+ }
+ else if (*sp < start) {
+ *sp = start;
+ *offsetp = 0;
+ }
+ }
+ }
+ }
+ return found;
+}
+
+/*
=for apidoc sv_pos_u2b
Converts the value pointed to by offsetp from a count of UTF8 chars from
=cut
*/
+/*
+ * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
+ * PERL_MAGIC_utf8 magic of the sv to store the mapping between UTF-8
+ * and byte offsets. See also the comments of S_utf8_mg_pos().
+ *
+ * On entry *offsetp (and *lenp, when lenp is not NULL) are counts of
+ * UTF-8 characters; on return they hold byte counts: *offsetp from the
+ * start of the string, *lenp from the converted offset.  Both are
+ * clamped to the end of the string.  A NULL sv is ignored; for an
+ * empty string both results are 0.
+ *
+ * Cache slot pair 0/1 is used for the offset and pair 2/3 for the
+ * length.
+ *
+ */
+
void
Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp)
{
U8 *start;
U8 *s;
- U8 *send;
- I32 uoffset = *offsetp;
STRLEN len;
+ /* cache: the position cache array, once one is known.
+ * boffset: byte offset stored in the cache by the fallback scan
+ * below; stays 0 when that scan does not run or does not cache. */
+ STRLEN *cache = 0;
+ STRLEN boffset = 0;
if (!sv)
return;
start = s = (U8*)SvPV(sv, len);
- send = s + len;
- while (s < send && uoffset--)
- s += UTF8SKIP(s);
- if (s >= send)
- s = send;
- *offsetp = s - start;
- if (lenp) {
- I32 ulen = *lenp;
- start = s;
- while (s < send && ulen--)
- s += UTF8SKIP(s);
- if (s >= send)
- s = send;
- *lenp = s - start;
+ if (len) {
+ I32 uoffset = *offsetp;
+ U8 *send = s + len;
+ MAGIC *mg = 0;
+ bool found = FALSE;
+
+ /* First try the cache; on success s and *offsetp are already
+ * converted to a byte position. */
+ if (utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send))
+ found = TRUE;
+ if (!found && uoffset > 0) {
+ /* Cache miss: scan forward character by character. */
+ while (s < send && uoffset--)
+ s += UTF8SKIP(s);
+ if (s >= send)
+ s = send;
+ /* Remember the result; *offsetp still holds the character
+ * offset at this point. */
+ if (utf8_mg_pos_init(sv, &mg, &cache, 0, offsetp, s, start))
+ boffset = cache[1];
+ *offsetp = s - start;
+ }
+ if (lenp) {
+ found = FALSE;
+ /* From here on byte positions are measured from the
+ * converted offset. */
+ start = s;
+ /* NOTE(review): *offsetp may already have been overwritten
+ * with a byte offset above, while *lenp is still a character
+ * count, so the sum below looks like it mixes units for
+ * multi-byte data -- confirm. */
+ if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp + *offsetp, &s, start, send)) {
+ *lenp -= boffset;
+ found = TRUE;
+ }
+ if (!found && *lenp > 0) {
+ /* Cache miss: scan forward over *lenp characters. */
+ I32 ulen = *lenp;
+ if (ulen > 0)
+ while (s < send && ulen--)
+ s += UTF8SKIP(s);
+ if (s >= send)
+ s = send;
+ /* NOTE(review): cache[2] was just set to the character
+ * length; *offsetp is a byte offset here -- confirm the
+ * intended units of this adjustment. */
+ if (utf8_mg_pos_init(sv, &mg, &cache, 2, lenp, s, start))
+ cache[2] += *offsetp;
+ }
+ *lenp = s - start;
+ }
+ }
+ else {
+ /* Empty string: every offset and length converts to 0. */
+ *offsetp = 0;
+ if (lenp)
+ *lenp = 0;
}
return;
}
=cut
*/
+/*
+ * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
+ * PERL_MAGIC_utf8 magic of the sv to store the mapping between UTF-8
+ * and byte offsets. See also the comments of S_utf8_mg_pos().
+ *
+ * On entry *offsetp is a byte offset into the string of sv; on return
+ * it is the number of UTF-8 characters that start before that byte.
+ * A NULL sv is ignored.  Croaks if the byte offset lies beyond the end
+ * of the string.
+ *
+ * Only cache slots 0 (character offset) and 1 (byte offset) are used
+ * here.  The cache is consulted only for magical, writable scalars and
+ * is (re)written for any writable scalar.
+ *
+ */
+
void
-Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp)
+Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp)
{
- U8 *s;
- U8 *send;
+ U8* s;
STRLEN len;
if (!sv)
return;
s = (U8*)SvPV(sv, len);
if ((I32)len < *offsetp)
Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset");
- send = s + *offsetp;
- len = 0;
- while (s < send) {
- STRLEN n = 1;
- /* Call utf8n_to_uvchr() to validate the sequence
- * (unless a simple non-UTF character) */
- if (!UTF8_IS_INVARIANT(*s))
- utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
- if (n > 0) {
- s += n;
- len++;
+ else {
+ U8* send = s + *offsetp;
+ MAGIC* mg = NULL;
+ STRLEN *cache = NULL;
+
+ /* From here on len counts characters, not bytes. */
+ len = 0;
+
+ if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
+ mg = mg_find(sv, PERL_MAGIC_utf8);
+ if (mg && mg->mg_ptr) {
+ cache = (STRLEN *) mg->mg_ptr;
+ if (cache[1] == *offsetp) {
+ /* An exact match. */
+ *offsetp = cache[0];
+
+ return;
+ }
+ else if (cache[1] < *offsetp) {
+ /* We already know part of the way. */
+ len = cache[0];
+ s += cache[1];
+ /* Let the below loop do the rest. */
+ }
+ else { /* cache[1] > *offsetp */
+ /* We already know all of the way, now we may
+ * be able to walk back. The same assumption
+ * is made as in S_utf8_mg_pos(), namely that
+ * walking backward is twice as slow as
+ * walking forward. */
+ STRLEN forw = *offsetp;
+ STRLEN backw = cache[1] - *offsetp;
+
+ if (!(forw < 2 * backw)) {
+ U8 *p = s + cache[1];
+ STRLEN ubackw = 0;
+
+ /* Step back exactly backw bytes (never past the
+ * requested offset), counting one character for
+ * every byte that starts a character.  For
+ * well-formed UTF-8 this yields the same count
+ * the forward loop below would produce. */
+ while (backw > 0) {
+ p--;
+ backw--;
+ if (!UTF8_IS_CONTINUATION(*p))
+ ubackw++;
+ }
+
+ /* Move the cached pair to the new position and
+ * hand the character offset back to the caller. */
+ cache[0] -= ubackw;
+ cache[1] = *offsetp;
+ *offsetp = cache[0];
+
+ return;
+ }
+ /* Otherwise fall through and count forward from the
+ * start of the string. */
+ }
+ }
+ }
- else
- break;
+
+ while (s < send) {
+ STRLEN n = 1;
+
+ /* Call utf8n_to_uvchr() to validate the sequence
+ * (unless a simple non-UTF character) */
+ if (!UTF8_IS_INVARIANT(*s))
+ utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
+ if (n > 0) {
+ s += n;
+ len++;
+ }
+ else
+ break;
+ }
+
+ if (!SvREADONLY(sv)) {
+ /* Remember the answer: attach the magic and allocate the
+ * cache array on first use. */
+ if (!mg) {
+ sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
+ mg = mg_find(sv, PERL_MAGIC_utf8);
+ }
+ assert(mg);
+
+ if (!mg->mg_ptr) {
+ Newz(0, cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
+ mg->mg_ptr = (char *) cache;
+ }
+ assert(cache);
+
+ cache[0] = len;
+ cache[1] = *offsetp;
+ }
+
+ *offsetp = len;
}
- *offsetp = len;
return;
}
else
io = 0;
if (!io)
- Perl_croak(aTHX_ "Bad filehandle: %s", SvPV(sv,n_a));
+ Perl_croak(aTHX_ "Bad filehandle: %"SVf, sv);
break;
}
return io;
Nullop);
LEAVE;
if (!GvCVu(gv))
- Perl_croak(aTHX_ "Unable to create sub named \"%s\"", SvPV(sv,n_a));
+ Perl_croak(aTHX_ "Unable to create sub named \"%"SVf"\"",
+ sv);
}
return GvCVu(gv);
}
}
SvRV(sv) = 0;
SvROK_off(sv);
- if (SvREFCNT(rv) != 1 || SvREADONLY(rv) || (flags & SV_IMMEDIATE_UNREF))
+ /* You can't have a || SvREADONLY(rv) here, as $a = $$a, where $a was
+ assigned to as BEGIN {$a = \"Foo"} will fail. */
+ if (SvREFCNT(rv) != 1 || (flags & SV_IMMEDIATE_UNREF))
SvREFCNT_dec(rv);
else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */
sv_2mortal(rv); /* Schedule for freeing later */
if (!args && ckWARN(WARN_PRINTF) &&
(PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)) {
SV *msg = sv_newmortal();
- Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %s: ",
- (PL_op->op_type == OP_PRTF) ? "printf" : "sprintf");
+ Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ",
+ (PL_op->op_type == OP_PRTF) ? "" : "s");
if (c) {
if (isPRINT(c))
Perl_sv_catpvf(aTHX_ msg,
if (dstr)
return dstr;
+ if(param->flags & CLONEf_JOIN_IN) {
+ /** We are joining here, so we don't want to clone
+ something that is bad **/
+
+ if(SvTYPE(sstr) == SVt_PVHV &&
+ HvNAME(sstr)) {
+ /** don't clone stashes if they already exist **/
+ HV* old_stash = gv_stashpv(HvNAME(sstr),0);
+ return (SV*) old_stash;
+ }
+ }
+
/* create anew and remember what it is */
new_SV(dstr);
ptr_table_store(PL_ptr_table, sstr, dstr);
CvDEPTH(dstr) = 0;
}
PAD_DUP(CvPADLIST(dstr), CvPADLIST(sstr), param);
- /* anon prototypes aren't refcounted */
- if (!CvANON(sstr) || CvCLONED(sstr))
- CvOUTSIDE(dstr) = cv_dup_inc(CvOUTSIDE(sstr), param);
- else
- CvOUTSIDE(dstr) = cv_dup(CvOUTSIDE(sstr), param);
- CvOUTSIDE_SEQ(dstr) = CvOUTSIDE_SEQ(sstr);
+ CvOUTSIDE_SEQ(dstr) = CvOUTSIDE_SEQ(sstr);
+ CvOUTSIDE(dstr) =
+ CvWEAKOUTSIDE(sstr)
+ ? cv_dup( CvOUTSIDE(sstr), param)
+ : cv_dup_inc(CvOUTSIDE(sstr), param);
CvFLAGS(dstr) = CvFLAGS(sstr);
CvFILE(dstr) = CvXSUB(sstr) ? CvFILE(sstr) : SAVEPV(CvFILE(sstr));
break;
#define TOPLONG(ss,ix) ((ss)[ix].any_long)
#define POPIV(ss,ix) ((ss)[--(ix)].any_iv)
#define TOPIV(ss,ix) ((ss)[ix].any_iv)
+#define POPBOOL(ss,ix) ((ss)[--(ix)].any_bool)
+#define TOPBOOL(ss,ix) ((ss)[ix].any_bool)
#define POPPTR(ss,ix) ((ss)[--(ix)].any_ptr)
#define TOPPTR(ss,ix) ((ss)[ix].any_ptr)
#define POPDPTR(ss,ix) ((ss)[--(ix)].any_dptr)
sv = (SV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = sv_dup(sv, param);
break;
+ case SAVEt_BOOL:
+ ptr = POPPTR(ss,ix);
+ TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
+ longval = (long)POPBOOL(ss,ix);
+ TOPBOOL(nss,ix) = (bool)longval;
+ break;
default:
Perl_croak(aTHX_ "panic: ss_dup inconsistency");
}
Create and return a new interpreter by cloning the current one.
+perl_clone takes these flags as parameters:
+
+CLONEf_COPY_STACKS - is used to, well, copy the stacks also.
+Without it we only clone the data and zero the stacks;
+with it we copy the stacks and the new perl interpreter is
+ready to run at the exact same point as the previous one.
+The pseudo-fork code uses COPY_STACKS while the
+threads->new doesn't.
+
+CLONEf_KEEP_PTR_TABLE
+perl_clone keeps a ptr_table with the pointer of the old
+variable as a key and the new variable as a value.
+This allows it to check if something has been cloned and not
+clone it again but rather just use the value and increase the
+refcount. If KEEP_PTR_TABLE is not set then perl_clone will kill
+the ptr_table using the function
+C<ptr_table_free(PL_ptr_table); PL_ptr_table = NULL;>.
+A reason to keep it around is if you want to dup some of your own
+variables that are outside the graph perl scans; an example of this
+code is in threads.xs create.
+
+CLONEf_CLONE_HOST
+This is a win32 thing; it is ignored on unix. It tells perl's
+win32host code (which is c++) to clone itself. This is needed on
+win32 if you want to run two threads at the same time.
+If you just want to do some stuff in a separate perl interpreter
+and then throw it away and return to the original one,
+you don't need to do anything.
+
=cut
*/
PL_watchok = Nullch;
PL_regdummy = proto_perl->Tregdummy;
- PL_regcomp_parse = Nullch;
- PL_regxend = Nullch;
- PL_regcode = (regnode*)NULL;
- PL_regnaughty = 0;
- PL_regsawback = 0;
PL_regprecomp = Nullch;
PL_regnpar = 0;
PL_regsize = 0;
- PL_regflags = 0;
- PL_regseen = 0;
- PL_seen_zerolen = 0;
- PL_seen_evals = 0;
- PL_regcomp_rx = (regexp*)NULL;
- PL_extralen = 0;
PL_colorset = 0; /* reinits PL_colors[] */
/*PL_colors[6] = {0,0,0,0,0,0};*/
- PL_reg_whilem_seen = 0;
PL_reginput = Nullch;
PL_regbol = Nullch;
PL_regeol = Nullch;