static const char S_no_symref_sv[] =
"Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
-PP(pp_rv2gv)
-{
- dVAR; dSP; dTOPss;
+/* In some cases this function inspects PL_op. If this function is called
+ for new op types, more bool parameters may need to be added in place of
+ the checks.
+ When noinit is true, the absence of a gv will cause a retval of undef.
+ This is unrelated to the cv-to-gv assignment case.
+*/
+
+static SV *
+S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
+ const bool noinit)
+{
+ dVAR;
if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
if (SvROK(sv)) {
- wasref:
if (SvAMAGIC(sv)) {
sv = amagic_deref_call(sv, to_gv_amg);
- SPAGAIN;
}
+ wasref:
sv = SvRV(sv);
if (SvTYPE(sv) == SVt_PVIO) {
GV * const gv = MUTABLE_GV(sv_newmortal());
- gv_init(gv, 0, "", 0, 0);
+ gv_init(gv, 0, "$__ANONIO__", 11, 0);
GvIOp(gv) = MUTABLE_IO(sv);
SvREFCNT_inc_void_NN(sv);
sv = MUTABLE_SV(gv);
}
else if (!isGV_with_GP(sv))
- DIE(aTHX_ "Not a GLOB reference");
+ return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
}
else {
if (!isGV_with_GP(sv)) {
- if (!SvOK(sv) && sv != &PL_sv_undef) {
+ if (!SvOK(sv)) {
/* If this is a 'my' scalar and flag is set then vivify
* NI-S 1999/05/07
*/
- if (SvREADONLY(sv))
- Perl_croak_no_modify(aTHX);
- if (PL_op->op_private & OPpDEREF) {
+ if (vivify_sv && sv != &PL_sv_undef) {
GV *gv;
+ if (SvREADONLY(sv))
+ Perl_croak_no_modify(aTHX);
if (cUNOP->op_targ) {
- STRLEN len;
SV * const namesv = PAD_SV(cUNOP->op_targ);
- const char * const name = SvPV(namesv, len);
gv = MUTABLE_GV(newSV(0));
- gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
+ gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
}
else {
const char * const name = CopSTASHPV(PL_curcop);
- gv = newGVgen(name);
+ gv = newGVgen_flags(name,
+ HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
}
prepare_SV_for_RV(sv);
SvRV_set(sv, MUTABLE_SV(gv));
SvSETMAGIC(sv);
goto wasref;
}
- if (PL_op->op_flags & OPf_REF ||
- PL_op->op_private & HINT_STRICT_REFS)
- DIE(aTHX_ PL_no_usym, "a symbol");
+ if (PL_op->op_flags & OPf_REF || strict)
+ return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
if (ckWARN(WARN_UNINITIALIZED))
report_uninit(sv);
- RETSETUNDEF;
+ return &PL_sv_undef;
}
- if ((PL_op->op_flags & OPf_SPECIAL) &&
- !(PL_op->op_flags & OPf_MOD))
+ if (noinit)
{
- SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
- if (!temp
- && (!is_gv_magical_sv(sv,0)
- || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
- SVt_PVGV))))) {
- RETSETUNDEF;
- }
- sv = temp;
+ if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
+ sv, GV_ADDMG, SVt_PVGV
+ ))))
+ return &PL_sv_undef;
}
else {
- if (PL_op->op_private & HINT_STRICT_REFS)
- DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
+ if (strict)
+ return
+ (SV *)Perl_die(aTHX_
+ S_no_symref_sv,
+ sv,
+ (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
+ "a symbol"
+ );
if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
== OPpDONT_INIT_GV) {
/* We are the target of a coderef assignment. Return
the scalar unchanged, and let pp_sasssign deal with
things. */
- RETURN;
+ return sv;
}
- sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
+ sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
}
/* FAKE globs in the symbol table cause weird bugs (#77810) */
- if (sv) SvFAKE_off(sv);
+ SvFAKE_off(sv);
}
}
- if (sv && SvFAKE(sv)) {
+ if (SvFAKE(sv)) {
SV *newsv = sv_newmortal();
sv_setsv_flags(newsv, sv, 0);
SvFAKE_off(newsv);
sv = newsv;
}
+ return sv;
+}
+
+PP(pp_rv2gv)
+{
+ dVAR; dSP; dTOPss;
+
+ sv = S_rv2gv(aTHX_
+ sv, PL_op->op_private & OPpDEREF,
+ PL_op->op_private & HINT_STRICT_REFS,
+ ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
+ || PL_op->op_type == OP_READLINE
+ );
if (PL_op->op_private & OPpLVAL_INTRO)
save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
SETs(sv);
if ((PL_op->op_flags & OPf_SPECIAL) &&
!(PL_op->op_flags & OPf_MOD))
{
- gv = gv_fetchsv(sv, 0, type);
- if (!gv
- && (!is_gv_magical_sv(sv,0)
- || !(gv = gv_fetchsv(sv, GV_ADD, type))))
+ if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
{
**spp = &PL_sv_undef;
return NULL;
}
}
else {
- gv = gv_fetchsv(sv, GV_ADD, type);
+ gv = gv_fetchsv_nomg(sv, GV_ADD, type);
}
return gv;
}
dVAR; dSP; dTOPss;
GV *gv = NULL;
- if (!(PL_op->op_private & OPpDEREFed))
- SvGETMAGIC(sv);
+ SvGETMAGIC(sv);
if (SvROK(sv)) {
if (SvAMAGIC(sv)) {
sv = amagic_deref_call(sv, to_sv_amg);
- SPAGAIN;
}
sv = SvRV(sv);
Perl_croak(aTHX_ "%s", PL_no_localize_ref);
}
else if (PL_op->op_private & OPpDEREF)
- vivify_ref(sv, PL_op->op_private & OPpDEREF);
+ sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
}
SETs(sv);
RETURN;
}
SETs(*sv);
} else {
- SETs(sv_2mortal(newSViv(
- AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
- )));
+ SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
}
RETURN;
}
dVAR; dSP; dPOPss;
if (PL_op->op_flags & OPf_MOD || LVRET) {
- SV * const ret = sv_2mortal(newSV_type(SVt_PVMG)); /* Not TARG RT#67838 */
- sv_magic(ret, sv, PERL_MAGIC_pos, NULL, 0);
+ SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
+ sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
+ LvTYPE(ret) = '.';
+ LvTARG(ret) = SvREFCNT_inc_simple(sv);
PUSHs(ret); /* no SvSETMAGIC */
RETURN;
}
I32 i = mg->mg_len;
if (DO_UTF8(sv))
sv_pos_b2u(sv, &i);
- PUSHi(i + CopARYBASE_get(PL_curcop));
+ PUSHi(i);
RETURN;
}
}
GV *gv;
HV *stash_unused;
const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
- ? 0
+ ? GV_ADDMG
: ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
? GV_ADD|GV_NOEXPAND
: GV_ADD;
if (CvCLONE(cv))
cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
if ((PL_op->op_private & OPpLVAL_INTRO)) {
- if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
+ if (gv && GvCV(gv) == cv && (gv = gv_autoload_pvn(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), GvNAMEUTF8(gv) ? SVf_UTF8 : 0)))
cv = GvCV(gv);
if (!CvLVALUE(cv))
DIE(aTHX_ "Can't modify non-lvalue subroutine call");
const char * s = SvPVX_const(TOPs);
if (strnEQ(s, "CORE::", 6)) {
const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
- if (code < 0) { /* Overridable. */
-#define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
- int i = 0, n = 0, seen_question = 0, defgv = 0;
- I32 oa;
- char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
-
- if (code == -KEY_chop || code == -KEY_chomp
- || code == -KEY_exec || code == -KEY_system)
- goto set;
- if (code == -KEY_mkdir) {
- ret = newSVpvs_flags("_;$", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
- ret = newSVpvs_flags("+", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_push || code == -KEY_unshift) {
- ret = newSVpvs_flags("+@", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_pop || code == -KEY_shift) {
- ret = newSVpvs_flags(";+", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_splice) {
- ret = newSVpvs_flags("+;$$@", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_tied || code == -KEY_untie) {
- ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_tie) {
- ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
- goto set;
- }
- if (code == -KEY_readpipe) {
- s = "CORE::backtick";
- }
- while (i < MAXO) { /* The slow way. */
- if (strEQ(s + 6, PL_op_name[i])
- || strEQ(s + 6, PL_op_desc[i]))
- {
- goto found;
- }
- i++;
- }
- goto nonesuch; /* Should not happen... */
- found:
- defgv = PL_opargs[i] & OA_DEFGV;
- oa = PL_opargs[i] >> OASHIFT;
- while (oa) {
- if (oa & OA_OPTIONAL && !seen_question && !defgv) {
- seen_question = 1;
- str[n++] = ';';
- }
- if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
- && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
- /* But globs are already references (kinda) */
- && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
- ) {
- str[n++] = '\\';
- }
- str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
- oa = oa >> 4;
- }
- if (defgv && str[n - 1] == '$')
- str[n - 1] = '_';
- str[n++] = '\0';
- ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
- }
- else if (code) /* Non-Overridable */
- goto set;
- else { /* None such */
- nonesuch:
+ if (!code || code == -KEY_CORE)
DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
+ if (code < 0) { /* Overridable. */
+ SV * const sv = core_prototype(NULL, s + 6, code, NULL);
+ if (sv) ret = sv;
}
+ goto set;
}
}
cv = sv_2cv(TOPs, &stash, &gv, 0);
if (cv && SvPOK(cv))
- ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
+ ret = newSVpvn_flags(
+ CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
+ );
set:
SETs(ret);
RETURN;
PP(pp_ref)
{
dVAR; dSP; dTARGET;
- const char *pv;
SV * const sv = POPs;
if (sv)
if (!sv || !SvROK(sv))
RETPUSHNO;
- pv = sv_reftype(SvRV(sv),TRUE);
- PUSHp(pv, strlen(pv));
+ (void)sv_ref(TARG,SvRV(sv),TRUE);
+ PUSHTARG;
RETURN;
}
HV *stash;
if (MAXARG == 1)
+ curstash:
stash = CopSTASH(PL_curcop);
else {
SV * const ssv = POPs;
STRLEN len;
const char *ptr;
- if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
+ if (!ssv) goto curstash;
+ if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
Perl_croak(aTHX_ "Attempt to bless into a reference");
ptr = SvPV_const(ssv,len);
if (len == 0)
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Explicit blessing to '' (assuming package main)");
- stash = gv_stashpvn(ptr, len, GV_ADD);
+ stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
}
(void)sv_bless(TOPs, stash);
dVAR; dSP;
SV *sv = POPs;
- const char * const elem = SvPV_nolen_const(sv);
+ STRLEN len;
+ const char * const elem = SvPV_const(sv, len);
GV * const gv = MUTABLE_GV(POPs);
SV * tmpRef = NULL;
const char * const second_letter = elem + 1;
switch (*elem) {
case 'A':
- if (strEQ(second_letter, "RRAY"))
+ if (len == 5 && strEQ(second_letter, "RRAY"))
tmpRef = MUTABLE_SV(GvAV(gv));
break;
case 'C':
- if (strEQ(second_letter, "ODE"))
+ if (len == 4 && strEQ(second_letter, "ODE"))
tmpRef = MUTABLE_SV(GvCVu(gv));
break;
case 'F':
- if (strEQ(second_letter, "ILEHANDLE")) {
+ if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
/* finally deprecated in 5.8.0 */
deprecate("*glob{FILEHANDLE}");
tmpRef = MUTABLE_SV(GvIOp(gv));
}
else
- if (strEQ(second_letter, "ORMAT"))
+ if (len == 6 && strEQ(second_letter, "ORMAT"))
tmpRef = MUTABLE_SV(GvFORM(gv));
break;
case 'G':
- if (strEQ(second_letter, "LOB"))
+ if (len == 4 && strEQ(second_letter, "LOB"))
tmpRef = MUTABLE_SV(gv);
break;
case 'H':
- if (strEQ(second_letter, "ASH"))
+ if (len == 4 && strEQ(second_letter, "ASH"))
tmpRef = MUTABLE_SV(GvHV(gv));
break;
case 'I':
- if (*second_letter == 'O' && !elem[2])
+ if (*second_letter == 'O' && !elem[2] && len == 2)
tmpRef = MUTABLE_SV(GvIOp(gv));
break;
case 'N':
- if (strEQ(second_letter, "AME"))
+ if (len == 4 && strEQ(second_letter, "AME"))
sv = newSVhek(GvNAME_HEK(gv));
break;
case 'P':
- if (strEQ(second_letter, "ACKAGE")) {
+ if (len == 7 && strEQ(second_letter, "ACKAGE")) {
const HV * const stash = GvSTASH(gv);
const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
}
break;
case 'S':
- if (strEQ(second_letter, "CALAR"))
+ if (len == 6 && strEQ(second_letter, "CALAR"))
tmpRef = GvSVn(gv);
break;
}
{
dVAR; dSP; dPOPss;
register unsigned char *s;
- register I32 pos;
- register I32 ch;
- register I32 *sfirst;
- register I32 *snext;
+ char *sfirst_raw;
STRLEN len;
+ MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
+ U8 quanta;
+ STRLEN size;
+
+ if (mg && SvSCREAM(sv))
+ RETPUSHYES;
- if (sv == PL_lastscream) {
- if (SvSCREAM(sv))
- RETPUSHYES;
- }
s = (unsigned char*)(SvPV(sv, len));
if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
/* No point in studying a zero length string, and not safe to study
stringification. Also refuse to study an FBM scalar, as this gives
more flexibility in SV flag usage. No real-world code would ever
end up studying an FBM scalar, so this isn't a real pessimisation.
+ Endemic use of I32 in Perl_screaminstr makes it hard to safely push
+ the study length limit from I32_MAX to U32_MAX - 1.
*/
RETPUSHNO;
}
- pos = len;
-
- if (PL_lastscream) {
- SvSCREAM_off(PL_lastscream);
- SvREFCNT_dec(PL_lastscream);
- }
- PL_lastscream = SvREFCNT_inc_simple(sv);
- if (pos > PL_maxscream) {
- if (PL_maxscream < 0) {
- PL_maxscream = pos + 80;
- Newx(PL_screamfirst, 256, I32);
- Newx(PL_screamnext, PL_maxscream, I32);
- }
- else {
- PL_maxscream = pos + pos / 4;
- Renew(PL_screamnext, PL_maxscream, I32);
- }
- }
+ if (len < 0xFF) {
+ quanta = 1;
+ } else if (len < 0xFFFF) {
+ quanta = 2;
+ } else
+ quanta = 4;
- sfirst = PL_screamfirst;
- snext = PL_screamnext;
+ size = (256 + len) * quanta;
+ sfirst_raw = (char *)safemalloc(size);
- if (!sfirst || !snext)
+ if (!sfirst_raw)
DIE(aTHX_ "do_study: out of memory");
- for (ch = 256; ch; --ch)
- *sfirst++ = -1;
- sfirst -= 256;
-
- while (--pos >= 0) {
- register const I32 ch = s[pos];
- if (sfirst[ch] >= 0)
- snext[pos] = sfirst[ch] - pos;
- else
- snext[pos] = -pos;
- sfirst[ch] = pos;
+ SvSCREAM_on(sv);
+ if (!mg)
+ mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
+ mg->mg_ptr = sfirst_raw;
+ mg->mg_len = size;
+ mg->mg_private = quanta;
+
+ memset(sfirst_raw, ~0, 256 * quanta);
+
+ /* The assumption here is that most studied strings are fairly short, hence
+ the pain of the extra code is worth it, given the memory savings.
+ 80 character string, 336 bytes as U8, down from 1344 as U32
+ 800 character string, 2112 bytes as U16, down from 4224 as U32
+ */
+
+ if (quanta == 1) {
+ U8 *const sfirst = (U8 *)sfirst_raw;
+ U8 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
+ } else if (quanta == 2) {
+ U16 *const sfirst = (U16 *)sfirst_raw;
+ U16 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
+ } else {
+ U32 *const sfirst = (U32 *)sfirst_raw;
+ U32 *const snext = sfirst + 256;
+ while (len-- > 0) {
+ const U8 ch = s[len];
+ snext[len] = sfirst[ch];
+ sfirst[ch] = len;
+ }
}
- SvSCREAM_on(sv);
- /* piggyback on m//g magic */
- sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
RETPUSHYES;
}
}
TARG = sv_newmortal();
if(PL_op->op_type == OP_TRANSR) {
- SV * const newsv = newSVsv(sv);
+ STRLEN len;
+ const char * const pv = SvPV(sv,len);
+ SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
do_trans(newsv);
- mPUSHs(newsv);
+ PUSHs(newsv);
}
else PUSHi(do_trans(sv));
RETURN;
/* SV is copy-on-write */
sv_force_normal_flags(sv, 0);
}
- if (SvREADONLY(sv))
+ else
Perl_croak_no_modify(aTHX);
}
SvIVX(retval) += rs_charlen;
}
}
- s = SvPV_force_nolen(sv);
+ s = SvPV_force_nomg_nolen(sv);
SvCUR_set(sv, len);
*SvEND(sv) = '\0';
SvNIOK_off(sv);
break;
case SVt_PVCV:
if (cv_const_sv((const CV *)sv))
- Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
- CvANON((const CV *)sv) ? "(anonymous)"
- : GvENAME(CvGV((const CV *)sv)));
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "Constant subroutine %"SVf" undefined",
+ SVfARG(CvANON((const CV *)sv)
+ ? newSVpvs_flags("(anonymous)", SVs_TEMP)
+ : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
/* FALLTHROUGH */
case SVt_PVFM:
{
RETPUSHUNDEF;
}
-PP(pp_predec)
-{
- dVAR; dSP;
- if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
- Perl_croak_no_modify(aTHX);
- if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
- && SvIVX(TOPs) != IV_MIN)
- {
- SvIV_set(TOPs, SvIVX(TOPs) - 1);
- SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
- }
- else
- sv_dec(TOPs);
- SvSETMAGIC(TOPs);
- return NORMAL;
-}
-
PP(pp_postinc)
{
dVAR; dSP; dTARGET;
- if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
+ const bool inc =
+ PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
+ if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
Perl_croak_no_modify(aTHX);
if (SvROK(TOPs))
TARG = sv_newmortal();
sv_setsv(TARG, TOPs);
if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
- && SvIVX(TOPs) != IV_MAX)
+ && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
{
- SvIV_set(TOPs, SvIVX(TOPs) + 1);
+ SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
}
- else
+ else if (inc)
sv_inc_nomg(TOPs);
+ else sv_dec_nomg(TOPs);
SvSETMAGIC(TOPs);
/* special case for undef: see thread at 2003-03/msg00536.html in archive */
- if (!SvOK(TARG))
+ if (inc && !SvOK(TARG))
sv_setiv(TARG, 0);
SETs(TARG);
return NORMAL;
}
-PP(pp_postdec)
-{
- dVAR; dSP; dTARGET;
- if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
- Perl_croak_no_modify(aTHX);
- if (SvROK(TOPs))
- TARG = sv_newmortal();
- sv_setsv(TARG, TOPs);
- if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
- && SvIVX(TOPs) != IV_MIN)
- {
- SvIV_set(TOPs, SvIVX(TOPs) - 1);
- SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
- }
- else
- sv_dec_nomg(TOPs);
- SvSETMAGIC(TOPs);
- SETs(TARG);
- return NORMAL;
-}
-
/* Ordinary operators. */
PP(pp_pow)
PP(pp_lt)
{
dVAR; dSP;
+ SV *left, *right;
+
tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
-#ifdef PERL_PRESERVE_IVUV
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- bool auvok = SvUOK(TOPm1s);
- bool buvok = SvUOK(TOPs);
-
- if (!auvok && !buvok) { /* ## IV < IV ## */
- const IV aiv = SvIVX(TOPm1s);
- const IV biv = SvIVX(TOPs);
-
- SP--;
- SETs(boolSV(aiv < biv));
- RETURN;
- }
- if (auvok && buvok) { /* ## UV < UV ## */
- const UV auv = SvUVX(TOPm1s);
- const UV buv = SvUVX(TOPs);
-
- SP--;
- SETs(boolSV(auv < buv));
- RETURN;
- }
- if (auvok) { /* ## UV < IV ## */
- UV auv;
- const IV biv = SvIVX(TOPs);
- SP--;
- if (biv < 0) {
- /* As (a) is a UV, it's >=0, so it cannot be < */
- SETs(&PL_sv_no);
- RETURN;
- }
- auv = SvUVX(TOPs);
- SETs(boolSV(auv < (UV)biv));
- RETURN;
- }
- { /* ## IV < UV ## */
- const IV aiv = SvIVX(TOPm1s);
- UV buv;
-
- if (aiv < 0) {
- /* As (b) is a UV, it's >=0, so it must be < */
- SP--;
- SETs(&PL_sv_yes);
- RETURN;
- }
- buv = SvUVX(TOPs);
- SP--;
- SETs(boolSV((UV)aiv < buv));
- RETURN;
- }
- }
- }
-#endif
-#ifndef NV_PRESERVES_UV
-#ifdef PERL_PRESERVE_IVUV
- else
-#endif
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- SP--;
- SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s)));
- RETURN;
- }
-#endif
- {
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- dPOPTOPnnrl_nomg;
- if (Perl_isnan(left) || Perl_isnan(right))
- RETSETNO;
- SETs(boolSV(left < right));
-#else
- dPOPnv_nomg;
- SETs(boolSV(SvNV_nomg(TOPs) < value));
-#endif
- RETURN;
- }
+ right = POPs;
+ left = TOPs;
+ SETs(boolSV(
+ (SvIOK_notUV(left) && SvIOK_notUV(right))
+ ? (SvIVX(left) < SvIVX(right))
+ : (do_ncmp(left, right) == -1)
+ ));
+ RETURN;
}
PP(pp_gt)
{
dVAR; dSP;
- tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
-#ifdef PERL_PRESERVE_IVUV
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- bool auvok = SvUOK(TOPm1s);
- bool buvok = SvUOK(TOPs);
-
- if (!auvok && !buvok) { /* ## IV > IV ## */
- const IV aiv = SvIVX(TOPm1s);
- const IV biv = SvIVX(TOPs);
-
- SP--;
- SETs(boolSV(aiv > biv));
- RETURN;
- }
- if (auvok && buvok) { /* ## UV > UV ## */
- const UV auv = SvUVX(TOPm1s);
- const UV buv = SvUVX(TOPs);
-
- SP--;
- SETs(boolSV(auv > buv));
- RETURN;
- }
- if (auvok) { /* ## UV > IV ## */
- UV auv;
- const IV biv = SvIVX(TOPs);
+ SV *left, *right;
- SP--;
- if (biv < 0) {
- /* As (a) is a UV, it's >=0, so it must be > */
- SETs(&PL_sv_yes);
- RETURN;
- }
- auv = SvUVX(TOPs);
- SETs(boolSV(auv > (UV)biv));
- RETURN;
- }
- { /* ## IV > UV ## */
- const IV aiv = SvIVX(TOPm1s);
- UV buv;
-
- if (aiv < 0) {
- /* As (b) is a UV, it's >=0, so it cannot be > */
- SP--;
- SETs(&PL_sv_no);
- RETURN;
- }
- buv = SvUVX(TOPs);
- SP--;
- SETs(boolSV((UV)aiv > buv));
- RETURN;
- }
- }
- }
-#endif
-#ifndef NV_PRESERVES_UV
-#ifdef PERL_PRESERVE_IVUV
- else
-#endif
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- SP--;
- SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s)));
- RETURN;
- }
-#endif
- {
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- dPOPTOPnnrl_nomg;
- if (Perl_isnan(left) || Perl_isnan(right))
- RETSETNO;
- SETs(boolSV(left > right));
-#else
- dPOPnv_nomg;
- SETs(boolSV(SvNV_nomg(TOPs) > value));
-#endif
- RETURN;
- }
+ tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
+ right = POPs;
+ left = TOPs;
+ SETs(boolSV(
+ (SvIOK_notUV(left) && SvIOK_notUV(right))
+ ? (SvIVX(left) > SvIVX(right))
+ : (do_ncmp(left, right) == 1)
+ ));
+ RETURN;
}
PP(pp_le)
{
dVAR; dSP;
- tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
-#ifdef PERL_PRESERVE_IVUV
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- bool auvok = SvUOK(TOPm1s);
- bool buvok = SvUOK(TOPs);
-
- if (!auvok && !buvok) { /* ## IV <= IV ## */
- const IV aiv = SvIVX(TOPm1s);
- const IV biv = SvIVX(TOPs);
-
- SP--;
- SETs(boolSV(aiv <= biv));
- RETURN;
- }
- if (auvok && buvok) { /* ## UV <= UV ## */
- UV auv = SvUVX(TOPm1s);
- UV buv = SvUVX(TOPs);
-
- SP--;
- SETs(boolSV(auv <= buv));
- RETURN;
- }
- if (auvok) { /* ## UV <= IV ## */
- UV auv;
- const IV biv = SvIVX(TOPs);
-
- SP--;
- if (biv < 0) {
- /* As (a) is a UV, it's >=0, so a cannot be <= */
- SETs(&PL_sv_no);
- RETURN;
- }
- auv = SvUVX(TOPs);
- SETs(boolSV(auv <= (UV)biv));
- RETURN;
- }
- { /* ## IV <= UV ## */
- const IV aiv = SvIVX(TOPm1s);
- UV buv;
+ SV *left, *right;
- if (aiv < 0) {
- /* As (b) is a UV, it's >=0, so a must be <= */
- SP--;
- SETs(&PL_sv_yes);
- RETURN;
- }
- buv = SvUVX(TOPs);
- SP--;
- SETs(boolSV((UV)aiv <= buv));
- RETURN;
- }
- }
- }
-#endif
-#ifndef NV_PRESERVES_UV
-#ifdef PERL_PRESERVE_IVUV
- else
-#endif
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- SP--;
- SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s)));
- RETURN;
- }
-#endif
- {
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- dPOPTOPnnrl_nomg;
- if (Perl_isnan(left) || Perl_isnan(right))
- RETSETNO;
- SETs(boolSV(left <= right));
-#else
- dPOPnv_nomg;
- SETs(boolSV(SvNV_nomg(TOPs) <= value));
-#endif
- RETURN;
- }
+ tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
+ right = POPs;
+ left = TOPs;
+ SETs(boolSV(
+ (SvIOK_notUV(left) && SvIOK_notUV(right))
+ ? (SvIVX(left) <= SvIVX(right))
+ : (do_ncmp(left, right) <= 0)
+ ));
+ RETURN;
}
PP(pp_ge)
{
dVAR; dSP;
- tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric);
-#ifdef PERL_PRESERVE_IVUV
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- bool auvok = SvUOK(TOPm1s);
- bool buvok = SvUOK(TOPs);
-
- if (!auvok && !buvok) { /* ## IV >= IV ## */
- const IV aiv = SvIVX(TOPm1s);
- const IV biv = SvIVX(TOPs);
+ SV *left, *right;
+
+ tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
+ right = POPs;
+ left = TOPs;
+ SETs(boolSV(
+ (SvIOK_notUV(left) && SvIOK_notUV(right))
+ ? (SvIVX(left) >= SvIVX(right))
+ : ( (do_ncmp(left, right) & 2) == 0)
+ ));
+ RETURN;
+}
- SP--;
- SETs(boolSV(aiv >= biv));
- RETURN;
- }
- if (auvok && buvok) { /* ## UV >= UV ## */
- const UV auv = SvUVX(TOPm1s);
- const UV buv = SvUVX(TOPs);
+PP(pp_ne)
+{
+ dVAR; dSP;
+ SV *left, *right;
+
+ tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
+ right = POPs;
+ left = TOPs;
+ SETs(boolSV(
+ (SvIOK_notUV(left) && SvIOK_notUV(right))
+ ? (SvIVX(left) != SvIVX(right))
+ : (do_ncmp(left, right) != 0)
+ ));
+ RETURN;
+}
- SP--;
- SETs(boolSV(auv >= buv));
- RETURN;
- }
- if (auvok) { /* ## UV >= IV ## */
- UV auv;
- const IV biv = SvIVX(TOPs);
+/* compare left and right SVs. Returns:
+ * -1: <
+ * 0: ==
+ * 1: >
+ * 2: left or right was a NaN
+ */
+I32
+Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
+{
+ dVAR;
- SP--;
- if (biv < 0) {
- /* As (a) is a UV, it's >=0, so it must be >= */
- SETs(&PL_sv_yes);
- RETURN;
+ PERL_ARGS_ASSERT_DO_NCMP;
+#ifdef PERL_PRESERVE_IVUV
+ SvIV_please_nomg(right);
+ /* Fortunately it seems NaN isn't IOK */
+ if (SvIOK(right)) {
+ SvIV_please_nomg(left);
+ if (SvIOK(left)) {
+ if (!SvUOK(left)) {
+ const IV leftiv = SvIVX(left);
+ if (!SvUOK(right)) {
+ /* ## IV <=> IV ## */
+ const IV rightiv = SvIVX(right);
+ return (leftiv > rightiv) - (leftiv < rightiv);
}
- auv = SvUVX(TOPs);
- SETs(boolSV(auv >= (UV)biv));
- RETURN;
- }
- { /* ## IV >= UV ## */
- const IV aiv = SvIVX(TOPm1s);
- UV buv;
-
- if (aiv < 0) {
- /* As (b) is a UV, it's >=0, so a cannot be >= */
- SP--;
- SETs(&PL_sv_no);
- RETURN;
+ /* ## IV <=> UV ## */
+ if (leftiv < 0)
+ /* As (b) is a UV, it's >=0, so it must be < */
+ return -1;
+ {
+ const UV rightuv = SvUVX(right);
+ return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
}
- buv = SvUVX(TOPs);
- SP--;
- SETs(boolSV((UV)aiv >= buv));
- RETURN;
}
- }
- }
-#endif
-#ifndef NV_PRESERVES_UV
-#ifdef PERL_PRESERVE_IVUV
- else
-#endif
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- SP--;
- SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s)));
- RETURN;
- }
-#endif
- {
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- dPOPTOPnnrl_nomg;
- if (Perl_isnan(left) || Perl_isnan(right))
- RETSETNO;
- SETs(boolSV(left >= right));
-#else
- dPOPnv_nomg;
- SETs(boolSV(SvNV_nomg(TOPs) >= value));
-#endif
- RETURN;
- }
-}
-PP(pp_ne)
-{
- dVAR; dSP;
- tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric);
-#ifndef NV_PRESERVES_UV
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- SP--;
- SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s)));
- RETURN;
- }
-#endif
-#ifdef PERL_PRESERVE_IVUV
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- const bool auvok = SvUOK(TOPm1s);
- const bool buvok = SvUOK(TOPs);
-
- if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
- /* Casting IV to UV before comparison isn't going to matter
- on 2s complement. On 1s complement or sign&magnitude
- (if we have any of them) it could make negative zero
- differ from normal zero. As I understand it. (Need to
- check - is negative zero implementation defined behaviour
- anyway?). NWC */
- const UV buv = SvUVX(POPs);
- const UV auv = SvUVX(TOPs);
-
- SETs(boolSV(auv != buv));
- RETURN;
+ if (SvUOK(right)) {
+ /* ## UV <=> UV ## */
+ const UV leftuv = SvUVX(left);
+ const UV rightuv = SvUVX(right);
+ return (leftuv > rightuv) - (leftuv < rightuv);
}
- { /* ## Mixed IV,UV ## */
- IV iv;
- UV uv;
-
- /* != is commutative so swap if needed (save code) */
- if (auvok) {
- /* swap. top of stack (b) is the iv */
- iv = SvIVX(TOPs);
- SP--;
- if (iv < 0) {
- /* As (a) is a UV, it's >0, so it cannot be == */
- SETs(&PL_sv_yes);
- RETURN;
- }
- uv = SvUVX(TOPs);
- } else {
- iv = SvIVX(TOPm1s);
- SP--;
- if (iv < 0) {
- /* As (b) is a UV, it's >0, so it cannot be == */
- SETs(&PL_sv_yes);
- RETURN;
- }
- uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
+ /* ## UV <=> IV ## */
+ {
+ const IV rightiv = SvIVX(right);
+ if (rightiv < 0)
+ /* As (a) is a UV, it's >=0, so it cannot be < */
+ return 1;
+ {
+ const UV leftuv = SvUVX(left);
+ return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
}
- SETs(boolSV((UV)iv != uv));
- RETURN;
}
+ /* NOTREACHED */
}
}
#endif
{
+ NV const rnv = SvNV_nomg(right);
+ NV const lnv = SvNV_nomg(left);
+
#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- dPOPTOPnnrl_nomg;
- if (Perl_isnan(left) || Perl_isnan(right))
- RETSETYES;
- SETs(boolSV(left != right));
+ if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
+ return 2;
+ }
+ return (lnv > rnv) - (lnv < rnv);
#else
- dPOPnv_nomg;
- SETs(boolSV(SvNV_nomg(TOPs) != value));
+ if (lnv < rnv)
+ return -1;
+ if (lnv > rnv)
+ return 1;
+ if (lnv == rnv)
+ return 0;
+ return 2;
#endif
- RETURN;
}
}
+
PP(pp_ncmp)
{
- dVAR; dSP; dTARGET;
+ dVAR; dSP;
+ SV *left, *right;
+ I32 value;
tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
-#ifndef NV_PRESERVES_UV
- if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
- const UV right = PTR2UV(SvRV(POPs));
- const UV left = PTR2UV(SvRV(TOPs));
- SETi((left > right) - (left < right));
- RETURN;
- }
-#endif
-#ifdef PERL_PRESERVE_IVUV
- /* Fortunately it seems NaN isn't IOK */
- SvIV_please_nomg(TOPs);
- if (SvIOK(TOPs)) {
- SvIV_please_nomg(TOPm1s);
- if (SvIOK(TOPm1s)) {
- const bool leftuvok = SvUOK(TOPm1s);
- const bool rightuvok = SvUOK(TOPs);
- I32 value;
- if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
- const IV leftiv = SvIVX(TOPm1s);
- const IV rightiv = SvIVX(TOPs);
-
- if (leftiv > rightiv)
- value = 1;
- else if (leftiv < rightiv)
- value = -1;
- else
- value = 0;
- } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
- const UV leftuv = SvUVX(TOPm1s);
- const UV rightuv = SvUVX(TOPs);
-
- if (leftuv > rightuv)
- value = 1;
- else if (leftuv < rightuv)
- value = -1;
- else
- value = 0;
- } else if (leftuvok) { /* ## UV <=> IV ## */
- const IV rightiv = SvIVX(TOPs);
- if (rightiv < 0) {
- /* As (a) is a UV, it's >=0, so it cannot be < */
- value = 1;
- } else {
- const UV leftuv = SvUVX(TOPm1s);
- if (leftuv > (UV)rightiv) {
- value = 1;
- } else if (leftuv < (UV)rightiv) {
- value = -1;
- } else {
- value = 0;
- }
- }
- } else { /* ## IV <=> UV ## */
- const IV leftiv = SvIVX(TOPm1s);
- if (leftiv < 0) {
- /* As (b) is a UV, it's >=0, so it must be < */
- value = -1;
- } else {
- const UV rightuv = SvUVX(TOPs);
- if ((UV)leftiv > rightuv) {
- value = 1;
- } else if ((UV)leftiv < rightuv) {
- value = -1;
- } else {
- value = 0;
- }
- }
- }
- SP--;
- SETi(value);
- RETURN;
- }
- }
-#endif
- {
- dPOPTOPnnrl_nomg;
- I32 value;
-
-#ifdef Perl_isnan
- if (Perl_isnan(left) || Perl_isnan(right)) {
- SETs(&PL_sv_undef);
- RETURN;
- }
- value = (left > right) - (left < right);
-#else
- if (left == right)
- value = 0;
- else if (left < right)
- value = -1;
- else if (left > right)
- value = 1;
- else {
+ right = POPs;
+ left = TOPs;
+ value = do_ncmp(left, right);
+ if (value == 2) {
SETs(&PL_sv_undef);
- RETURN;
- }
-#endif
- SETi(value);
- RETURN;
}
+ else {
+ dTARGET;
+ SETi(value);
+ }
+ RETURN;
}
PP(pp_sle)
NV value;
if (MAXARG < 1)
value = 1.0;
+ else if (!TOPs) {
+ value = 1.0; (void)POPs;
+ }
else
value = POPn;
if (value == 0.0)
PP(pp_srand)
{
dVAR; dSP; dTARGET;
- const UV anum = (MAXARG < 1) ? seed() : POPu;
+ const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
(void)seedDrand01((Rand_seed_t)anum);
PL_srand_called = TRUE;
if (anum)
IV len_iv = 0;
int len_is_uv = 1;
const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
+ const bool rvalue = (GIMME_V != G_VOID);
const char *tmps;
- const IV arybase = CopARYBASE_get(PL_curcop);
SV *repl_sv = NULL;
const char *repl = NULL;
STRLEN repl_len;
- const int num_args = PL_op->op_private & 7;
+ int num_args = PL_op->op_private & 7;
bool repl_need_utf8_upgrade = FALSE;
bool repl_is_utf8 = FALSE;
if (num_args > 2) {
if (num_args > 3) {
- repl_sv = POPs;
+ if((repl_sv = POPs)) {
repl = SvPV_const(repl_sv, repl_len);
- repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
+ repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
+ }
+ else num_args--;
+ }
+ if ((len_sv = POPs)) {
+ len_iv = SvIV(len_sv);
+ len_is_uv = SvIOK_UV(len_sv);
}
- len_sv = POPs;
- len_iv = SvIV(len_sv);
- len_is_uv = SvIOK_UV(len_sv);
+ else num_args--;
}
pos_sv = POPs;
pos1_iv = SvIV(pos_sv);
else
utf8_curlen = 0;
- if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
- UV pos1_uv = pos1_iv-arybase;
- /* Overflow can occur when $[ < 0 */
- if (arybase < 0 && pos1_uv < (UV)pos1_iv)
- goto bound_fail;
- pos1_iv = pos1_uv;
- pos1_is_uv = 1;
- }
- else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
- goto bound_fail; /* $[=3; substr($_,2,...) */
- }
- else { /* pos < $[ */
- if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
- pos1_iv = curlen;
- pos1_is_uv = 1;
- } else {
- if (curlen) {
- pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
- pos1_iv += curlen;
- }
- }
- }
- if (pos1_is_uv || pos1_iv > 0) {
- if ((UV)pos1_iv > curlen)
- goto bound_fail;
+ if (!pos1_is_uv && pos1_iv < 0 && curlen) {
+ pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+ pos1_iv += curlen;
}
+ if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
+ goto bound_fail;
if (num_args > 2) {
if (!len_is_uv && len_iv < 0) {
RETURN;
}
- SvTAINTED_off(TARG); /* decontaminate */
- SvUTF8_off(TARG); /* decontaminate */
-
tmps += byte_pos;
- sv_setpvn(TARG, tmps, byte_len);
+
+ if (rvalue) {
+ SvTAINTED_off(TARG); /* decontaminate */
+ SvUTF8_off(TARG); /* decontaminate */
+ sv_setpvn(TARG, tmps, byte_len);
#ifdef USE_LOCALE_COLLATE
- sv_unmagic(TARG, PERL_MAGIC_collxfrm);
+ sv_unmagic(TARG, PERL_MAGIC_collxfrm);
#endif
- if (utf8_curlen)
- SvUTF8_on(TARG);
+ if (utf8_curlen)
+ SvUTF8_on(TARG);
+ }
if (repl) {
SV* repl_sv_copy = NULL;
repl_sv_copy = newSVsv(repl_sv);
sv_utf8_upgrade(repl_sv_copy);
repl = SvPV_const(repl_sv_copy, repl_len);
- repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
+ repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
}
if (!SvOK(sv))
sv_setpvs(sv, "");
}
}
SPAGAIN;
- SvSETMAGIC(TARG);
- PUSHs(TARG);
+ if (rvalue) {
+ SvSETMAGIC(TARG);
+ PUSHs(TARG);
+ }
RETURN;
bound_fail:
I32 retval;
const char *big_p;
const char *little_p;
- const I32 arybase = CopARYBASE_get(PL_curcop);
bool big_utf8;
bool little_utf8;
const bool is_index = PL_op->op_type == OP_INDEX;
+ const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
- if (MAXARG >= 3) {
- /* arybase is in characters, like offset, so combine prior to the
- UTF-8 to bytes calculation. */
- offset = POPi - arybase;
- }
+ if (threeargs)
+ offset = POPi;
little = POPs;
big = POPs;
big_p = SvPV_const(big, biglen);
little_p = SvPVX(little);
}
- if (MAXARG < 3)
+ if (!threeargs)
offset = is_index ? 0 : biglen;
else {
if (big_utf8 && offset > 0)
}
SvREFCNT_dec(temp);
fail:
- PUSHi(retval + arybase);
+ PUSHi(retval);
RETURN;
}
/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
* most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
-/* Below are several macros that generate code */
/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
-#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
- STMT_START { \
- *(p) = UTF8_TWO_BYTE_HI(c); \
- *((p)+1) = UTF8_TWO_BYTE_LO(c); \
- } STMT_END
-
-/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
- * available byte after the two bytes */
+ * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
+ * and p is advanced to point to the next available byte after the two bytes */
#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
STMT_START { \
*(p)++ = UTF8_TWO_BYTE_HI(c); \
*((p)++) = UTF8_TWO_BYTE_LO(c); \
} STMT_END
-/* Generates code to store the upper case of latin1 character l which is known
- * to have its upper case be non-latin1 into the two bytes p and p+1. There
- * are only two characters that fit this description, and this macro knows
- * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
- * bytes */
-#define STORE_NON_LATIN1_UC(p, l) \
-STMT_START { \
- if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
- STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
- } else { /* Must be the following letter */ \
- STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
- } \
-} STMT_END
-
-/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
- * after the character stored */
-#define CAT_NON_LATIN1_UC(p, l) \
-STMT_START { \
- if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
- CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
- } else { \
- CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
- } \
-} STMT_END
-
-/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
- * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
- * and must require two bytes to store it. Advances p to point to the next
- * available position */
-#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
-STMT_START { \
- if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
- CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
- } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
- *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
- } else {/* else is one of the other two special cases */ \
- CAT_NON_LATIN1_UC((p), (l)); \
- } \
-} STMT_END
-
PP(pp_ucfirst)
{
/* Actually is both lcfirst() and ucfirst(). Only the first character
if (! slen) { /* If empty */
need = 1; /* still need a trailing NUL */
+ ulen = 0;
}
else if (DO_UTF8(source)) { /* Is the source utf8? */
doing_utf8 = TRUE;
+ ulen = UTF8SKIP(s);
+ if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
+ else toLOWER_utf8(s, tmpbuf, &tculen);
-/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
- * and doesn't allow for the user to specify their own. When code is added to
- * detect if there is a user-defined mapping in force here, and if so to use
- * that, then the code below can be compiled. The detection would be a good
- * thing anyway, as currently the user-defined mappings only work on utf8
- * strings, and thus depend on the chosen internal storage method, which is a
- * bad thing */
-#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
- if (UTF8_IS_INVARIANT(*s)) {
-
- /* An invariant source character is either ASCII or, in EBCDIC, an
- * ASCII equivalent or a caseless C1 control. In both these cases,
- * the lower and upper cases of any character are also invariants
- * (and title case is the same as upper case). So it is safe to
- * use the simple case change macros which avoid the overhead of
- * the general functions. Note that if perl were to be extended to
- * do locale handling in UTF-8 strings, this wouldn't be true in,
- * for example, Lithuanian or Turkic. */
- *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
- tculen = ulen = 1;
- need = slen + 1;
- }
- else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- U8 chr;
-
- /* Similarly, if the source character isn't invariant but is in the
- * latin1 range (or EBCDIC equivalent thereof), we have the case
- * changes compiled into perl, and can avoid the overhead of the
- * general functions. In this range, the characters are stored as
- * two UTF-8 bytes, and it so happens that any changed-case version
- * is also two bytes (in both ASCIIish and EBCDIC machines). */
- tculen = ulen = 2;
- need = slen + 1;
-
- /* Convert the two source bytes to a single Unicode code point
- * value, change case and save for below */
- chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
- if (op_type == OP_LCFIRST) { /* lower casing is easy */
- U8 lower = toLOWER_LATIN1(chr);
- STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
- }
- else { /* ucfirst */
- U8 upper = toUPPER_LATIN1_MOD(chr);
-
- /* Most of the latin1 range characters are well-behaved. Their
- * title and upper cases are the same, and are also in the
- * latin1 range. The macro above returns their upper (hence
- * title) case, and all that need be done is to save the result
- * for below. However, several characters are problematic, and
- * have to be handled specially. The MOD in the macro name
- * above means that these tricky characters all get mapped to
- * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
- * This mapping saves some tests for the majority of the
- * characters */
-
- if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
-
- /* Not tricky. Just save it. */
- STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
- }
- else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
-
- /* This one is tricky because it is two characters long,
- * though the UTF-8 is still two bytes, so the stored
- * length doesn't change */
- *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
- *(tmpbuf + 1) = 's';
- }
- else {
-
- /* The other two have their title and upper cases the same,
- * but are tricky because the changed-case characters
- * aren't in the latin1 range. They, however, do fit into
- * two UTF-8 bytes */
- STORE_NON_LATIN1_UC(tmpbuf, chr);
- }
- }
- }
- else {
-#endif /* end of dont want to break user-defined casing */
-
- /* Here, can't short-cut the general case */
-
- utf8_to_uvchr(s, &ulen);
- if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
- else toLOWER_utf8(s, tmpbuf, &tculen);
-
- /* we can't do in-place if the length changes. */
- if (ulen != tculen) inplace = FALSE;
- need = slen + 1 - ulen + tculen;
-#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
- }
-#endif
+ /* we can't do in-place if the length changes. */
+ if (ulen != tculen) inplace = FALSE;
+ need = slen + 1 - ulen + tculen;
}
else { /* Non-zero length, non-UTF-8, Need to consider locale and if
* latin1 is treated as caseless. Note that a locale takes
* precedence */
+ ulen = 1; /* Original character is 1 byte */
tculen = 1; /* Most characters will require one byte, but this will
* need to be overridden for the tricky ones */
need = slen + 1;
* native function does */
}
else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
- *tmpbuf = toUPPER_LATIN1_MOD(*s);
-
- /* tmpbuf now has the correct title case for all latin1 characters
- * except for the several ones that have tricky handling. All
- * of these are mapped by the MOD to the letter below. */
- if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
-
- /* The length is going to change, with all three of these, so
- * can't replace just the first character */
- inplace = FALSE;
-
- /* We use the original to distinguish between these tricky
- * cases */
- if (*s == LATIN_SMALL_LETTER_SHARP_S) {
- /* Two character title case 'Ss', but can remain non-UTF-8 */
- need = slen + 2;
- *tmpbuf = 'S';
- *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
- tculen = 2;
+ UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
+ if (tculen > 1) {
+ assert(tculen == 2);
+
+ /* If the result is an upper Latin1-range character, it can
+ * still be represented in one byte, which is its ordinal */
+ if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
+ *tmpbuf = (U8) title_ord;
+ tculen = 1;
}
else {
-
- /* The other two tricky ones have their title case outside
- * latin1. It is the same as their upper case. */
- doing_utf8 = TRUE;
- STORE_NON_LATIN1_UC(tmpbuf, *s);
-
- /* The UTF-8 and UTF-EBCDIC lengths of both these characters
- * and their upper cases is 2. */
- tculen = ulen = 2;
-
- /* The entire result will have to be in UTF-8. Assume worst
- * case sizing in conversion. (all latin1 characters occupy
- * at most two bytes in utf8) */
- convert_source_to_utf8 = TRUE;
- need = slen * 2 + 1;
+ /* Otherwise it became more than one ASCII character (in
+ * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
+ * beyond Latin1, so the number of bytes changed, so can't
+ * replace just the first character in place. */
+ inplace = FALSE;
+
+ /* If the result won't fit in a byte, the entire result will
+ * have to be in UTF-8. Assume worst case sizing in
+ * conversion. (all latin1 characters occupy at most two bytes
+ * in utf8) */
+ if (title_ord > 255) {
+ doing_utf8 = TRUE;
+ convert_source_to_utf8 = TRUE;
+ need = slen * 2 + 1;
+
+ /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
+ * (both) characters whose title case is above 255 is
+ * 2. */
+ ulen = 2;
+ }
+ else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
+ need = slen + 1 + 1;
+ }
}
- } /* End of is one of the three special chars */
+ }
} /* End of use Unicode (Latin1) semantics */
} /* End of changing the case of the first character */
bool in_iota_subscript = FALSE;
while (s < send) {
+ STRLEN u;
+ STRLEN ulen;
+ UV uv;
if (in_iota_subscript && ! is_utf8_mark(s)) {
+
/* A non-mark. Time to output the iota subscript */
#define GREEK_CAPITAL_LETTER_IOTA 0x0399
#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
in_iota_subscript = FALSE;
- }
-
-
-/* See comments at the first instance in this file of this ifdef */
-#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
-
- /* If the UTF-8 character is invariant, then it is in the range
- * known by the standard macro; result is only one byte long */
- if (UTF8_IS_INVARIANT(*s)) {
- *d++ = toUPPER(*s);
- s++;
- }
- else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-
- /* Likewise, if it fits in a byte, its case change is in our
- * table */
- U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
- U8 upper = toUPPER_LATIN1_MOD(orig);
- CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
- s++;
- }
- else {
-#else
- {
-#endif
-
- /* Otherwise, need the general UTF-8 case. Get the changed
- * case value and copy it to the output buffer */
+ }
- const STRLEN u = UTF8SKIP(s);
- STRLEN ulen;
+ /* Then handle the current character. Get the changed case value
+ * and copy it to the output buffer */
- const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
- if (uv == GREEK_CAPITAL_LETTER_IOTA
- && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
- {
- in_iota_subscript = TRUE;
- }
- else {
- if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(dest);
-
- /* If someone uppercases one million U+03B0s we
- * SvGROW() one million times. Or we could try
- * guessing how much to allocate without allocating too
- * much. Such is life. See corresponding comment in
- * lc code for another option */
- SvGROW(dest, min);
- d = (U8*)SvPVX(dest) + o;
- }
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- }
- s += u;
- }
+ u = UTF8SKIP(s);
+ uv = toUPPER_utf8(s, tmpbuf, &ulen);
+ if (uv == GREEK_CAPITAL_LETTER_IOTA
+ && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
+ {
+ in_iota_subscript = TRUE;
+ }
+ else {
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+ /* If the eventually required minimum size outgrows the
+ * available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
+
+ /* If someone uppercases one million U+03B0s we SvGROW()
+ * one million times. Or we could try guessing how much to
+ * allocate without allocating too much. Such is life.
+ * See corresponding comment in lc code for another option
+ * */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+ }
+ s += u;
}
if (in_iota_subscript) {
CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
else {
for (; s < send; d++, s++) {
*d = toUPPER_LATIN1_MOD(*s);
- if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
+ if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue;
/* The mainstream case is the tight loop above. To avoid
* extra tests in that, all three characters that require
(send -s) * 2 + 1);
d = (U8*)SvPVX(dest) + len;
- /* And append the current character's upper case in UTF-8 */
- CAT_NON_LATIN1_UC(d, *s);
-
/* Now process the remainder of the source, converting to
* upper and UTF-8. If a resulting byte is invariant in
* UTF-8, output it as-is, otherwise convert to UTF-8 and
* append it to the output. */
-
- s++;
for (; s < send; s++) {
- U8 upper = toUPPER_LATIN1_MOD(*s);
- if UTF8_IS_INVARIANT(upper) {
- *d++ = upper;
- }
- else {
- CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
- }
+ (void) _to_upper_title_latin1(*s, d, &len, 'S');
+ d += len;
}
/* Here have processed the whole source; no need to continue
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
while (s < send) {
-/* See comments at the first instance in this file of this ifdef */
-#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
- if (UTF8_IS_INVARIANT(*s)) {
+ const STRLEN u = UTF8SKIP(s);
+ STRLEN ulen;
- /* Invariant characters use the standard mappings compiled in.
- */
- *d++ = toLOWER(*s);
- s++;
- }
- else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-
- /* As do the ones in the Latin1 range */
- U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
- CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
- s++;
- }
- else {
-#endif
- /* Here, is utf8 not in Latin-1 range, have to go out and get
- * the mappings from the tables. */
+ toLOWER_utf8(s, tmpbuf, &ulen);
- const STRLEN u = UTF8SKIP(s);
- STRLEN ulen;
+ /* Here is where we would do context-sensitive actions. See the
+ * commit message for this comment for why there isn't any */
-#ifndef CONTEXT_DEPENDENT_CASING
- toLOWER_utf8(s, tmpbuf, &ulen);
-#else
-/* This is ifdefd out because it needs more work and thought. It isn't clear
- * that we should do it.
- * A minor objection is that this is based on a hard-coded rule from the
- * Unicode standard, and may change, but this is not very likely at all.
- * mktables should check and warn if it does.
- * More importantly, if the sigma occurs at the end of the string, we don't
- * have enough context to know whether it is part of a larger string or going
- * to be or not. It may be that we are passed a subset of the context, via
- * a \U...\E, for example, and we could conceivably know the larger context if
- * code were changed to pass that in. But, if the string passed in is an
- * intermediate result, and the user concatenates two strings together
- * after we have made a final sigma, that would be wrong. If the final sigma
- * occurs in the middle of the string we are working on, then we know that it
- * should be a final sigma, but otherwise we can't be sure. */
-
- const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
-
- /* If the lower case is a small sigma, it may be that we need
- * to change it to a final sigma. This happens at the end of
- * a word that contains more than just this character, and only
- * when we started with a capital sigma. */
- if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
- s > send - len && /* Makes sure not the first letter */
- utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
- ) {
-
- /* We use the algorithm in:
- * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
- * is a CAPITAL SIGMA): If C is preceded by a sequence
- * consisting of a cased letter and a case-ignorable
- * sequence, and C is not followed by a sequence consisting
- * of a case ignorable sequence and then a cased letter,
- * then when lowercasing C, C becomes a final sigma */
-
- /* To determine if this is the end of a word, need to peek
- * ahead. Look at the next character */
- const U8 *peek = s + u;
-
- /* Skip any case ignorable characters */
- while (peek < send && is_utf8_case_ignorable(peek)) {
- peek += UTF8SKIP(peek);
- }
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
- /* If we reached the end of the string without finding any
- * non-case ignorable characters, or if the next such one
- * is not-cased, then we have met the conditions for it
- * being a final sigma with regards to peek ahead, and so
- * must do peek behind for the remaining conditions. (We
- * know there is stuff behind to look at since we tested
- * above that this isn't the first letter) */
- if (peek >= send || ! is_utf8_cased(peek)) {
- peek = utf8_hop(s, -1);
-
- /* Here are at the beginning of the first character
- * before the original upper case sigma. Keep backing
- * up, skipping any case ignorable characters */
- while (is_utf8_case_ignorable(peek)) {
- peek = utf8_hop(peek, -1);
- }
+ /* If the eventually required minimum size outgrows the
+ * available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
- /* Here peek points to the first byte of the closest
- * non-case-ignorable character before the capital
- * sigma. If it is cased, then by the Unicode
- * algorithm, we should use a small final sigma instead
- * of what we have */
- if (is_utf8_cased(peek)) {
- STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
- UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
- }
- }
- }
- else { /* Not a context sensitive mapping */
-#endif /* End of commented out context sensitive */
- if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
-
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(dest);
-
- /* If someone lowercases one million U+0130s we
- * SvGROW() one million times. Or we could try
- * guessing how much to allocate without allocating too
- * much. Such is life. Another option would be to
- * grow an extra byte or two more each time we need to
- * grow, which would cut down the million to 500K, with
- * little waste */
- SvGROW(dest, min);
- d = (U8*)SvPVX(dest) + o;
- }
-#ifdef CONTEXT_DEPENDENT_CASING
- }
-#endif
- /* Copy the newly lowercased letter to the output buffer we're
- * building */
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- s += u;
-#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+ /* If someone lowercases one million U+0130s we SvGROW() one
+ * million times. Or we could try guessing how much to
+ * allocate without allocating too much. Such is life.
+ * Another option would be to grow an extra byte or two more
+ * each time we need to grow, which would cut down the million
+ * to 500K, with little waste */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
}
-#endif
+
+ /* Copy the newly lowercased letter to the output buffer we're
+ * building */
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+ s += u;
} /* End of looping through the source string */
SvUTF8_on(dest);
*d = '\0';
register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
if (SvTYPE(av) == SVt_PVAV) {
- const I32 arybase = CopARYBASE_get(PL_curcop);
const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
bool can_preserve = FALSE;
I32 elem = SvIV(*MARK);
bool preeminent = TRUE;
- if (elem > 0)
- elem -= arybase;
if (localizing && can_preserve) {
/* If we can determine whether the element exist,
* Try to preserve the existenceness of a tied array
}
EXTEND(SP, 2);
- mPUSHi(CopARYBASE_get(PL_curcop) + current);
+ mPUSHi(current);
if (gimme == G_ARRAY) {
SV **const element = av_fetch(array, current, 0);
PUSHs(element ? *element : &PL_sv_undef);
}
else if (gimme == G_ARRAY) {
IV n = Perl_av_len(aTHX_ array);
- IV i = CopARYBASE_get(PL_curcop);
+ IV i;
EXTEND(SP, n + 1);
if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
- n += i;
- for (; i <= n; i++) {
+ for (i = 0; i <= n; i++) {
mPUSHi(i);
}
}
SV ** const lastlelem = PL_stack_base + POPMARK;
SV ** const firstlelem = PL_stack_base + POPMARK + 1;
register SV ** const firstrelem = lastlelem + 1;
- const I32 arybase = CopARYBASE_get(PL_curcop);
I32 is_something_there = FALSE;
register const I32 max = lastrelem - lastlelem;
I32 ix = SvIV(*lastlelem);
if (ix < 0)
ix += max;
- else
- ix -= arybase;
if (ix < 0 || ix >= max)
*firstlelem = &PL_sv_undef;
else
I32 ix = SvIV(*lelem);
if (ix < 0)
ix += max;
- else
- ix -= arybase;
if (ix < 0 || ix >= max)
*lelem = &PL_sv_undef;
else {
PP(pp_splice)
{
dVAR; dSP; dMARK; dORIGMARK;
+ int num_args = (SP - MARK);
register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
register SV **src;
register SV **dst;
offset = i = SvIV(*MARK);
if (offset < 0)
offset += AvFILLp(ary) + 1;
- else
- offset -= CopARYBASE_get(PL_curcop);
if (offset < 0)
DIE(aTHX_ PL_no_aelem, i);
if (++MARK < SP) {
length = AvMAX(ary) + 1;
}
if (offset > AvFILLp(ary) + 1) {
- Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
+ if (num_args > 2)
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
offset = AvFILLp(ary) + 1;
}
after = AvFILLp(ary) + 1 - (offset + length);
I32 rex_return;
PUTBACK;
rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
- sv, NULL, 0);
+ sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
SPAGAIN;
if (rex_return == 0)
break;
dSP;
dTOPss;
SV *retsv = sv;
- assert(SvTYPE(retsv) != SVt_PVCV);
SvLOCK(sv);
- if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
+ if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
+ || SvTYPE(retsv) == SVt_PVCV) {
retsv = refto(retsv);
}
SETs(retsv);
RETURN;
}
+/* For sorting out arguments passed to a &CORE:: subroutine */
+PP(pp_coreargs)
+{
+ dSP;
+ int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
+ int defgv = PL_opargs[opnum] & OA_DEFGV, whicharg = 0;
+ AV * const at_ = GvAV(PL_defgv);
+ SV **svp = AvARRAY(at_);
+ I32 minargs = 0, maxargs = 0, numargs = AvFILLp(at_)+1;
+ I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
+ bool seen_question = 0;
+ const char *err = NULL;
+ const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
+
+ /* Count how many args there are first, to get some idea how far to
+ extend the stack. */
+ while (oa) {
+ if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
+ maxargs++;
+ if (oa & OA_OPTIONAL) seen_question = 1;
+ if (!seen_question) minargs++;
+ oa >>= 4;
+ }
+
+ if(numargs < minargs) err = "Not enough";
+ else if(numargs > maxargs) err = "Too many";
+ if (err)
+ /* diag_listed_as: Too many arguments for %s */
+ Perl_croak(aTHX_
+ "%s arguments for %s", err,
+ opnum ? OP_DESC(PL_op->op_next) : SvPV_nolen_const(cSVOP_sv)
+ );
+
+ /* Reset the stack pointer. Without this, we end up returning our own
+ arguments in list context, in addition to the values we are supposed
+ to return. nextstate usually does this on sub entry, but we need
+ to run the next op with the caller's hints, so we cannot have a
+ nextstate. */
+ SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
+
+ if(!maxargs) RETURN;
+
+ /* We do this here, rather than with a separate pushmark op, as it has
+ to come in between two things this function does (stack reset and
+ arg pushing). This seems the easiest way to do it. */
+ if (pushmark) {
+ PUTBACK;
+ (void)Perl_pp_pushmark(aTHX);
+ }
+
+ EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
+ PUTBACK; /* The code below can die in various places. */
+
+ oa = PL_opargs[opnum] >> OASHIFT;
+ for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
+ whicharg++;
+ switch (oa & 7) {
+ case OA_SCALAR:
+ if (!numargs && defgv && whicharg == minargs + 1) {
+ PERL_SI * const oldsi = PL_curstackinfo;
+ I32 const oldcxix = oldsi->si_cxix;
+ CV *caller;
+ if (oldcxix) oldsi->si_cxix--;
+ else PL_curstackinfo = oldsi->si_prev;
+ caller = find_runcv(NULL);
+ PL_curstackinfo = oldsi;
+ oldsi->si_cxix = oldcxix;
+ PUSHs(find_rundefsv2(
+ caller,cxstack[cxstack_ix].blk_oldcop->cop_seq
+ ));
+ }
+ else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
+ break;
+ case OA_LIST:
+ while (numargs--) {
+ PUSHs(svp && *svp ? *svp : &PL_sv_undef);
+ svp++;
+ }
+ RETURN;
+ case OA_HVREF:
+ if (!svp || !*svp || !SvROK(*svp)
+ || SvTYPE(SvRV(*svp)) != SVt_PVHV)
+ DIE(aTHX_
+ /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
+ "Type of arg %d to &CORE::%s must be hash reference",
+ whicharg, OP_DESC(PL_op->op_next)
+ );
+ PUSHs(SvRV(*svp));
+ break;
+ case OA_FILEREF:
+ if (!numargs) PUSHs(NULL);
+ else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
+ /* no magic here, as the prototype will have added an extra
+ refgen and we just want what was there before that */
+ PUSHs(SvRV(*svp));
+ else {
+ const bool constr = PL_op->op_private & whicharg;
+ PUSHs(S_rv2gv(aTHX_
+ svp && *svp ? *svp : &PL_sv_undef,
+ constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS,
+ !constr
+ ));
+ }
+ break;
+ case OA_SCALARREF:
+ {
+ const bool wantscalar =
+ PL_op->op_private & OPpCOREARGS_SCALARMOD;
+ if (!svp || !*svp || !SvROK(*svp)
+ /* We have to permit globrefs even for the \$ proto, as
+ *foo is indistinguishable from ${\*foo}, and the proto-
+ type permits the latter. */
+ || SvTYPE(SvRV(*svp)) > (
+ wantscalar ? SVt_PVLV
+ : opnum == OP_LOCK ? SVt_PVCV
+ : SVt_PVHV
+ )
+ )
+ DIE(aTHX_
+ /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
+ "Type of arg %d to &CORE::%s must be %s",
+ whicharg, OP_DESC(PL_op->op_next),
+ wantscalar
+ ? "scalar reference"
+ : opnum == OP_LOCK
+ ? "reference to one of [$@%&*]"
+ : "reference to one of [$@%*]"
+ );
+ PUSHs(SvRV(*svp));
+ break;
+ }
+ default:
+ DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
+ }
+ oa = oa >> 4;
+ }
+
+ RETURN;
+}
+
+PP(pp_runcv)
+{
+ dSP;
+ CV *cv;
+ if (PL_op->op_private & OPpOFFBYONE) {
+ PERL_SI * const oldsi = PL_curstackinfo;
+ I32 const oldcxix = oldsi->si_cxix;
+ if (oldcxix) oldsi->si_cxix--;
+ else PL_curstackinfo = oldsi->si_prev;
+ cv = find_runcv(NULL);
+ PL_curstackinfo = oldsi;
+ oldsi->si_cxix = oldcxix;
+ }
+ else cv = find_runcv(NULL);
+ XPUSHs(CvUNIQUE(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv)));
+ RETURN;
+}
+
+
/*
* Local variables:
* c-indentation-style: bsd