This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Better document setlocale, "use locale" interactions
[perl5.git] / pp.c
diff --git a/pp.c b/pp.c
index 6da9970..e7e06ff 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -47,6 +47,9 @@ extern Pid_t getpid (void);
     _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
 #endif
 
+static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
+static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
+
 /* variations on pp_null */
 
 PP(pp_stub)
@@ -65,8 +68,8 @@ PP(pp_padav)
     dVAR; dSP; dTARGET;
     I32 gimme;
     assert(SvTYPE(TARG) == SVt_PVAV);
-    if (PL_op->op_private & OPpLVAL_INTRO)
-       if (!(PL_op->op_private & OPpPAD_STATE))
+    if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
+       if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
            SAVECLEARSV(PAD_SVl(PL_op->op_targ));
     EXTEND(SP, 1);
     if (PL_op->op_flags & OPf_REF) {
@@ -115,8 +118,8 @@ PP(pp_padhv)
 
     assert(SvTYPE(TARG) == SVt_PVHV);
     XPUSHs(TARG);
-    if (PL_op->op_private & OPpLVAL_INTRO)
-       if (!(PL_op->op_private & OPpPAD_STATE))
+    if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
+       if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
            SAVECLEARSV(PAD_SVl(PL_op->op_targ));
     if (PL_op->op_flags & OPf_REF)
        RETURN;
@@ -234,8 +237,10 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
                        Perl_croak_no_modify();
                    if (cUNOP->op_targ) {
                        SV * const namesv = PAD_SV(cUNOP->op_targ);
+                       HV *stash = CopSTASH(PL_curcop);
+                       if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
                        gv = MUTABLE_GV(newSV(0));
-                       gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
+                       gv_init_sv(gv, stash, namesv, 0);
                    }
                    else {
                        const char * const name = CopSTASHPV(PL_curcop);
@@ -487,11 +492,8 @@ PP(pp_prototype)
        if (strnEQ(s, "CORE::", 6)) {
            const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
            if (!code || code == -KEY_CORE)
-               DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
-                   SVfARG(newSVpvn_flags(
-                       s+6, SvCUR(TOPs)-6,
-                       (SvFLAGS(TOPs) & SVf_UTF8)|SVs_TEMP
-                   )));
+               DIE(aTHX_ "Can't find an opnumber for \"%"UTF8f"\"",
+                  UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
            {
                SV * const sv = core_prototype(NULL, s + 6, code, NULL);
                if (sv) ret = sv;
@@ -585,10 +587,8 @@ PP(pp_ref)
     dVAR; dSP; dTARGET;
     SV * const sv = POPs;
 
-    if (sv)
-       SvGETMAGIC(sv);
-
-    if (!sv || !SvROK(sv))
+    SvGETMAGIC(sv);
+    if (!SvROK(sv))
        RETPUSHNO;
 
     (void)sv_ref(TARG,SvRV(sv),TRUE);
@@ -602,8 +602,12 @@ PP(pp_bless)
     HV *stash;
 
     if (MAXARG == 1)
+    {
       curstash:
        stash = CopSTASH(PL_curcop);
+       if (SvTYPE(stash) != SVt_PVHV)
+           Perl_croak(aTHX_ "Attempt to bless into a freed package");
+    }
     else {
        SV * const ssv = POPs;
        STRLEN len;
@@ -969,7 +973,12 @@ PP(pp_undef)
                           "Constant subroutine %"SVf" undefined",
                           SVfARG(CvANON((const CV *)sv)
                              ? newSVpvs_flags("(anonymous)", SVs_TEMP)
-                             : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
+                             : sv_2mortal(newSVhek(
+                                CvNAMED(sv)
+                                 ? CvNAME_HEK((CV *)sv)
+                                 : GvENAME_HEK(CvGV((const CV *)sv))
+                               ))
+                           ));
        /* FALLTHROUGH */
     case SVt_PVFM:
        {
@@ -1654,7 +1663,7 @@ PP(pp_repeat)
        MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
        /* Did the max computation overflow? */
        if (items > 0 && max > 0 && (max < items || max < count))
-          Perl_croak(aTHX_ oom_list_extend);
+          Perl_croak(aTHX_ "%s", oom_list_extend);
        MEXTEND(MARK, max);
        if (count > 1) {
            while (SP > MARK) {
@@ -1712,7 +1721,7 @@ PP(pp_repeat)
            else {
                const STRLEN max = (UV)count * len;
                if (len > MEM_SIZE_MAX / count)
-                    Perl_croak(aTHX_ oom_string_extend);
+                    Perl_croak(aTHX_ "%s", oom_string_extend);
                MEM_WRAP_CHECK_1(max, char, oom_string_extend);
                SvGROW(TARG, max + 1);
                repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
@@ -3430,15 +3439,6 @@ PP(pp_crypt)
 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
  * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
 
-/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
- * and p is advanced to point to the next available byte after the two bytes */
-#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)                                     \
-    STMT_START {                                                           \
-       *(p)++ = UTF8_TWO_BYTE_HI(c);                                       \
-       *((p)++) = UTF8_TWO_BYTE_LO(c);                                     \
-    } STMT_END
-
 PP(pp_ucfirst)
 {
     /* Actually is both lcfirst() and ucfirst().  Only the first character
@@ -3740,7 +3740,7 @@ PP(pp_uc)
 
     if (DO_UTF8(source)) {
        const U8 *const send = s + len;
-       U8 tmpbuf[UTF8_MAXBYTES+1];
+       U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
        bool tainted = FALSE;
 
        /* All occurrences of these are to be moved to follow any other marks.
@@ -3762,10 +3762,8 @@ PP(pp_uc)
            if (in_iota_subscript && ! _is_utf8_mark(s)) {
 
                /* A non-mark.  Time to output the iota subscript */
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
-
-               CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+               Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+                d += capital_iota_len;
                in_iota_subscript = FALSE;
             }
 
@@ -3775,6 +3773,8 @@ PP(pp_uc)
             u = UTF8SKIP(s);
             uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
                                      cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
             if (uv == GREEK_CAPITAL_LETTER_IOTA
                 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
             {
@@ -3800,7 +3800,8 @@ PP(pp_uc)
             s += u;
        }
        if (in_iota_subscript) {
-           CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+            Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+            d += capital_iota_len;
        }
        SvUTF8_on(dest);
        *d = '\0';
@@ -4142,7 +4143,7 @@ PP(pp_fc)
     const U8 *s;
     const U8 *send;
     U8 *d;
-    U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1];
+    U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
     const bool full_folding = TRUE;
     const U8 flags = ( full_folding      ? FOLD_FLAGS_FULL   : 0 )
                    | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
@@ -4198,23 +4199,20 @@ PP(pp_fc)
        }
     } /* Unflagged string */
     else if (len) {
-        /* For locale, bytes, and nothing, the behavior is supposed to be the
-         * same as lc().
-         */
         if ( IN_LOCALE_RUNTIME ) { /* Under locale */
             TAINT;
             SvTAINTED_on(dest);
             for (; s < send; d++, s++)
-                *d = toLOWER_LC(*s);
+                *d = toFOLD_LC(*s);
         }
         else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
             for (; s < send; d++, s++)
-                *d = toLOWER(*s);
+                *d = toFOLD(*s);
         }
         else {
             /* For ASCII and the Latin-1 range, there's only two troublesome
              * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
-             * casefolding becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which
+             * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
              * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
              * For the rest, the casefold is their lowercase.  */
             for (; s < send; d++, s++) {
@@ -4234,7 +4232,8 @@ PP(pp_fc)
                                                 (send -s) * 2 + 1);
                     d = (U8*)SvPVX(dest) + len;
 
-                    CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
+                    Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
+                    d += small_mu_len;
                     s++;
                     for (; s < send; s++) {
                         STRLEN ulen;
@@ -4518,7 +4517,8 @@ S_do_delete_local(pTHX)
                }
                else {
                    sv = hv_delete_ent(hv, keysv, 0, 0);
-                   SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+                   if (preeminent)
+                       SvREFCNT_inc_simple_void(sv); /* De-mortalize */
                }
                if (preeminent) {
                    if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
@@ -4553,7 +4553,8 @@ S_do_delete_local(pTHX)
                    }
                    else {
                        sv = av_delete(av, idx, 0);
-                       SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+                       if (preeminent)
+                          SvREFCNT_inc_simple_void(sv); /* De-mortalize */
                    }
                    if (preeminent) {
                        save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
@@ -4665,7 +4666,7 @@ PP(pp_exists)
     SV *tmpsv;
     HV *hv;
 
-    if (PL_op->op_private & OPpEXISTS_SUB) {
+    if (UNLIKELY( PL_op->op_private & OPpEXISTS_SUB )) {
        GV *gv;
        SV * const sv = POPs;
        CV * const cv = sv_2cv(sv, &hv, &gv, 0);
@@ -4677,7 +4678,7 @@ PP(pp_exists)
     }
     tmpsv = POPs;
     hv = MUTABLE_HV(POPs);
-    if (SvTYPE(hv) == SVt_PVHV) {
+    if (LIKELY( SvTYPE(hv) == SVt_PVHV )) {
        if (hv_exists_ent(hv, tmpsv, 0))
            RETPUSHYES;
     }
@@ -4816,10 +4817,10 @@ PP(pp_lslice)
 
 PP(pp_anonlist)
 {
-    dVAR; dSP; dMARK; dORIGMARK;
+    dVAR; dSP; dMARK;
     const I32 items = SP - MARK;
     SV * const av = MUTABLE_SV(av_make(items, MARK+1));
-    SP = ORIGMARK;             /* av_make() might realloc stack_sp */
+    SP = MARK;
     mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
            ? newRV_noinc(av) : av);
     RETURN;
@@ -4828,7 +4829,10 @@ PP(pp_anonlist)
 PP(pp_anonhash)
 {
     dVAR; dSP; dMARK; dORIGMARK;
-    HV* const hv = (HV *)sv_2mortal((SV *)newHV());
+    HV* const hv = newHV();
+    SV* const retval = sv_2mortal( PL_op->op_flags & OPf_SPECIAL
+                                    ? newRV_noinc(MUTABLE_SV(hv))
+                                    : MUTABLE_SV(hv) );
 
     while (MARK < SP) {
        SV * const key =
@@ -4849,9 +4853,7 @@ PP(pp_anonhash)
        (void)hv_store_ent(hv,key,val,0);
     }
     SP = ORIGMARK;
-    if (PL_op->op_flags & OPf_SPECIAL)
-       mXPUSHs(newRV_inc(MUTABLE_SV(hv)));
-    else XPUSHs(MUTABLE_SV(hv));
+    XPUSHs(retval);
     RETURN;
 }
 
@@ -4901,7 +4903,7 @@ PP(pp_splice)
     const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
 
     if (mg) {
-       return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
+       return Perl_tied_method(aTHX_ SV_CONST(SPLICE), mark - 1, MUTABLE_SV(ary), mg,
                                    GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
                                    sp - mark);
     }
@@ -5099,7 +5101,7 @@ PP(pp_push)
        PUSHMARK(MARK);
        PUTBACK;
        ENTER_with_name("call_PUSH");
-       call_method("PUSH",G_SCALAR|G_DISCARD);
+       call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
        LEAVE_with_name("call_PUSH");
        SPAGAIN;
     }
@@ -5152,7 +5154,7 @@ PP(pp_unshift)
        PUSHMARK(MARK);
        PUTBACK;
        ENTER_with_name("call_UNSHIFT");
-       call_method("UNSHIFT",G_SCALAR|G_DISCARD);
+       call_sv(SV_CONST(UNSHIFT),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
        LEAVE_with_name("call_UNSHIFT");
        SPAGAIN;
     }
@@ -5347,8 +5349,6 @@ PP(pp_split)
     TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
              (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
 
-    RX_MATCH_UTF8_set(rx, do_utf8);
-
 #ifdef USE_ITHREADS
     if (pm->op_pmreplrootu.op_pmtargetoff) {
        ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
@@ -5712,7 +5712,7 @@ PP(pp_split)
        else {
            PUTBACK;
            ENTER_with_name("call_PUSH");
-           call_method("PUSH",G_SCALAR|G_DISCARD);
+           call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
            LEAVE_with_name("call_PUSH");
            SPAGAIN;
            if (gimme == G_ARRAY) {