perlexperiment: version of acceptance for \N{name}

[perl5.git] / pp.c
diff --git a/pp.c b/pp.c

index 14ba91d..e63c342 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -47,6 +47,9 @@ extern Pid_t getpid (void);
      _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
  #endif
  
+static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1; /* Copy() count in pp_fc; -1 presumably drops the NUL -- confirm */
+static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1; /* Copy() count in pp_uc; same -1 */
+
  /* variations on pp_null */
  
  PP(pp_stub)
@@ -234,8 +237,10 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
                         Perl_croak_no_modify();
                     if (cUNOP->op_targ) {
                         SV * const namesv = PAD_SV(cUNOP->op_targ);
+                       HV *stash = CopSTASH(PL_curcop);
+                       if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
                         gv = MUTABLE_GV(newSV(0));
-                       gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
+                       gv_init_sv(gv, stash, namesv, 0);
                     }
                     else {
                         const char * const name = CopSTASHPV(PL_curcop);
@@ -602,8 +607,12 @@ PP(pp_bless)
      HV *stash;
  
      if (MAXARG == 1)
+    {
        curstash:
         stash = CopSTASH(PL_curcop);
+       if (SvTYPE(stash) != SVt_PVHV)
+           Perl_croak(aTHX_ "Attempt to bless into a freed package");
+    }
      else {
         SV * const ssv = POPs;
         STRLEN len;
@@ -1654,7 +1663,7 @@ PP(pp_repeat)
         MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
         /* Did the max computation overflow? */
         if (items > 0 && max > 0 && (max < items || max < count))
-          Perl_croak(aTHX_ oom_list_extend);
+          Perl_croak(aTHX_ "%s", oom_list_extend);
         MEXTEND(MARK, max);
         if (count > 1) {
             while (SP > MARK) {
@@ -1712,7 +1721,7 @@ PP(pp_repeat)
             else {
                 const STRLEN max = (UV)count * len;
                 if (len > MEM_SIZE_MAX / count)
-                    Perl_croak(aTHX_ oom_string_extend);
+                    Perl_croak(aTHX_ "%s", oom_string_extend);
                 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
                 SvGROW(TARG, max + 1);
                 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
@@ -3430,15 +3439,6 @@ PP(pp_crypt)
  /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
   * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
  
-/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
- * and p is advanced to point to the next available byte after the two bytes */
-#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)                                     \
-    STMT_START {                                                           \
-       *(p)++ = UTF8_TWO_BYTE_HI(c);                                       \
-       *((p)++) = UTF8_TWO_BYTE_LO(c);                                     \
-    } STMT_END
-
  PP(pp_ucfirst)
  {
      /* Actually is both lcfirst() and ucfirst().  Only the first character
@@ -3762,10 +3762,8 @@ PP(pp_uc)
             if (in_iota_subscript && ! _is_utf8_mark(s)) {
  
                 /* A non-mark.  Time to output the iota subscript */
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
-
-               CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+               Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+                d += capital_iota_len;
                 in_iota_subscript = FALSE;
              }
  
@@ -3775,6 +3773,8 @@ PP(pp_uc)
              u = UTF8SKIP(s);
              uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
                                       cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
              if (uv == GREEK_CAPITAL_LETTER_IOTA
                  && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
              {
@@ -3800,7 +3800,8 @@ PP(pp_uc)
              s += u;
         }
         if (in_iota_subscript) {
-           CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+            Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+            d += capital_iota_len;
         }
         SvUTF8_on(dest);
         *d = '\0';
@@ -4198,23 +4199,20 @@ PP(pp_fc)
         }
      } /* Unflagged string */
      else if (len) {
-        /* For locale, bytes, and nothing, the behavior is supposed to be the
-         * same as lc().
-         */
          if ( IN_LOCALE_RUNTIME ) { /* Under locale */
              TAINT;
              SvTAINTED_on(dest);
              for (; s < send; d++, s++)
-                *d = toLOWER_LC(*s);
+                *d = toFOLD_LC(*s);
          }
          else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
              for (; s < send; d++, s++)
-                *d = toLOWER(*s);
+                *d = toFOLD(*s);
          }
          else {
              /* For ASCII and the Latin-1 range, there's only two troublesome
               * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
-             * casefolding becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which
+             * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
               * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
               * For the rest, the casefold is their lowercase.  */
              for (; s < send; d++, s++) {
@@ -4234,7 +4232,8 @@ PP(pp_fc)
                                                  (send -s) * 2 + 1);
                      d = (U8*)SvPVX(dest) + len;
  
-                    CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
+                    Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
+                    d += small_mu_len;
                      s++;
                      for (; s < send; s++) {
                          STRLEN ulen;
@@ -4518,7 +4517,8 @@ S_do_delete_local(pTHX)
                 }
                 else {
                     sv = hv_delete_ent(hv, keysv, 0, 0);
-                   SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+                   if (preeminent)
+                       SvREFCNT_inc_simple_void(sv); /* De-mortalize */
                 }
                 if (preeminent) {
                     if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
@@ -4553,7 +4553,8 @@ S_do_delete_local(pTHX)
                     }
                     else {
                         sv = av_delete(av, idx, 0);
-                       SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+                       if (preeminent)
+                          SvREFCNT_inc_simple_void(sv); /* De-mortalize */
                     }
                     if (preeminent) {
                         save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
@@ -5313,7 +5314,6 @@ PP(pp_split)
      STRLEN len;
      const char *s = SvPV_const(sv, len);
      const bool do_utf8 = DO_UTF8(sv);
-    const bool skipwhite = PL_op->op_flags & OPf_SPECIAL;
      const char *strend = s + len;
      PMOP *pm;
      REGEXP *rx;
@@ -5346,9 +5346,7 @@ PP(pp_split)
      rx = PM_GETRE(pm);
  
      TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
-            (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite));
-
-    RX_MATCH_UTF8_set(rx, do_utf8);
+             (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
  
  #ifdef USE_ITHREADS
      if (pm->op_pmreplrootu.op_pmtargetoff) {
@@ -5386,7 +5384,7 @@ PP(pp_split)
      }
      base = SP - PL_stack_base;
      orig = s;
-    if (skipwhite) {
+    if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
         if (do_utf8) {
             while (isSPACE_utf8(s))
                 s += UTF8SKIP(s);
@@ -5408,7 +5406,7 @@ PP(pp_split)
  
      if (!limit)
         limit = maxiters + 2;
-    if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) {
+    if (RX_EXTFLAGS(rx) & RXf_WHITE) {
         while (--limit) {
             m = s;
             /* this one uses 'm' and is a negative test */