pp.c: Remove macro no-longer called

[perl5.git] / pp.c
diff --git a/pp.c b/pp.c

index a102a21..7059a48 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -165,7 +165,7 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
         sv = SvRV(sv);
         if (SvTYPE(sv) == SVt_PVIO) {
             GV * const gv = MUTABLE_GV(sv_newmortal());
-           gv_init(gv, 0, "", 0, 0);
+           gv_init(gv, 0, "$__ANONIO__", 11, 0);
             GvIOp(gv) = MUTABLE_IO(sv);
             SvREFCNT_inc_void_NN(sv);
             sv = MUTABLE_SV(gv);
@@ -457,7 +457,9 @@ PP(pp_prototype)
      }
      cv = sv_2cv(TOPs, &stash, &gv, 0);
      if (cv && SvPOK(cv))
-       ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
+       ret = newSVpvn_flags(
+           CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
+       );
    set:
      SETs(ret);
      RETURN;
@@ -582,7 +584,8 @@ PP(pp_gelem)
      dVAR; dSP;
  
      SV *sv = POPs;
-    const char * const elem = SvPV_nolen_const(sv);
+    STRLEN len;
+    const char * const elem = SvPV_const(sv, len);
      GV * const gv = MUTABLE_GV(POPs);
      SV * tmpRef = NULL;
  
@@ -592,48 +595,48 @@ PP(pp_gelem)
         const char * const second_letter = elem + 1;
         switch (*elem) {
         case 'A':
-           if (strEQ(second_letter, "RRAY"))
+           if (len == 5 && strEQ(second_letter, "RRAY"))
                 tmpRef = MUTABLE_SV(GvAV(gv));
             break;
         case 'C':
-           if (strEQ(second_letter, "ODE"))
+           if (len == 4 && strEQ(second_letter, "ODE"))
                 tmpRef = MUTABLE_SV(GvCVu(gv));
             break;
         case 'F':
-           if (strEQ(second_letter, "ILEHANDLE")) {
+           if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
                 /* finally deprecated in 5.8.0 */
                 deprecate("*glob{FILEHANDLE}");
                 tmpRef = MUTABLE_SV(GvIOp(gv));
             }
             else
-               if (strEQ(second_letter, "ORMAT"))
+               if (len == 6 && strEQ(second_letter, "ORMAT"))
                     tmpRef = MUTABLE_SV(GvFORM(gv));
             break;
         case 'G':
-           if (strEQ(second_letter, "LOB"))
+           if (len == 4 && strEQ(second_letter, "LOB"))
                 tmpRef = MUTABLE_SV(gv);
             break;
         case 'H':
-           if (strEQ(second_letter, "ASH"))
+           if (len == 4 && strEQ(second_letter, "ASH"))
                 tmpRef = MUTABLE_SV(GvHV(gv));
             break;
         case 'I':
-           if (*second_letter == 'O' && !elem[2])
+           if (*second_letter == 'O' && !elem[2] && len == 2)
                 tmpRef = MUTABLE_SV(GvIOp(gv));
             break;
         case 'N':
-           if (strEQ(second_letter, "AME"))
+           if (len == 4 && strEQ(second_letter, "AME"))
                 sv = newSVhek(GvNAME_HEK(gv));
             break;
         case 'P':
-           if (strEQ(second_letter, "ACKAGE")) {
+           if (len == 7 && strEQ(second_letter, "ACKAGE")) {
                 const HV * const stash = GvSTASH(gv);
                 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
                 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
             }
             break;
         case 'S':
-           if (strEQ(second_letter, "CALAR"))
+           if (len == 6 && strEQ(second_letter, "CALAR"))
                 tmpRef = GvSVn(gv);
             break;
         }
@@ -980,9 +983,11 @@ PP(pp_undef)
         break;
      case SVt_PVCV:
         if (cv_const_sv((const CV *)sv))
-           Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
-                          CvANON((const CV *)sv) ? "(anonymous)"
-                          : GvENAME(CvGV((const CV *)sv)));
+           Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+                          "Constant subroutine %"SVf" undefined",
+                          SVfARG(CvANON((const CV *)sv)
+                             ? newSVpvs_flags("(anonymous)", SVs_TEMP)
+                             : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
         /* FALLTHROUGH */
      case SVt_PVFM:
         {
@@ -2966,6 +2971,7 @@ PP(pp_substr)
      IV     len_iv = 0;
      int    len_is_uv = 1;
      const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
+    const bool rvalue = (GIMME_V != G_VOID);
      const char *tmps;
      SV *repl_sv = NULL;
      const char *repl = NULL;
@@ -3094,16 +3100,18 @@ PP(pp_substr)
             RETURN;
         }
  
-       SvTAINTED_off(TARG);                    /* decontaminate */
-       SvUTF8_off(TARG);                       /* decontaminate */
-
         tmps += byte_pos;
-       sv_setpvn(TARG, tmps, byte_len);
+
+       if (rvalue) {
+           SvTAINTED_off(TARG);                        /* decontaminate */
+           SvUTF8_off(TARG);                   /* decontaminate */
+           sv_setpvn(TARG, tmps, byte_len);
  #ifdef USE_LOCALE_COLLATE
-       sv_unmagic(TARG, PERL_MAGIC_collxfrm);
+           sv_unmagic(TARG, PERL_MAGIC_collxfrm);
  #endif
-       if (utf8_curlen)
-           SvUTF8_on(TARG);
+           if (utf8_curlen)
+               SvUTF8_on(TARG);
+       }
  
         if (repl) {
             SV* repl_sv_copy = NULL;
@@ -3123,8 +3131,10 @@ PP(pp_substr)
         }
      }
      SPAGAIN;
-    SvSETMAGIC(TARG);
-    PUSHs(TARG);
+    if (rvalue) {
+       SvSETMAGIC(TARG);
+       PUSHs(TARG);
+    }
      RETURN;
  
  bound_fail:
@@ -3420,17 +3430,9 @@ PP(pp_crypt)
  /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
   * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
  
-/* Below are several macros that generate code */
  /* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
-#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c)                                   \
-    STMT_START {                                                           \
-       *(p) = UTF8_TWO_BYTE_HI(c);                                         \
-       *((p)+1) = UTF8_TWO_BYTE_LO(c);                                     \
-    } STMT_END
-
-/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
- * available byte after the two bytes */
+ * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
+ * and p is advanced to point to the next available byte after the two bytes */
  #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)                                     \
      STMT_START {                                                           \
         *(p)++ = UTF8_TWO_BYTE_HI(c);                                       \
@@ -3451,32 +3453,6 @@ STMT_START {                                                                 \
      }                                                                      \
  } STMT_END
  
-/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
- * after the character stored */
-#define CAT_NON_LATIN1_UC(p, l)                                                    \
-STMT_START {                                                               \
-    if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {                      \
-       CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);    \
-    } else {                                                               \
-       CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU);             \
-    }                                                                      \
-} STMT_END
-
-/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
- * case of l into p and p+1.  u must be the result of toUPPER_LATIN1_MOD(l),
- * and must require two bytes to store it.  Advances p to point to the next
- * available position */
-#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u)                                \
-STMT_START {                                                               \
-    if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {                      \
-       CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
-    } else if (l == LATIN_SMALL_LETTER_SHARP_S) {                          \
-       *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */                \
-    } else {/* else is one of the other two special cases */               \
-       CAT_NON_LATIN1_UC((p), (l));                                        \
-    }                                                                      \
-} STMT_END
-
  PP(pp_ucfirst)
  {
      /* Actually is both lcfirst() and ucfirst().  Only the first character
@@ -3527,89 +3503,13 @@ PP(pp_ucfirst)
      }
      else if (DO_UTF8(source)) {        /* Is the source utf8? */
         doing_utf8 = TRUE;
+        ulen = UTF8SKIP(s);
+        if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
+        else toLOWER_utf8(s, tmpbuf, &tculen);
  
-       if (UTF8_IS_INVARIANT(*s)) {
-
-           /* An invariant source character is either ASCII or, in EBCDIC, an
-            * ASCII equivalent or a caseless C1 control.  In both these cases,
-            * the lower and upper cases of any character are also invariants
-            * (and title case is the same as upper case).  So it is safe to
-            * use the simple case change macros which avoid the overhead of
-            * the general functions.  Note that if perl were to be extended to
-            * do locale handling in UTF-8 strings, this wouldn't be true in,
-            * for example, Lithuanian or Turkic.  */
-           *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
-           tculen = ulen = 1;
-           need = slen + 1;
-       }
-       else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-           U8 chr;
-
-           /* Similarly, if the source character isn't invariant but is in the
-            * latin1 range (or EBCDIC equivalent thereof), we have the case
-            * changes compiled into perl, and can avoid the overhead of the
-            * general functions.  In this range, the characters are stored as
-            * two UTF-8 bytes, and it so happens that any changed-case version
-            * is also two bytes (in both ASCIIish and EBCDIC machines). */
-           tculen = ulen = 2;
-           need = slen + 1;
-
-           /* Convert the two source bytes to a single Unicode code point
-            * value, change case and save for below */
-           chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
-           if (op_type == OP_LCFIRST) {    /* lower casing is easy */
-               U8 lower = toLOWER_LATIN1(chr);
-               STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
-           }
-           else {      /* ucfirst */
-               U8 upper = toUPPER_LATIN1_MOD(chr);
-
-               /* Most of the latin1 range characters are well-behaved.  Their
-                * title and upper cases are the same, and are also in the
-                * latin1 range.  The macro above returns their upper (hence
-                * title) case, and all that need be done is to save the result
-                * for below.  However, several characters are problematic, and
-                * have to be handled specially.  The MOD in the macro name
-                * above means that these tricky characters all get mapped to
-                * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
-                * This mapping saves some tests for the majority of the
-                * characters */
-
-               if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
-
-                   /* Not tricky.  Just save it. */
-                   STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
-               }
-               else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
-
-                   /* This one is tricky because it is two characters long,
-                    * though the UTF-8 is still two bytes, so the stored
-                    * length doesn't change */
-                   *tmpbuf = 'S';  /* The UTF-8 is 'Ss' */
-                   *(tmpbuf + 1) = 's';
-               }
-               else {
-
-                   /* The other two have their title and upper cases the same,
-                    * but are tricky because the changed-case characters
-                    * aren't in the latin1 range.  They, however, do fit into
-                    * two UTF-8 bytes */
-                   STORE_NON_LATIN1_UC(tmpbuf, chr);    
-               }
-           }
-       }
-       else {
-
-           /* Here, can't short-cut the general case */
-
-           utf8_to_uvchr(s, &ulen);
-           if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
-           else toLOWER_utf8(s, tmpbuf, &tculen);
-
-           /* we can't do in-place if the length changes.  */
-           if (ulen != tculen) inplace = FALSE;
-           need = slen + 1 - ulen + tculen;
-       }
+        /* we can't do in-place if the length changes.  */
+        if (ulen != tculen) inplace = FALSE;
+        need = slen + 1 - ulen + tculen;
      }
      else { /* Non-zero length, non-UTF-8,  Need to consider locale and if
             * latin1 is treated as caseless.  Note that a locale takes
@@ -3857,63 +3757,47 @@ PP(pp_uc)
         bool in_iota_subscript = FALSE;
  
         while (s < send) {
+           STRLEN u;
+           STRLEN ulen;
+           UV uv;
             if (in_iota_subscript && ! is_utf8_mark(s)) {
+
                 /* A non-mark.  Time to output the iota subscript */
  #define GREEK_CAPITAL_LETTER_IOTA 0x0399
  #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
  
                 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
                 in_iota_subscript = FALSE;
-           }
-
-           /* If the UTF-8 character is invariant, then it is in the range
-            * known by the standard macro; result is only one byte long */
-           if (UTF8_IS_INVARIANT(*s)) {
-               *d++ = toUPPER(*s);
-               s++;
-           }
-           else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-
-               /* Likewise, if it fits in a byte, its case change is in our
-                * table */
-               U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
-               U8 upper = toUPPER_LATIN1_MOD(orig);
-               CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
-               s += 2;
-           }
-           else {
-
-               /* Otherwise, need the general UTF-8 case.  Get the changed
-                * case value and copy it to the output buffer */
+            }
  
-               const STRLEN u = UTF8SKIP(s);
-               STRLEN ulen;
+            /* Then handle the current character.  Get the changed case value
+             * and copy it to the output buffer */
  
-               const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
-               if (uv == GREEK_CAPITAL_LETTER_IOTA
-                   && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
-               {
-                   in_iota_subscript = TRUE;
-               }
-               else {
-                   if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
-                       /* If the eventually required minimum size outgrows
-                        * the available space, we need to grow. */
-                       const UV o = d - (U8*)SvPVX_const(dest);
-
-                       /* If someone uppercases one million U+03B0s we
-                        * SvGROW() one million times.  Or we could try
-                        * guessing how much to allocate without allocating too
-                        * much.  Such is life.  See corresponding comment in
-                        * lc code for another option */
-                       SvGROW(dest, min);
-                       d = (U8*)SvPVX(dest) + o;
-                   }
-                   Copy(tmpbuf, d, ulen, U8);
-                   d += ulen;
-               }
-               s += u;
-           }
+            u = UTF8SKIP(s);
+            uv = toUPPER_utf8(s, tmpbuf, &ulen);
+            if (uv == GREEK_CAPITAL_LETTER_IOTA
+                && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
+            {
+                in_iota_subscript = TRUE;
+            }
+            else {
+                if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+                    /* If the eventually required minimum size outgrows the
+                     * available space, we need to grow. */
+                    const UV o = d - (U8*)SvPVX_const(dest);
+
+                    /* If someone uppercases one million U+03B0s we SvGROW()
+                     * one million times.  Or we could try guessing how much to
+                     * allocate without allocating too much.  Such is life.
+                     * See corresponding comment in lc code for another option
+                     * */
+                    SvGROW(dest, min);
+                    d = (U8*)SvPVX(dest) + o;
+                }
+                Copy(tmpbuf, d, ulen, U8);
+                d += ulen;
+            }
+            s += u;
         }
         if (in_iota_subscript) {
             CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
@@ -3943,7 +3827,7 @@ PP(pp_uc)
             else {
                 for (; s < send; d++, s++) {
                     *d = toUPPER_LATIN1_MOD(*s);
-                   if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
+                   if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue;
  
                     /* The mainstream case is the tight loop above.  To avoid
                      * extra tests in that, all three characters that require
@@ -4004,23 +3888,13 @@ PP(pp_uc)
                                                 (send -s) * 2 + 1);
                     d = (U8*)SvPVX(dest) + len;
  
-                   /* And append the current character's upper case in UTF-8 */
-                   CAT_NON_LATIN1_UC(d, *s);
-
                     /* Now process the remainder of the source, converting to
                      * upper and UTF-8.  If a resulting byte is invariant in
                      * UTF-8, output it as-is, otherwise convert to UTF-8 and
                      * append it to the output. */
-
-                   s++;
                     for (; s < send; s++) {
-                       U8 upper = toUPPER_LATIN1_MOD(*s);
-                       if UTF8_IS_INVARIANT(upper) {
-                           *d++ = upper;
-                       }
-                       else {
-                           CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
-                       }
+                       (void) _to_upper_title_latin1(*s, d, &len, 'S');
+                       d += len;
                     }
  
                     /* Here have processed the whole source; no need to continue
@@ -4101,122 +3975,35 @@ PP(pp_lc)
         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
  
         while (s < send) {
-           if (UTF8_IS_INVARIANT(*s)) {
+           const STRLEN u = UTF8SKIP(s);
+           STRLEN ulen;
  
-               /* Invariant characters use the standard mappings compiled in.
-                */
-               *d++ = toLOWER(*s);
-               s++;
-           }
-           else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+           toLOWER_utf8(s, tmpbuf, &ulen);
  
-               /* As do the ones in the Latin1 range */
-               U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
-               CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
-               s += 2;
-           }
-           else {
-               /* Here, is utf8 not in Latin-1 range, have to go out and get
-                * the mappings from the tables. */
+           /* Here is where we would do context-sensitive actions.  See the
+            * commit message for this comment for why there isn't any */
  
-               const STRLEN u = UTF8SKIP(s);
-               STRLEN ulen;
-
-#ifndef CONTEXT_DEPENDENT_CASING
-               toLOWER_utf8(s, tmpbuf, &ulen);
-#else
-/* This is ifdefd out because it probably is the wrong thing to do.  The right
- * thing is probably to have an I/O layer that converts final sigma to regular
- * on input and vice versa (under the correct circumstances) on output.  In
- * effect, the final sigma is just a glyph variation when the regular one
- * occurs at the end of a word.   And we don't really know what's going to be
- * the end of the word until it is finally output, as splitting and joining can
- * occur at any time and change what once was the word end to be in the middle,
- * and vice versa. */
-
-               const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
-
-               /* If the lower case is a small sigma, it may be that we need
-                * to change it to a final sigma.  This happens at the end of 
-                * a word that contains more than just this character, and only
-                * when we started with a capital sigma. */
-               if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
-                   s > send - len &&   /* Makes sure not the first letter */
-                   utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
-               ) {
-
-                   /* We use the algorithm in:
-                    * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
-                    * is a CAPITAL SIGMA): If C is preceded by a sequence
-                    * consisting of a cased letter and a case-ignorable
-                    * sequence, and C is not followed by a sequence consisting
-                    * of a case ignorable sequence and then a cased letter,
-                    * then when lowercasing C, C becomes a final sigma */
-
-                   /* To determine if this is the end of a word, need to peek
-                    * ahead.  Look at the next character */
-                   const U8 *peek = s + u;
-
-                   /* Skip any case ignorable characters */
-                   while (peek < send && is_utf8_case_ignorable(peek)) {
-                       peek += UTF8SKIP(peek);
-                   }
+           if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
  
-                   /* If we reached the end of the string without finding any
-                    * non-case ignorable characters, or if the next such one
-                    * is not-cased, then we have met the conditions for it
-                    * being a final sigma with regards to peek ahead, and so
-                    * must do peek behind for the remaining conditions. (We
-                    * know there is stuff behind to look at since we tested
-                    * above that this isn't the first letter) */
-                   if (peek >= send || ! is_utf8_cased(peek)) {
-                       peek = utf8_hop(s, -1);
-
-                       /* Here are at the beginning of the first character
-                        * before the original upper case sigma.  Keep backing
-                        * up, skipping any case ignorable characters */
-                       while (is_utf8_case_ignorable(peek)) {
-                           peek = utf8_hop(peek, -1);
-                       }
+               /* If the eventually required minimum size outgrows the
+                * available space, we need to grow. */
+               const UV o = d - (U8*)SvPVX_const(dest);
  
-                       /* Here peek points to the first byte of the closest
-                        * non-case-ignorable character before the capital
-                        * sigma.  If it is cased, then by the Unicode
-                        * algorithm, we should use a small final sigma instead
-                        * of what we have */
-                       if (is_utf8_cased(peek)) {
-                           STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
-                                       UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
-                       }
-                   }
-               }
-               else {  /* Not a context sensitive mapping */
-#endif /* End of commented out context sensitive */
-                   if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
-
-                       /* If the eventually required minimum size outgrows
-                        * the available space, we need to grow. */
-                       const UV o = d - (U8*)SvPVX_const(dest);
-
-                       /* If someone lowercases one million U+0130s we
-                        * SvGROW() one million times.  Or we could try
-                        * guessing how much to allocate without allocating too
-                        * much.  Such is life.  Another option would be to
-                        * grow an extra byte or two more each time we need to
-                        * grow, which would cut down the million to 500K, with
-                        * little waste */
-                       SvGROW(dest, min);
-                       d = (U8*)SvPVX(dest) + o;
-                   }
-#ifdef CONTEXT_DEPENDENT_CASING
-               }
-#endif
-               /* Copy the newly lowercased letter to the output buffer we're
-                * building */
-               Copy(tmpbuf, d, ulen, U8);
-               d += ulen;
-               s += u;
+               /* If someone lowercases one million U+0130s we SvGROW() one
+                * million times.  Or we could try guessing how much to
+                * allocate without allocating too much.  Such is life.
+                * Another option would be to grow an extra byte or two more
+                * each time we need to grow, which would cut down the million
+                * to 500K, with little waste */
+               SvGROW(dest, min);
+               d = (U8*)SvPVX(dest) + o;
             }
+
+           /* Copy the newly lowercased letter to the output buffer we're
+            * building */
+           Copy(tmpbuf, d, ulen, U8);
+           d += ulen;
+           s += u;
         }   /* End of looping through the source string */
         SvUTF8_on(dest);
         *d = '\0';