This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pp_formline: don't do get mg on PL_formtarget
[perl5.git] / pp_ctl.c
index e136955..2f0ddcd 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -523,28 +523,30 @@ PP(pp_formline)
 {
     dVAR; dSP; dMARK; dORIGMARK;
     register SV * const tmpForm = *++MARK;
-    SV *formsv;
-    register U32 *fpc;
-    register char *t;
-    const char *f;
+    SV *formsv;                    /* contains text of original format */
+    register U32 *fpc;     /* format ops program counter */
+    register char *t;      /* current append position in target string */
+    const char *f;         /* current position in format string */
     register I32 arg;
-    register SV *sv = NULL;
-    const char *item = NULL;
-    I32 itemsize  = 0;
-    I32 fieldsize = 0;
-    I32 lines = 0;
-    bool chopspace = (strchr(PL_chopset, ' ') != NULL);
-    const char *chophere = NULL;
-    char *linemark = NULL;
+    register SV *sv = NULL; /* current item */
+    const char *item = NULL;/* string value of current item */
+    I32 itemsize  = 0;     /* length of current item, possibly truncated */
+    I32 fieldsize = 0;     /* width of current field */
+    I32 lines = 0;         /* number of lines that have been output */
+    bool chopspace = (strchr(PL_chopset, ' ') != NULL); /* does $: have space */
+    const char *chophere = NULL; /* where to chop current item */
+    char *linemark = NULL;  /* pos of start of line in output */
     NV value;
-    bool gotsome = FALSE;
+    bool gotsome = FALSE;   /* seen at least one non-blank item on this line */
     STRLEN len;
-    STRLEN fudge;
+    STRLEN fudge;          /* estimate of output size in bytes */
     bool item_is_utf8 = FALSE;
     bool targ_is_utf8 = FALSE;
     SV * nsv = NULL;
     const char *fmt;
     MAGIC *mg = NULL;
+    U8 *source;                    /* source of bytes to append */
+    STRLEN to_copy;        /* how may bytes to append */
 
     mg = doparseform(tmpForm);
 
@@ -604,9 +606,23 @@ PP(pp_formline)
        case FF_LITERAL:
            arg = *fpc++;
            if (targ_is_utf8 && !SvUTF8(formsv)) {
+               char *s;
                SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
                *t = '\0';
-               sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv);
+
+               /* this is an unrolled sv_catpvn_utf8_upgrade(),
+                * but with the addition of s/~/ /g */
+               if (!(nsv))
+                   nsv = newSVpvn_flags(f, arg, SVs_TEMP);
+               else
+                   sv_setpvn(nsv, f, arg);
+               SvUTF8_off(nsv);
+               for (s = SvPVX(nsv); s <= SvEND(nsv); s++)
+                   if (*s == '~')
+                       *s = ' ';
+               sv_utf8_upgrade(nsv);
+               sv_catsv(PL_formtarget, nsv);
+
                t = SvEND(PL_formtarget);
                f += arg;
                break;
@@ -614,12 +630,14 @@ PP(pp_formline)
            if (!targ_is_utf8 && DO_UTF8(formsv)) {
                SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
                *t = '\0';
-               sv_utf8_upgrade_flags_grow(PL_formtarget, SV_GMAGIC, fudge + 1);
+               sv_utf8_upgrade_flags_grow(PL_formtarget, 0, fudge + 1);
                t = SvEND(PL_formtarget);
                targ_is_utf8 = TRUE;
            }
-           while (arg--)
-               *t++ = *f++;
+           while (arg--) {
+               *t++ = (*f == '~') ? ' ' : *f;
+               f++;
+           }
            break;
 
        case FF_SKIP:
@@ -798,7 +816,7 @@ PP(pp_formline)
                    if (!targ_is_utf8) {
                        SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
                        *t = '\0';
-                       sv_utf8_upgrade_flags_grow(PL_formtarget, SV_GMAGIC,
+                       sv_utf8_upgrade_flags_grow(PL_formtarget, 0,
                                                                    fudge + 1);
                        t = SvEND(PL_formtarget);
                        targ_is_utf8 = TRUE;
@@ -848,7 +866,7 @@ PP(pp_formline)
                    const int ch = *t++ = *s++;
                    if (iscntrl(ch))
 #else
-                       if ( !((*t++ = *s++) & ~31) )
+                   if ( !((*t++ = *s++) & ~31) )
 #endif
                            t[-1] = ' ';
                }
@@ -873,72 +891,75 @@ PP(pp_formline)
            {
                const bool oneline = fpc[-1] == FF_LINESNGL;
                const char *s = item = SvPV_const(sv, len);
+               const char *const send = s + len;
+
                item_is_utf8 = DO_UTF8(sv);
                itemsize = len;
-               if (itemsize) {
-                   STRLEN to_copy = itemsize;
-                   const char *const send = s + len;
-                   const U8 *source = (const U8 *) s;
-                   U8 *tmp = NULL;
-
-                   gotsome = TRUE;
-                   chophere = s + itemsize;
-                   while (s < send) {
-                       if (*s++ == '\n') {
-                           if (oneline) {
-                               to_copy = s - SvPVX_const(sv) - 1;
-                               chophere = s;
-                               break;
-                           } else {
-                               if (s == send) {
-                                   itemsize--;
-                                   to_copy--;
-                               } else
-                                   lines++;
-                           }
+               if (!itemsize)
+                   break;
+               gotsome = TRUE;
+               chophere = s + itemsize;
+               source = (U8 *) s;
+               to_copy = len;
+               while (s < send) {
+                   if (*s++ == '\n') {
+                       if (oneline) {
+                           to_copy = s - SvPVX_const(sv) - 1;
+                           chophere = s;
+                           break;
+                       } else {
+                           if (s == send) {
+                               itemsize--;
+                               to_copy--;
+                           } else
+                               lines++;
                        }
                    }
-                   if (targ_is_utf8 && !item_is_utf8) {
-                       source = tmp = bytes_to_utf8(source, &to_copy);
+               }
+           }
+
+           {
+               U8 *tmp = NULL;
+               if (targ_is_utf8 && !item_is_utf8) {
+                   source = tmp = bytes_to_utf8(source, &to_copy);
+                   SvCUR_set(PL_formtarget,
+                             t - SvPVX_const(PL_formtarget));
+               } else {
+                   if (item_is_utf8 && !targ_is_utf8) {
+                       /* Upgrade targ to UTF8, and then we reduce it to
+                          a problem we have a simple solution for.  */
                        SvCUR_set(PL_formtarget,
                                  t - SvPVX_const(PL_formtarget));
+                       targ_is_utf8 = TRUE;
+                       /* Don't need get magic.  */
+                       sv_utf8_upgrade_nomg(PL_formtarget);
                    } else {
-                       if (item_is_utf8 && !targ_is_utf8) {
-                           /* Upgrade targ to UTF8, and then we reduce it to
-                              a problem we have a simple solution for.  */
-                           SvCUR_set(PL_formtarget,
-                                     t - SvPVX_const(PL_formtarget));
-                           targ_is_utf8 = TRUE;
-                           /* Don't need get magic.  */
-                           sv_utf8_upgrade_nomg(PL_formtarget);
-                       } else {
-                           SvCUR_set(PL_formtarget,
-                                     t - SvPVX_const(PL_formtarget));
-                       }
-
-                       /* Easy. They agree.  */
-                       assert (item_is_utf8 == targ_is_utf8);
+                       SvCUR_set(PL_formtarget,
+                                 t - SvPVX_const(PL_formtarget));
                    }
-                   SvGROW(PL_formtarget,
-                          SvCUR(PL_formtarget) + to_copy + fudge + 1);
-                   t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
-
-                   Copy(source, t, to_copy, char);
-                   t += to_copy;
-                   SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy);
-                   if (item_is_utf8) {
-                       if (SvGMAGICAL(sv)) {
-                           /* Mustn't call sv_pos_b2u() as it does a second
-                              mg_get(). Is this a bug? Do we need a _flags()
-                              variant? */
-                           itemsize = utf8_length(source, source + itemsize);
-                       } else {
-                           sv_pos_b2u(sv, &itemsize);
-                       }
-                       assert(!tmp);
-                   } else if (tmp) {
-                       Safefree(tmp);
+
+                   /* Easy. They agree.  */
+                   assert (item_is_utf8 == targ_is_utf8);
+               }
+               SvGROW(PL_formtarget,
+                      SvCUR(PL_formtarget) + to_copy + fudge + 1);
+               t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
+
+               Copy(source, t, to_copy, char);
+               t += to_copy;
+               SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy);
+               if (item_is_utf8) {
+                   if (SvGMAGICAL(sv)) {
+                       /* Mustn't call sv_pos_b2u() as it does a second
+                          mg_get(). Is this a bug? Do we need a _flags()
+                          variant? */
+                       itemsize = utf8_length(source, source + itemsize);
+                   } else {
+                       sv_pos_b2u(sv, &itemsize);
                    }
+                   assert(!tmp);
+               } else if (tmp) {
+                   Safefree(tmp);
                }
                break;
            }
@@ -1002,14 +1023,8 @@ PP(pp_formline)
            arg = *fpc++;
            if (gotsome) {
                if (arg) {              /* repeat until fields exhausted? */
-                   *t = '\0';
-                   SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
-                   lines += FmLINES(PL_formtarget);
-                   if (targ_is_utf8)
-                       SvUTF8_on(PL_formtarget);
-                   FmLINES(PL_formtarget) = lines;
-                   SP = ORIGMARK;
-                   RETURNOP(cLISTOP->op_first);
+                   fpc--;
+                   goto end;
                }
            }
            else {
@@ -1046,13 +1061,17 @@ PP(pp_formline)
                break;
            }
        case FF_END:
+       end:
            *t = '\0';
            SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
            if (targ_is_utf8)
                SvUTF8_on(PL_formtarget);
            FmLINES(PL_formtarget) += lines;
            SP = ORIGMARK;
-           RETPUSHYES;
+           if (fpc[-1] == FF_BLANK)
+               RETURNOP(cLISTOP->op_first);
+           else
+               RETPUSHYES;
        }
     }
 }
@@ -4918,17 +4937,17 @@ S_doparseform(pTHX_ SV *sv)
     STRLEN len;
     register char *s = SvPV(sv, len);
     register char *send;
-    register char *base = NULL;
-    register I32 skipspaces = 0;
-    bool noblank   = FALSE;
-    bool repeat    = FALSE;
-    bool postspace = FALSE;
+    register char *base = NULL; /* start of current field */
+    register I32 skipspaces = 0; /* number of contiguous spaces seen */
+    bool noblank   = FALSE; /* ~ or ~~ seen on this line */
+    bool repeat    = FALSE; /* ~~ seen on this line */
+    bool postspace = FALSE; /* a text field may need right padding */
     U32 *fops;
     register U32 *fpc;
-    U32 *linepc = NULL;
+    U32 *linepc = NULL;            /* position of last FF_LINEMARK */
     register I32 arg;
-    bool ischop;
-    bool unchopnum = FALSE;
+    bool ischop;           /* it's a ^ rather than a @ */
+    bool unchopnum = FALSE; /* at least one @ (i.e. non-chop) num field seen */
     int maxops = 12; /* FF_LINEMARK + FF_END + 10 (\0 without preceding \n) */
     MAGIC *mg = NULL;
     SV *sv_copy;
@@ -4951,16 +4970,21 @@ S_doparseform(pTHX_ SV *sv)
        if ( !(!!SvUTF8(old) ^ !!SvUTF8(sv))
              && len == SvCUR(old)
              && strnEQ(SvPVX(old), SvPVX(sv), len)
-       )
+       ) {
+           DEBUG_f(PerlIO_printf(Perl_debug_log,"Re-using compiled format\n"));
            return mg;
+       }
 
+       DEBUG_f(PerlIO_printf(Perl_debug_log, "Re-compiling format\n"));
        Safefree(mg->mg_ptr);
        mg->mg_ptr = NULL;
        SvREFCNT_dec(old);
        mg->mg_obj = NULL;
     }
-    else
+    else {
+       DEBUG_f(PerlIO_printf(Perl_debug_log, "Compiling format\n"));
        mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm, NULL, 0);
+    }
 
     sv_copy = newSVpvn_utf8(s, len, SvUTF8(sv));
     s = SvPV(sv_copy, len); /* work on the copy, not the original */
@@ -4994,10 +5018,10 @@ S_doparseform(pTHX_ SV *sv)
        case '~':
            if (*s == '~') {
                repeat = TRUE;
-               *s = ' ';
+               skipspaces++;
+               s++;
            }
            noblank = TRUE;
-           s[-1] = ' ';
            /* FALL THROUGH */
        case ' ': case '\t':
            skipspaces++;
@@ -5061,7 +5085,7 @@ S_doparseform(pTHX_ SV *sv)
 
            base = s - 1;
            *fpc++ = FF_FETCH;
-           if (*s == '*') {
+           if (*s == '*') { /*  @* or ^*  */
                s++;
                *fpc++ = 2;  /* skip the @* or ^* */
                if (ischop) {
@@ -5070,7 +5094,7 @@ S_doparseform(pTHX_ SV *sv)
                } else
                    *fpc++ = FF_LINEGLOB;
            }
-           else if (*s == '#' || (*s == '.' && s[1] == '#')) {
+           else if (*s == '#' || (*s == '.' && s[1] == '#')) { /* @###, ^### */
                arg = ischop ? 512 : 0;
                base = s - 1;
                while (*s == '#')
@@ -5103,7 +5127,7 @@ S_doparseform(pTHX_ SV *sv)
                *fpc++ = (U16)arg;
                 unchopnum |= ! ischop;
            }
-           else {
+           else {                              /* text field */
                I32 prespace = 0;
                bool ismore = FALSE;
 
@@ -5130,7 +5154,7 @@ S_doparseform(pTHX_ SV *sv)
                *fpc++ = ischop ? FF_CHECKCHOP : FF_CHECKNL;
 
                if (prespace)
-                   *fpc++ = (U16)prespace;
+                   *fpc++ = (U16)prespace; /* add SPACE or HALFSPACE */
                *fpc++ = FF_ITEM;
                if (ismore)
                    *fpc++ = FF_MORE;