This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Perl_sv_vcatpvfn_flags: remove 'asterisk' var
[perl5.git] / sv.c
diff --git a/sv.c b/sv.c
index dfdf57b..cfbc512 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -11875,14 +11875,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
        STRLEN width     = 0;         /* value of "%NNN..."  */
        bool has_precis  = FALSE;     /* has      "%.NNN..." */
        STRLEN precis    = 0;         /* value of "%.NNN..." */
-       bool asterisk    = FALSE;     /* has      "%*..."    */
         bool used_explicit_ix = FALSE;/* has      "%$n..."   */
-       unsigned base    = 0;         /* base to print in, e.g. 8 for %o */
+       int base         = 0;         /* base to print in, e.g. 8 for %o */
        UV uv            = 0;         /* the value to print of int-ish args */
        IV iv            = 0;         /* ditto for signed types */
 
        bool vectorize   = FALSE;     /* has      "%v..."    */
-       bool vectorarg   = FALSE;     /* has      "%*v..."   */
        SV *vecsv        = NULL;      /* the cur arg for %v  */
        bool vec_utf8    = FALSE;     /* SvUTF8(vecsv)       */
        const U8 *vecstr = NULL;      /* SvPVX(vecsv)        */
@@ -11893,7 +11891,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
        I32 efix         = 0;         /* explicit format parameter index */
        I32 ewix         = 0;         /* explicit width index */
        I32 epix         = 0;         /* explicit precision index */
-       I32 evix         = 0;         /* explicit vector index */
        const I32 osvix  = svix;      /* original index in case of bad fmt */
 
        bool is_utf8     = FALSE;     /* is this item utf8?   */
@@ -11981,8 +11978,25 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
            break;
        }
 
+      /* at this point we can expect one of:
+       *
+       *  123  an explicit width
+       *  *    width taken from next arg
+       *  *12$ width taken from 12th arg
+       *       or no width
+       *
+       * But any width specification may be preceded by a v, in one of its
+       * forms:
+       *        v
+       *        *v
+       *        *12$v
+       * So an asterisk may be either a width specifier or a vector
+       * separator arg specifier, and we don't know which initially
+       */
+
       tryasterisk:
        if (*q == '*') {
+            int i;
            q++;
            if ( (ewix = expect_number(&q)) ) {
                if (*q++ == '$') {
@@ -11993,56 +12007,39 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 } else
                    goto unknown;
             }
-           asterisk = TRUE;
-       }
-       if (*q == 'v') {
-           q++;
-           if (vectorize)
-               goto unknown;
-           if ((vectorarg = asterisk)) {
-               evix = ewix;
-               ewix = 0;
-               asterisk = FALSE;
-           }
-           vectorize = TRUE;
-           goto tryasterisk;
-       }
 
-       if (!asterisk)
-       {
-           if(*q == '0') {
-               fill = TRUE;
+            if (*q == 'v') {
+                /* The asterisk was for  *v, *NNN$v: vectorizing, but not
+                 * with the default "." */
                 q++;
+                if (vectorize)
+                    goto unknown;
+                if (args)
+                    vecsv = va_arg(*args, SV*);
+                else if (ewix) {
+                    FETCH_VCATPVFN_ARGUMENT(
+                        vecsv, ewix > 0 && ewix <= svmax, svargs[ewix-1]);
+                } else {
+                    FETCH_VCATPVFN_ARGUMENT(
+                        vecsv, svix < svmax, svargs[svix++]);
+                }
+                dotstr = SvPV_const(vecsv, dotstrlen);
+                /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+                   bad with tied or overloaded values that return UTF8.  */
+                if (DO_UTF8(vecsv))
+                    is_utf8 = TRUE;
+                else if (has_utf8) {
+                    vecsv = sv_mortalcopy(vecsv);
+                    sv_utf8_upgrade(vecsv);
+                    dotstr = SvPV_const(vecsv, dotstrlen);
+                    is_utf8 = TRUE;
+                }
+               ewix = 0;
+                vectorize = TRUE;
+                goto tryasterisk;
             }
-           width = expect_number(&q);
-       }
-
-       if (vectorize && vectorarg) {
-           /* vectorizing, but not with the default "." */
-           if (args)
-               vecsv = va_arg(*args, SV*);
-           else if (evix) {
-                FETCH_VCATPVFN_ARGUMENT(
-                    vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
-           } else {
-                FETCH_VCATPVFN_ARGUMENT(
-                    vecsv, svix < svmax, svargs[svix++]);
-           }
-           dotstr = SvPV_const(vecsv, dotstrlen);
-           /* Keep the DO_UTF8 test *after* the SvPV call, else things go
-              bad with tied or overloaded values that return UTF8.  */
-           if (DO_UTF8(vecsv))
-               is_utf8 = TRUE;
-           else if (has_utf8) {
-               vecsv = sv_mortalcopy(vecsv);
-               sv_utf8_upgrade(vecsv);
-               dotstr = SvPV_const(vecsv, dotstrlen);
-               is_utf8 = TRUE;
-           }               
-       }
 
-       if (asterisk) {
-            int i;
+            /* the asterisk specified a width */
            if (args)
                i = va_arg(*args, int);
            else
@@ -12050,7 +12047,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                    SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0;
            left |= (i < 0);
            width = (i < 0) ? -i : i;
+        }
+       else if (*q == 'v') {
+           q++;
+           if (vectorize)
+               goto unknown;
+           vectorize = TRUE;
+            goto tryasterisk;
+
+        }
+       else {
+        /* explicit width? */
+           if(*q == '0') {
+               fill = TRUE;
+                q++;
+            }
+           width = expect_number(&q);
        }
+
       gotwidth:
 
        /* PRECISION */
@@ -12216,39 +12230,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 
        c = *q++; /* c now holds the conversion type */
 
-        if (argsv && strchr("BbcDdiOopuUXx", c)) {
-            /* XXX va_arg(*args) case? need peek, use va_copy? */
-            SvGETMAGIC(argsv);
-            if (UNLIKELY(SvAMAGIC(argsv)))
-                argsv = sv_2num(argsv);
-            if (UNLIKELY(isinfnansv(argsv)))
-                goto handle_infnan_argsv;
-        }
-
        switch (c) {
 
            /* STRINGS */
 
-       case 'c':
-           if (vectorize)
-               goto unknown;
-           uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
-           if ((uv > 255 ||
-                (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
-               && !IN_BYTES)
-            {
-                assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
-               eptr = ebuf;
-               elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
-               is_utf8 = TRUE;
-           }
-           else {
-               c = (char)uv;
-               eptr = &c;
-               elen = 1;
-           }
-           goto string;
-
        case 's':
            if (vectorize)
                goto unknown;
@@ -12327,7 +12312,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 && !fill
                 && !plus
                 && !has_precis
-                && !asterisk
+                    /* not %*p or %*1$p - any width was explicit */
+                && q[-2] != '*'
+                && q[-2] != '$'
                 && !used_explicit_ix
             ) {
                 if (left) {                    /* %-p (SVf), %-NNNp */
@@ -12365,7 +12352,25 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 
            uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
            base = 16;
-           goto integer;
+           goto do_integer;
+
+       case 'c':
+           if (vectorize)
+               goto unknown;
+            /* Ignore any size specifiers, since they're not documented as
+             * being allowed for %c (ideally we should warn on e.g. '%hc').
+             * Setting a default intsize, along with a positive
+             * (which signals unsigned) base, causes, for C-ish use, the
+             * va_arg to be interpreted as as unsigned int, when it's
+             * actually signed, which will convert -ve values to high +ve
+             * values. Note that unlike the libc %c, values > 255 will
+             * convert to high unicode points rather than being truncated
+             * to 8 bits. For perlish use, it will do SvUV(argsv), which
+             * will again convert -ve args to high -ve values.
+             */
+            intsize = 0;
+            base = 1; /* special value that indicates we're doing a 'c' */
+            goto get_int_arg_val;
 
        case 'D':
 #ifdef IV_IS_QUAD
@@ -12373,7 +12378,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 #else
            intsize = 'l';
 #endif
-            goto do_i;
+            base = -10;
+            goto get_int_arg_val;
 
        case 'd':
             /* probably just a plain %d, but it might be the start of the
@@ -12406,75 +12412,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 
            /* FALLTHROUGH */
        case 'i':
-          do_i:
-           if (vectorize) {
-               STRLEN ulen;
-               if (!veclen)
-                    goto donevalidconversion;
-               if (vec_utf8)
-                   uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
-                                       UTF8_ALLOW_ANYUV);
-               else {
-                   uv = *vecstr;
-                   ulen = 1;
-               }
-               vecstr += ulen;
-               veclen -= ulen;
-               if (plus)
-                    esignbuf[esignlen++] = plus;
-           }
-           else if (args) {
-               switch (intsize) {
-               case 'c':       iv = (char)va_arg(*args, int); break;
-               case 'h':       iv = (short)va_arg(*args, int); break;
-               case 'l':       iv = va_arg(*args, long); break;
-               case 'V':       iv = va_arg(*args, IV); break;
-               case 'z':       iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
-               case 't':       iv = va_arg(*args, ptrdiff_t); break;
-#endif
-               default:        iv = va_arg(*args, int); break;
-#ifdef I_STDINT
-               case 'j':       iv = va_arg(*args, intmax_t); break;
-#endif
-               case 'q':
-#if IVSIZE >= 8
-                               iv = va_arg(*args, Quad_t); break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           else {
-               IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       iv = (char)tiv; break;
-               case 'h':       iv = (short)tiv; break;
-               case 'l':       iv = (long)tiv; break;
-               case 'V':
-               default:        iv = tiv; break;
-               case 'q':
-#if IVSIZE >= 8
-                               iv = (Quad_t)tiv; break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           if ( !vectorize )   /* we already set uv above */
-           {
-               if (iv >= 0) {
-                   uv = iv;
-                   if (plus)
-                       esignbuf[esignlen++] = plus;
-               }
-               else {
-                   uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
-                   esignbuf[esignlen++] = '-';
-               }
-           }
-           base = 10;
-           goto integer;
+            base = -10;
+            goto get_int_arg_val;
 
        case 'U':
 #ifdef IV_IS_QUAD
@@ -12485,12 +12424,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
            /* FALLTHROUGH */
        case 'u':
            base = 10;
-           goto uns_integer;
+           goto get_int_arg_val;
 
        case 'B':
        case 'b':
            base = 2;
-           goto uns_integer;
+           goto get_int_arg_val;
 
        case 'O':
 #ifdef IV_IS_QUAD
@@ -12501,16 +12440,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
            /* FALLTHROUGH */
        case 'o':
            base = 8;
-           goto uns_integer;
+           goto get_int_arg_val;
 
        case 'X':
        case 'x':
            base = 16;
 
-       uns_integer:
+          get_int_arg_val:
+
            if (vectorize) {
                STRLEN ulen;
-       vector:
+
+                if (base < 0) {
+                    base = -base;
+                    if (plus)
+                         esignbuf[esignlen++] = plus;
+                }
+
+             vector:
                if (!veclen)
                     goto donevalidconversion;
                if (vec_utf8)
@@ -12523,46 +12470,119 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                vecstr += ulen;
                veclen -= ulen;
            }
-           else if (args) {
-               switch (intsize) {
-               case 'c':  uv = (unsigned char)va_arg(*args, unsigned); break;
-               case 'h':  uv = (unsigned short)va_arg(*args, unsigned); break;
-               case 'l':  uv = va_arg(*args, unsigned long); break;
-               case 'V':  uv = va_arg(*args, UV); break;
-               case 'z':  uv = va_arg(*args, Size_t); break;
+           else {
+                /* test arg for inf/nan. This can trigger an unwanted
+                 * 'str' overload, so manually force 'num' overload first
+                 * if necessary */
+                if (argsv) {
+                    SvGETMAGIC(argsv);
+                    if (UNLIKELY(SvAMAGIC(argsv)))
+                        argsv = sv_2num(argsv);
+                    if (UNLIKELY(isinfnansv(argsv)))
+                        goto handle_infnan_argsv;
+                }
+
+                if (base < 0) {
+                    /* signed int type */
+                    base = -base;
+                    if (args) {
+                        switch (intsize) {
+                        case 'c':  iv = (char)va_arg(*args, int);  break;
+                        case 'h':  iv = (short)va_arg(*args, int); break;
+                        case 'l':  iv = va_arg(*args, long);       break;
+                        case 'V':  iv = va_arg(*args, IV);         break;
+                        case 'z':  iv = va_arg(*args, SSize_t);    break;
 #ifdef HAS_PTRDIFF_T
-               case 't':  uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+                        case 't':  iv = va_arg(*args, ptrdiff_t);  break;
 #endif
+                        default:   iv = va_arg(*args, int);        break;
 #ifdef I_STDINT
-               case 'j':  uv = va_arg(*args, uintmax_t); break;
+                        case 'j':  iv = va_arg(*args, intmax_t);   break;
 #endif
-               default:   uv = va_arg(*args, unsigned); break;
-               case 'q':
+                        case 'q':
 #if IVSIZE >= 8
-                          uv = va_arg(*args, Uquad_t); break;
+                                   iv = va_arg(*args, Quad_t);     break;
 #else
-                          goto unknown;
+                                   goto unknown;
 #endif
-               }
-           }
-           else {
-               UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       uv = (unsigned char)tuv; break;
-               case 'h':       uv = (unsigned short)tuv; break;
-               case 'l':       uv = (unsigned long)tuv; break;
-               case 'V':
-               default:        uv = tuv; break;
-               case 'q':
+                        }
+                    }
+                    else {
+                        IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c':  iv = (char)tiv;   break;
+                        case 'h':  iv = (short)tiv;  break;
+                        case 'l':  iv = (long)tiv;   break;
+                        case 'V':
+                        default:   iv = tiv;         break;
+                        case 'q':
 #if IVSIZE >= 8
-                               uv = (Uquad_t)tuv; break;
+                                   iv = (Quad_t)tiv; break;
 #else
-                               goto unknown;
+                                   goto unknown;
 #endif
-               }
-           }
+                        }
+                    }
 
-       integer:
+                    /* now convert iv to uv */
+                    if (iv >= 0) {
+                        uv = iv;
+                        if (plus)
+                            esignbuf[esignlen++] = plus;
+                    }
+                    else {
+                        uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+                        esignbuf[esignlen++] = '-';
+                    }
+                }
+                else {
+                    /* unsigned int type */
+                    if (args) {
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+                                  break;
+                        case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+                                  break;
+                        case 'l': uv = va_arg(*args, unsigned long); break;
+                        case 'V': uv = va_arg(*args, UV);            break;
+                        case 'z': uv = va_arg(*args, Size_t);        break;
+#ifdef HAS_PTRDIFF_T
+                                  /* will sign extend, but there is no
+                                   * uptrdiff_t, so oh well */
+                        case 't': uv = va_arg(*args, ptrdiff_t);     break;
+#endif
+#ifdef I_STDINT
+                        case 'j': uv = va_arg(*args, uintmax_t);     break;
+#endif
+                        default:  uv = va_arg(*args, unsigned);      break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = va_arg(*args, Uquad_t);       break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                    else {
+                        UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)tuv;  break;
+                        case 'h': uv = (unsigned short)tuv; break;
+                        case 'l': uv = (unsigned long)tuv;  break;
+                        case 'V':
+                        default:  uv = tuv;                 break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = (Uquad_t)tuv;        break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                }
+            }
+
+       do_integer:
            {
                char *ptr = ebuf + sizeof ebuf;
                bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
@@ -12599,6 +12619,28 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                        esignbuf[esignlen++] = c;
                    }
                    break;
+
+               case 1:
+                    /* special-case: base 1 indicates a 'c' format:
+                     * we use the common code for extracting a uv,
+                     * but handle that value differently here than
+                     * all the other int types */
+                    if ((uv > 255 ||
+                         (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+                        && !IN_BYTES)
+                    {
+                        assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+                        eptr = ebuf;
+                        elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+                        is_utf8 = TRUE;
+                    }
+                    else {
+                        c = (char)uv;
+                        eptr = &c;
+                        elen = 1;
+                    }
+                    goto string;
+
                default:                /* it had better be ten or less */
                    do {
                        dig = uv % base;