Revert "Remove some deprecated functions from mathoms.c"

[perl5.git] / op.c
diff --git a/op.c b/op.c

index 1d31928..ddeb484 100644 (file)
--- a/op.c
+++ b/op.c
@@ -1223,8 +1223,7 @@ S_cop_free(pTHX_ COP* cop)
  }
  
  STATIC void
-S_forget_pmop(pTHX_ PMOP *const o
-             )
+S_forget_pmop(pTHX_ PMOP *const o)
  {
      HV * const pmstash = PmopSTASH(o);
  
@@ -1545,7 +1544,8 @@ Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other)
      OpTYPE_set(logop, type);
      logop->op_first = first;
      logop->op_other = other;
-    logop->op_flags = OPf_KIDS;
+    if (first)
+        logop->op_flags = OPf_KIDS;
      while (kid && OpHAS_SIBLING(kid))
          kid = OpSIBLING(kid);
      if (kid)
@@ -1949,6 +1949,11 @@ Perl_scalarvoid(pTHX_ OP *arg)
              if (o->op_type == OP_REPEAT)
                  scalar(cBINOPo->op_first);
              goto func_ops;
+       case OP_CONCAT:
+            if ((o->op_flags & OPf_STACKED) &&
+                   !(o->op_private & OPpCONCAT_NESTED))
+                break;
+           goto func_ops;
          case OP_SUBSTR:
              if (o->op_private == 4)
                  break;
@@ -2662,6 +2667,7 @@ S_maybe_multiconcat(pTHX_ OP *o)
  
      SSize_t nargs  = 0;
      SSize_t nconst = 0;
+    SSize_t nadjconst  = 0; /* adjacent consts - may be demoted to args */
      STRLEN variant;
      bool utf8 = FALSE;
      bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
@@ -2673,6 +2679,7 @@ S_maybe_multiconcat(pTHX_ OP *o)
      U8 private_flags  = 0;   /* ... op_private of the multiconcat op */
      bool is_sprintf = FALSE; /* we're optimising an sprintf */
      bool is_targable  = FALSE; /* targetop is an OPpTARGET_MY candidate */
+    bool prev_was_const = FALSE; /* previous arg was a const */
  
      /* -----------------------------------------------------------------
       * Phase 1:
@@ -2889,7 +2896,7 @@ S_maybe_multiconcat(pTHX_ OP *o)
              last = TRUE;
          }
  
-        if (   nargs              >  PERL_MULTICONCAT_MAXARG        - 2
+        if (   nargs + nadjconst  >  PERL_MULTICONCAT_MAXARG        - 2
              || (argp - args + 1)  > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
          {
              /* At least two spare slots are needed to decompose both
@@ -2920,10 +2927,16 @@ S_maybe_multiconcat(pTHX_ OP *o)
              argp++->p = sv;
              utf8   |= cBOOL(SvUTF8(sv));
              nconst++;
+            if (prev_was_const)
+                /* this const may be demoted back to a plain arg later;
+                 * make sure we have enough arg slots left */
+                nadjconst++;
+            prev_was_const = !prev_was_const;
          }
          else {
              argp++->p = NULL;
              nargs++;
+            prev_was_const = FALSE;
          }
  
          if (last)
@@ -3190,16 +3203,16 @@ S_maybe_multiconcat(pTHX_ OP *o)
              OP *prev;
  
              /* set prev to the sibling *before* the arg to be cut out,
-             * e.g.:
+             * e.g. when cutting EXPR:
               *
               *         |
-             * kid=  CONST
+             * kid=  CONCAT
               *         |
-             * prev= CONST -- EXPR
+             * prev= CONCAT -- EXPR
               *         |
               */
              if (argp == args && kid->op_type != OP_CONCAT) {
-                /* in e.g. '$x . = f(1)' there's no RHS concat tree
+                /* in e.g. '$x .= f(1)' there's no RHS concat tree
                   * so the expression to be cut isn't kid->op_last but
                   * kid itself */
                  OP *o1, *o2;
@@ -4067,7 +4080,10 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags)
      case OP_RV2HV:
         if (type == OP_REFGEN && o->op_flags & OPf_PARENS) {
             PL_modcount = RETURN_UNLIMITED_NUMBER;
-           return o;           /* Treat \(@foo) like ordinary list. */
+           /* Treat \(@foo) like ordinary list, but still mark it as modi-
+              fiable since some contexts need to know.  */
+           o->op_flags |= OPf_MOD;
+           return o;
         }
         /* FALLTHROUGH */
      case OP_RV2GV:
@@ -4132,7 +4148,12 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags)
      case OP_PADHV:
         PL_modcount = RETURN_UNLIMITED_NUMBER;
         if (type == OP_REFGEN && o->op_flags & OPf_PARENS)
-           return o;           /* Treat \(@foo) like ordinary list. */
+       {
+           /* Treat \(@foo) like ordinary list, but still mark it as modi-
+              fiable since some contexts need to know.  */
+           o->op_flags |= OPf_MOD;
+           return o;
+       }
         if (scalar_mod_type(o, type))
             goto nomod;
         if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR
@@ -6280,6 +6301,10 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last)
      return fold_constants(op_integerize(op_std_init((OP *)binop)));
  }
  
+/* Helper function for S_pmtrans(): comparison function to sort an array
+ * of codepoint range pairs. Sorts by start point, or if equal, by end
+ * point */
+
  static int uvcompare(const void *a, const void *b)
      __attribute__nonnull__(1)
      __attribute__nonnull__(2)
@@ -6297,24 +6322,39 @@ static int uvcompare(const void *a, const void *b)
      return 0;
  }
  
+/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl
+ * containing the search and replacement strings, assemble into
+ * a translation table attached as o->op_pv.
+ * Free expr and repl.
+ * It expects the toker to have already set the
+ *   OPpTRANS_COMPLEMENT
+ *   OPpTRANS_SQUASH
+ *   OPpTRANS_DELETE
+ * flags as appropriate; this function may add
+ *   OPpTRANS_FROM_UTF
+ *   OPpTRANS_TO_UTF
+ *   OPpTRANS_IDENTICAL
+ *   OPpTRANS_GROWS
+ * flags
+ */
+
  static OP *
  S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
  {
      SV * const tstr = ((SVOP*)expr)->op_sv;
-    SV * const rstr =
-                             ((SVOP*)repl)->op_sv;
+    SV * const rstr = ((SVOP*)repl)->op_sv;
      STRLEN tlen;
      STRLEN rlen;
      const U8 *t = (U8*)SvPV_const(tstr, tlen);
      const U8 *r = (U8*)SvPV_const(rstr, rlen);
-    I32 i;
-    I32 j;
-    I32 grows = 0;
-    short *tbl;
-
-    const I32 complement = o->op_private & OPpTRANS_COMPLEMENT;
-    const I32 squash     = o->op_private & OPpTRANS_SQUASH;
-    I32 del              = o->op_private & OPpTRANS_DELETE;
+    Size_t i, j;
+    bool grows = FALSE;
+    OPtrans_map *tbl;
+    SSize_t struct_size; /* malloced size of table struct */
+
+    const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT);
+    const bool squash     = cBOOL(o->op_private & OPpTRANS_SQUASH);
+    const bool del        = cBOOL(o->op_private & OPpTRANS_DELETE);
      SV* swash;
  
      PERL_ARGS_ASSERT_PMTRANS;
@@ -6328,6 +6368,14 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
          o->op_private |= OPpTRANS_TO_UTF;
  
      if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
+
+        /* for utf8 translations, op_sv will be set to point to a swash
+         * containing codepoint ranges. This is done by first assembling
+         * a textual representation of the ranges in listsv then compiling
+         * it using swash_init(). For more details of the textual format,
+         * see L<perlunicode.pod/"User-Defined Character Properties"> .
+         */
+
         SV* const listsv = newSVpvs("# comment\n");
         SV* transv = NULL;
         const U8* tend = t + tlen;
@@ -6369,15 +6417,24 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
   * odd.  */
  
         if (complement) {
+            /* utf8 and /c:
+             * replace t/tlen/tend with a version that has the ranges
+             * complemented
+             */
             U8 tmpbuf[UTF8_MAXBYTES+1];
             UV *cp;
             UV nextmin = 0;
             Newx(cp, 2*tlen, UV);
             i = 0;
             transv = newSVpvs("");
+
+            /* convert search string into array of (start,end) range
+             * codepoint pairs stored in cp[]. Most "ranges" will start
+             * and end at the same char */
             while (t < tend) {
                 cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
                 t += ulen;
+                /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */
                 if (t < tend && *t == ILLEGAL_UTF8_BYTE) {
                     t++;
                     cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
@@ -6388,7 +6445,19 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                 }
                 i++;
             }
+
+            /* sort the ranges */
             qsort(cp, i, 2*sizeof(UV), uvcompare);
+
+            /* Create a utf8 string containing the complement of the
+             * codepoint ranges. For example if cp[] contains [A,B], [C,D],
+             * then transv will contain the equivalent of:
+             * join '', map chr, 0,     ILLEGAL_UTF8_BYTE, A - 1,
+             *                   B + 1, ILLEGAL_UTF8_BYTE, C - 1,
+             *                   D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff;
+             * A range of a single char skips the ILLEGAL_UTF8_BYTE and
+             * end cp.
+             */
             for (j = 0; j < i; j++) {
                 UV  val = cp[2*j];
                 diff = val - nextmin;
@@ -6406,6 +6475,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                 if (val >= nextmin)
                     nextmin = val + 1;
             }
+
             t = uvchr_to_utf8(tmpbuf,nextmin);
             sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
             {
@@ -6422,6 +6492,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         else if (!rlen && !del) {
             r = t; rlen = tlen; rend = tend;
         }
+
         if (!squash) {
                 if ((!rlen && !del) || t == r ||
                     (tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
@@ -6430,6 +6501,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                 }
         }
  
+        /* extract char ranges from t and r and append them to listsv */
+
         while (t < tend || tfirst <= tlast) {
             /* see if we need more "t" chars */
             if (tfirst > tlast) {
@@ -6502,9 +6575,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
             tfirst += diff + 1;
         }
  
+        /* compile listsv into a swash and attach to o */
+
         none = ++max;
         if (del)
-           del = ++max;
+           ++max;
  
         if (max > 0xffff)
             bits = 32;
@@ -6543,50 +6618,88 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         goto warnins;
      }
  
-    tbl = (short*)PerlMemShared_calloc(
-       (o->op_private & OPpTRANS_COMPLEMENT) &&
-           !(o->op_private & OPpTRANS_DELETE) ? 258 : 256,
-       sizeof(short));
+    /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup
+     * table. Entries with the value -1 indicate chars not to be
+     * translated, while -2 indicates a search char without a
+     * corresponding replacement char under /d.
+     *
+     * Normally, the table has 256 slots. However, in the presence of
+     * /c, the search charlist has an implicit \x{100}-\x{7fffffff}
+     * added, and if there are enough replacement chars to start pairing
+     * with the \x{100},... search chars, then a larger (> 256) table
+     * is allocated.
+     *
+     * In addition, regardless of whether under /c, an extra slot at the
+     * end is used to store the final repeating char, or -3 under an empty
+     * replacement list, or -2 under /d; which makes the runtime code
+     * easier.
+     *
+     * The toker will have already expanded char ranges in t and r.
+     */
+
+    /* Initially allocate 257-slot table: 256 for basic (non /c) usage,
+     * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0.
+     * The OPtrans_map struct already contains one slot; hence the -1.
+     */
+    struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short);
+    tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1);
+    tbl->size = 256;
      cPVOPo->op_pv = (char*)tbl;
+
      if (complement) {
-       for (i = 0; i < (I32)tlen; i++)
-           tbl[t[i]] = -1;
+        Size_t excess;
+
+        /* in this branch, j is a count of 'consumed' (i.e. paired off
+         * with a search char) replacement chars (so j <= rlen always)
+         */
+       for (i = 0; i < tlen; i++)
+           tbl->map[t[i]] = -1;
+
         for (i = 0, j = 0; i < 256; i++) {
-           if (!tbl[i]) {
-               if (j >= (I32)rlen) {
+           if (!tbl->map[i]) {
+               if (j == rlen) {
                     if (del)
-                       tbl[i] = -2;
+                       tbl->map[i] = -2;
                     else if (rlen)
-                       tbl[i] = r[j-1];
+                       tbl->map[i] = r[j-1];
                     else
-                       tbl[i] = (short)i;
+                       tbl->map[i] = (short)i;
                 }
                 else {
-                   if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j]))
-                       grows = 1;
-                   tbl[i] = r[j++];
+                   tbl->map[i] = r[j++];
                 }
+                if (   tbl->map[i] >= 0
+                    &&  UVCHR_IS_INVARIANT((UV)i)
+                    && !UVCHR_IS_INVARIANT((UV)(tbl->map[i]))
+                )
+                    grows = TRUE;
             }
         }
-       if (!del) {
-           if (!rlen) {
-               j = rlen;
-               if (!squash)
-                   o->op_private |= OPpTRANS_IDENTICAL;
-           }
-           else if (j >= (I32)rlen)
-               j = rlen - 1;
-           else {
-               tbl = 
-                   (short *)
-                   PerlMemShared_realloc(tbl,
-                                         (0x101+rlen-j) * sizeof(short));
-               cPVOPo->op_pv = (char*)tbl;
-           }
-           tbl[0x100] = (short)(rlen - j);
-           for (i=0; i < (I32)rlen - j; i++)
-               tbl[0x101+i] = r[j+i];
-       }
+
+        ASSUME(j <= rlen);
+        excess = rlen - j;
+
+        if (excess) {
+            /* More replacement chars than search chars:
+             * store excess replacement chars at end of main table.
+             */
+
+            struct_size += excess;
+            tbl = (OPtrans_map*)PerlMemShared_realloc(tbl,
+                        struct_size + excess * sizeof(short));
+            tbl->size += excess;
+            cPVOPo->op_pv = (char*)tbl;
+
+            for (i = 0; i < excess; i++)
+                tbl->map[i + 256] = r[j+i];
+        }
+        else {
+            /* no more replacement chars than search chars */
+            if (!rlen && !del && !squash)
+                o->op_private |= OPpTRANS_IDENTICAL;
+        }
+
+        tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3;
      }
      else {
         if (!rlen && !del) {
@@ -6597,26 +6710,30 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) {
             o->op_private |= OPpTRANS_IDENTICAL;
         }
+
         for (i = 0; i < 256; i++)
-           tbl[i] = -1;
-       for (i = 0, j = 0; i < (I32)tlen; i++,j++) {
-           if (j >= (I32)rlen) {
+           tbl->map[i] = -1;
+       for (i = 0, j = 0; i < tlen; i++,j++) {
+           if (j >= rlen) {
                 if (del) {
-                   if (tbl[t[i]] == -1)
-                       tbl[t[i]] = -2;
+                   if (tbl->map[t[i]] == -1)
+                       tbl->map[t[i]] = -2;
                     continue;
                 }
                 --j;
             }
-           if (tbl[t[i]] == -1) {
+           if (tbl->map[t[i]] == -1) {
                  if (     UVCHR_IS_INVARIANT(t[i])
                      && ! UVCHR_IS_INVARIANT(r[j]))
-                   grows = 1;
-               tbl[t[i]] = r[j];
+                   grows = TRUE;
+               tbl->map[t[i]] = r[j];
             }
         }
+        tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3;
      }
  
+    /* both non-utf8 and utf8 code paths end up here */
+
    warnins:
      if(del && rlen == tlen) {
         Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); 
@@ -6632,6 +6749,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
      return o;
  }
  
+
  /*
  =for apidoc Am|OP *|newPMOP|I32 type|I32 flags
  
@@ -6833,9 +6951,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor)
                 op_null(scope);
             }
  
-           if (is_compiletime)
-               /* runtime finalizes as part of finalizing whole tree */
-                optimize_optree(o);
+            /* XXX optimize_optree() must be called on o before
+             * CALL_PEEP(), as currently S_maybe_multiconcat() can't
+             * currently cope with a peephole-optimised optree.
+             * Calling optimize_optree() here ensures that condition
+             * is met, but may mean optimize_optree() is applied
+             * to the same optree later (where hopefully it won't do any
+             * harm as it can't convert an op to multiconcat if it's
+             * already been converted */
+            optimize_optree(o);
  
             /* have to peep the DOs individually as we've removed it from
              * the op_next chain */
@@ -8948,6 +9072,13 @@ S_looks_like_bool(pTHX_ const OP *o)
         case OP_FLOP:
  
             return TRUE;
+
+       case OP_INDEX:
+       case OP_RINDEX:
+            /* optimised-away (index() != -1) or similar comparison */
+            if (o->op_private & OPpTRUEBOOL)
+                return TRUE;
+            return FALSE;
         
         case OP_CONST:
             /* Detect comparisons that have been optimized away */
@@ -8957,7 +9088,6 @@ S_looks_like_bool(pTHX_ const OP *o)
                 return TRUE;
             else
                 return FALSE;
-
         /* FALLTHROUGH */
         default:
             return FALSE;
@@ -9728,9 +9858,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs,
            Also, we may be called from load_module at run time, so
            PL_curstash (which sets CvSTASH) may not point to the stash the
            sub is stored in.  */
+       /* XXX This optimization is currently disabled for packages other
+              than main, since there was too much CPAN breakage.  */
         const I32 flags =
            ec ? GV_NOADD_NOINIT
               :   (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop))
+              || PL_curstash != PL_defstash
                || memchr(name, ':', namlen) || memchr(name, '\'', namlen)
                     ? gv_fetch_flags
                     : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL;
@@ -10907,6 +11040,7 @@ Perl_ck_backtick(pTHX_ OP *o)
      OP *newop = NULL;
      OP *sibl;
      PERL_ARGS_ASSERT_CK_BACKTICK;
+    o = ck_fun(o);
      /* qx and `` have a null pushmark; CORE::readpipe has only one kid. */
      if (o->op_flags & OPf_KIDS && (sibl = OpSIBLING(cUNOPo->op_first))
       && (gv = gv_override("readpipe",8)))
@@ -10932,12 +11066,6 @@ Perl_ck_bitop(pTHX_ OP *o)
  
      o->op_private = (U8)(PL_hints & HINT_INTEGER);
  
-    if (o->op_type == OP_NBIT_OR     || o->op_type == OP_SBIT_OR
-     || o->op_type == OP_NBIT_XOR    || o->op_type == OP_SBIT_XOR
-     || o->op_type == OP_NBIT_AND    || o->op_type == OP_SBIT_AND
-     || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT)
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE),
-                             "The bitwise feature is experimental");
      if (!(o->op_flags & OPf_STACKED) /* Not an assignment */
             && OP_IS_INFIX_BIT(o->op_type))
      {