5.30.3 on Monday

[perl5.git] / op.c
diff --git a/op.c b/op.c

index 322d6d6..0ddc710 100644 (file)
--- a/op.c
+++ b/op.c
@@ -699,8 +699,6 @@ S_bad_type_pv(pTHX_ I32 n, const char *t, const OP *o, const OP *kid)
                  (int)n, PL_op_desc[(o)->op_type], t, OP_DESC(kid)), 0);
  }
  
-/* remove flags var, its unused in all callers, move to to right end since gv
-  and kid are always the same */
  STATIC void
  S_bad_type_gv(pTHX_ I32 n, GV *gv, const OP *kid, const char *t)
  {
@@ -1434,7 +1432,7 @@ op_sibling nodes.  By analogy with the perl-level C<splice()> function, allows
  you to delete zero or more sequential nodes, replacing them with zero or
  more different nodes.  Performs the necessary op_first/op_last
  housekeeping on the parent node and op_sibling manipulation on the
-children.  The last deleted node will be marked as as the last node by
+children.  The last deleted node will be marked as the last node by
  updating the op_sibling/op_sibparent or op_moresib field as appropriate.
  
  Note that op_next is not manipulated, and nodes are not freed; that is the
@@ -2991,7 +2989,7 @@ S_maybe_multiconcat(pTHX_ OP *o)
      }
  
      if (targetop) {
-        /* Can targetop (the LHS) if it's a padsv, be be optimised
+        /* Can targetop (the LHS), if it's a padsv, be optimised
           * away and use OPpTARGET_MY instead?
           */
          if (    (targetop->op_type == OP_PADSV)
@@ -3239,7 +3237,7 @@ S_maybe_multiconcat(pTHX_ OP *o)
       *  X .= Y
       *
       * otherwise we could be doing something like $x = "foo", which
-     * if treated as as a concat, would fail to COW.
+     * if treated as a concat, would fail to COW.
       */
      if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
          return;
@@ -6970,7 +6968,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
       * One of the important characteristics to know about the input is whether
       * the transliteration may be done in place, or does a temporary need to be
       * allocated, then copied.  If the replacement for every character in every
-     * possible string takes up no more bytes than the the character it
+     * possible string takes up no more bytes than the character it
       * replaces, then it can be edited in place.  Otherwise the replacement
       * could overwrite a byte we are about to read, depending on the strings
       * being processed.  The comments and variable names here refer to this as
@@ -7063,12 +7061,13 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
       * these up into smaller chunks, but doesn't merge any together.  This
       * makes it easy to find the instances it's looking for.  A second pass is
       * done after this has been determined which merges things together to
-     * shrink the table for runtime.  For ASCII platforms, the table is
-     * trivial, given below, and uses the fundamental characteristics of UTF-8
-     * to construct the values.  For EBCDIC, it isn't so, and we rely on a
-     * table constructed by the perl script that generates these kinds of
-     * things */
-#ifndef EBCDIC
+     * shrink the table for runtime.  The table below is used for both ASCII
+     * and EBCDIC platforms.  On EBCDIC, the byte length is not monotonically
+     * increasing for code points below 256.  To correct for that, the macro
+     * CP_ADJUST defined below converts those code points to Unicode in the first
+     * pass, and we use the ASCII partition values.  That works because the
+     * growth factor will be unaffected, which is all that is calculated during
+     * the first pass. */
      UV PL_partition_by_byte_length[] = {
          0,
          0x80,   /* Below this is 1 byte representations */
@@ -7085,8 +7084,6 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
  
      };
  
-#endif
-
      PERL_ARGS_ASSERT_PMTRANS;
  
      PL_hints |= HINT_BLOCK_SCOPE;
@@ -7214,6 +7211,21 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
              t_array = invlist_array(t_invlist);
          }
  
+/* As noted earlier, we convert EBCDIC code points to Unicode in the first pass
+ * so as to get the well-behaved length 1 vs length 2 boundary.  Only code
+ * points below 256 differ between the two character sets in this regard.  For
+ * these, we also can't have any ranges, as they have to be individually
+ * converted.  In pass 2, and on non-EBCDIC builds, nothing is adjusted. */
+#ifdef EBCDIC
+#  define CP_ADJUST(x)          ((pass2) ? (x) : NATIVE_TO_UNI(x))
+#  define FORCE_RANGE_LEN_1(x)  ((pass2) ? 0 : ((x) < 256))
+#  define CP_SKIP(x)            ((pass2) ? UVCHR_SKIP(x) : OFFUNISKIP(x))
+#else   /* ! EBCDIC */
+#  define CP_ADJUST(x)          (x)
+#  define FORCE_RANGE_LEN_1(x)  0
+#  define CP_SKIP(x)            UVCHR_SKIP(x)
+#endif  /* EBCDIC */
+
          /* And the mapping of each of the ranges is initialized.  Initially,
           * everything is TR_UNLISTED. */
          for (i = 0; i < len; i++) {
@@ -7347,7 +7359,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
  
                      /* Here, not in the middle of a range, and not UTF-8.  The
                       * next code point is the single byte where we're at */
-                    t_cp = *t;
+                    t_cp = CP_ADJUST(*t);
                      t_range_count = 1;
                      t++;
                  }
@@ -7358,7 +7370,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                       * next code point is the next UTF-8 char in the input.  We
                       * know the input is valid, because the toker constructed
                       * it */
-                    t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+                    t_cp = CP_ADJUST(valid_utf8_to_uvchr(t, &t_char_len));
                      t += t_char_len;
  
                      /* UTF-8 strings (only) have been parsed in toke.c to have
@@ -7366,7 +7378,9 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                       * the first element of a range.  If so, get the final
                       * element and calculate the range size.  If not, the range
                       * size is 1 */
-                    if (t < tend && *t == RANGE_INDICATOR) {
+                    if (   t < tend && *t == RANGE_INDICATOR
+                        && ! FORCE_RANGE_LEN_1(t_cp))
+                    {
                          t++;
                          t_range_count = valid_utf8_to_uvchr(t, &t_char_len)
                                        - t_cp + 1;
@@ -7398,16 +7412,18 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                  }
                  else {
                      if (! rstr_utf8) {
-                        r_cp = *r;
+                        r_cp = CP_ADJUST(*r);
                          r_range_count = 1;
                          r++;
                      }
                      else {
                          Size_t r_char_len;
  
-                        r_cp = valid_utf8_to_uvchr(r, &r_char_len);
+                        r_cp = CP_ADJUST(valid_utf8_to_uvchr(r, &r_char_len));
                          r += r_char_len;
-                        if (r < rend && *r == RANGE_INDICATOR) {
+                        if (   r < rend && *r == RANGE_INDICATOR
+                            && ! FORCE_RANGE_LEN_1(r_cp))
+                        {
                              r++;
                              r_range_count = valid_utf8_to_uvchr(r,
                                                      &r_char_len) - r_cp + 1;
@@ -7539,7 +7555,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                   * code point in the rhs against any code point in the lhs. */
                  if ( ! pass2
                      && r_cp_end != TR_SPECIAL_HANDLING
-                    && UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end))
+                    && CP_SKIP(t_cp_end) < CP_SKIP(r_cp_end))
                  {
                      /* Here, we will need to make a copy of the input string
                       * before doing the transliteration.  The worst possible
@@ -7562,8 +7578,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                       * string not being UTF-8 */
                      NV t_size = (can_force_utf8 && t_cp < 256)
                                  ? 1
-                                : UVCHR_SKIP(t_cp_end);
-                    NV ratio = UVCHR_SKIP(r_cp_end) / t_size;
+                                : CP_SKIP(t_cp_end);
+                    NV ratio = CP_SKIP(r_cp_end) / t_size;
  
                      o->op_private |= OPpTRANS_GROWS;
  
@@ -7596,8 +7612,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                       * is if it 'grows'.  But in the 2nd pass, there's no
                       * reason to not merge */
                      if (   (i > 0 && (   pass2
-                                      || UVCHR_SKIP(t_array[i-1])
-                                                        == UVCHR_SKIP(t_cp)))
+                                      || CP_SKIP(t_array[i-1])
+                                                            == CP_SKIP(t_cp)))
                          && (   (   r_cp == TR_SPECIAL_HANDLING
                                  && r_map[i-1] == TR_SPECIAL_HANDLING)
                              || (   r_cp != TR_SPECIAL_HANDLING
@@ -7617,7 +7633,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                      adjacent_to_range_above = TRUE;
                      if (i + 1 < len)
                      if (    (   pass2
-                             || UVCHR_SKIP(t_cp) == UVCHR_SKIP(t_array[i+1]))
+                             || CP_SKIP(t_cp) == CP_SKIP(t_array[i+1]))
                          && (   (   r_cp == TR_SPECIAL_HANDLING
                                  && r_map[i+1] == (UV) TR_SPECIAL_HANDLING)
                              || (   r_cp != TR_SPECIAL_HANDLING
@@ -15110,7 +15126,7 @@ Perl_ck_subr(pTHX_ OP *o)
         if (CvISXSUB(cv) || !CvROOT(cv))
             S_entersub_alloc_targ(aTHX_ o);
         if (!namegv) {
-           /* The original call checker API guarantees that a GV will be
+           /* The original call checker API guarantees that a GV will
                be provided with the right name.  So, if the old API was
                used (or the REQUIRE_GV flag was passed), we have to reify
                the CV’s GV, unless this is an anonymous sub.  This is not
@@ -15749,7 +15765,7 @@ S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p)
          }
  
          /* if its an unrecognised, non-dangerous op, assume that it
-         * it the cause of at least one safe scalar */
+         * is the cause of at least one safe scalar */
          (*scalars_p)++;
          flags = AAS_SAFE_SCALAR;
          break;