(int)n, PL_op_desc[(o)->op_type], t, OP_DESC(kid)), 0);
}
-/* remove flags var, its unused in all callers, move to to right end since gv
- and kid are always the same */
STATIC void
S_bad_type_gv(pTHX_ I32 n, GV *gv, const OP *kid, const char *t)
{
you to delete zero or more sequential nodes, replacing them with zero or
more different nodes. Performs the necessary op_first/op_last
housekeeping on the parent node and op_sibling manipulation on the
-children. The last deleted node will be marked as as the last node by
+children. The last deleted node will be marked as the last node by
updating the op_sibling/op_sibparent or op_moresib field as appropriate.
Note that op_next is not manipulated, and nodes are not freed; that is the
}
if (targetop) {
- /* Can targetop (the LHS) if it's a padsv, be be optimised
+ /* Can targetop (the LHS), if it's a padsv, be optimised
* away and use OPpTARGET_MY instead?
*/
if ( (targetop->op_type == OP_PADSV)
* X .= Y
*
* otherwise we could be doing something like $x = "foo", which
- * if treated as as a concat, would fail to COW.
+ * if treated as a concat, would fail to COW.
*/
if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
return;
* One of the important characteristics to know about the input is whether
* the transliteration may be done in place, or does a temporary need to be
* allocated, then copied. If the replacement for every character in every
- * possible string takes up no more bytes than the the character it
+ * possible string takes up no more bytes than the character it
* replaces, then it can be edited in place. Otherwise the replacement
* could overwrite a byte we are about to read, depending on the strings
* being processed. The comments and variable names here refer to this as
* these up into smaller chunks, but doesn't merge any together. This
* makes it easy to find the instances it's looking for. A second pass is
* done after this has been determined which merges things together to
- * shrink the table for runtime. For ASCII platforms, the table is
- * trivial, given below, and uses the fundamental characteristics of UTF-8
- * to construct the values. For EBCDIC, it isn't so, and we rely on a
- * table constructed by the perl script that generates these kinds of
- * things */
-#ifndef EBCDIC
+ * shrink the table for runtime. The table below is used for both ASCII
+ * and EBCDIC platforms. On EBCDIC, the byte length is not monotonically
+ * increasing for code points below 256. To correct for that, the macro
+ * CP_ADJUST defined below converts those code points to Unicode in the
+ * first pass, and we use the ASCII partition values. That works because
+ * the growth factor will be unaffected, which is all that is calculated
+ * during the first pass. */
UV PL_partition_by_byte_length[] = {
0,
0x80, /* Below this is 1 byte representations */
};
-#endif
-
PERL_ARGS_ASSERT_PMTRANS;
PL_hints |= HINT_BLOCK_SCOPE;
t_array = invlist_array(t_invlist);
}
+/* As noted earlier, we convert EBCDIC code points to Unicode in the first pass
+ * so as to get the well-behaved length 1 vs length 2 boundary. Only code
+ * points below 256 differ between the two character sets in this regard. For
+ * these, we also can't have any ranges, as they have to be individually
+ * converted. */
+#ifdef EBCDIC
+# define CP_ADJUST(x) ((pass2) ? (x) : NATIVE_TO_UNI(x))
+# define FORCE_RANGE_LEN_1(x) ((pass2) ? 0 : ((x) < 256))
+# define CP_SKIP(x) ((pass2) ? UVCHR_SKIP(x) : OFFUNISKIP(x))
+#else
+# define CP_ADJUST(x) (x)
+# define FORCE_RANGE_LEN_1(x) 0
+# define CP_SKIP(x) UVCHR_SKIP(x)
+#endif
+
/* And the mapping of each of the ranges is initialized. Initially,
* everything is TR_UNLISTED. */
for (i = 0; i < len; i++) {
/* Here, not in the middle of a range, and not UTF-8. The
* next code point is the single byte where we're at */
- t_cp = *t;
+ t_cp = CP_ADJUST(*t);
t_range_count = 1;
t++;
}
* next code point is the next UTF-8 char in the input. We
* know the input is valid, because the toker constructed
* it */
- t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+ t_cp = CP_ADJUST(valid_utf8_to_uvchr(t, &t_char_len));
t += t_char_len;
/* UTF-8 strings (only) have been parsed in toke.c to have
* the first element of a range. If so, get the final
* element and calculate the range size. If not, the range
* size is 1 */
- if (t < tend && *t == RANGE_INDICATOR) {
+ if ( t < tend && *t == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(t_cp))
+ {
t++;
t_range_count = valid_utf8_to_uvchr(t, &t_char_len)
- t_cp + 1;
}
else {
if (! rstr_utf8) {
- r_cp = *r;
+ r_cp = CP_ADJUST(*r);
r_range_count = 1;
r++;
}
else {
Size_t r_char_len;
- r_cp = valid_utf8_to_uvchr(r, &r_char_len);
+ r_cp = CP_ADJUST(valid_utf8_to_uvchr(r, &r_char_len));
r += r_char_len;
- if (r < rend && *r == RANGE_INDICATOR) {
+ if ( r < rend && *r == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(r_cp))
+ {
r++;
r_range_count = valid_utf8_to_uvchr(r,
&r_char_len) - r_cp + 1;
* code point in the rhs against any code point in the lhs. */
if ( ! pass2
&& r_cp_end != TR_SPECIAL_HANDLING
- && UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end))
+ && CP_SKIP(t_cp_end) < CP_SKIP(r_cp_end))
{
/* Here, we will need to make a copy of the input string
* before doing the transliteration. The worst possible
* string not being UTF-8 */
NV t_size = (can_force_utf8 && t_cp < 256)
? 1
- : UVCHR_SKIP(t_cp_end);
- NV ratio = UVCHR_SKIP(r_cp_end) / t_size;
+ : CP_SKIP(t_cp_end);
+ NV ratio = CP_SKIP(r_cp_end) / t_size;
o->op_private |= OPpTRANS_GROWS;
* is if it 'grows'. But in the 2nd pass, there's no
* reason to not merge */
if ( (i > 0 && ( pass2
- || UVCHR_SKIP(t_array[i-1])
- == UVCHR_SKIP(t_cp)))
+ || CP_SKIP(t_array[i-1])
+ == CP_SKIP(t_cp)))
&& ( ( r_cp == TR_SPECIAL_HANDLING
&& r_map[i-1] == TR_SPECIAL_HANDLING)
|| ( r_cp != TR_SPECIAL_HANDLING
adjacent_to_range_above = TRUE;
if (i + 1 < len)
if ( ( pass2
- || UVCHR_SKIP(t_cp) == UVCHR_SKIP(t_array[i+1]))
+ || CP_SKIP(t_cp) == CP_SKIP(t_array[i+1]))
&& ( ( r_cp == TR_SPECIAL_HANDLING
&& r_map[i+1] == (UV) TR_SPECIAL_HANDLING)
|| ( r_cp != TR_SPECIAL_HANDLING
if (CvISXSUB(cv) || !CvROOT(cv))
S_entersub_alloc_targ(aTHX_ o);
if (!namegv) {
- /* The original call checker API guarantees that a GV will be
+ /* The original call checker API guarantees that a GV will
be provided with the right name. So, if the old API was
used (or the REQUIRE_GV flag was passed), we have to reify
the CV’s GV, unless this is an anonymous sub. This is not
}
/* if its an unrecognised, non-dangerous op, assume that it
- * it the cause of at least one safe scalar */
+ * is the cause of at least one safe scalar */
(*scalars_p)++;
flags = AAS_SAFE_SCALAR;
break;