return newUNOP(OP_NOT, OPf_SPECIAL, scalar(o));
}
+OP *
+Perl_cmpchain_start(pTHX_ I32 type, OP *left, OP *right)
+{
+ dVAR;
+ BINOP *bop;
+ OP *op;
+
+ if (!left)
+ left = newOP(OP_NULL, 0);
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(left);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ cBINOPx(op)->op_flags = OPf_KIDS;
+ cBINOPx(op)->op_private = 2;
+ cBINOPx(op)->op_first = left;
+ cBINOPx(op)->op_last = right;
+ OpMORESIB_set(left, right);
+ OpLASTSIB_set(right, op);
+ return op;
+}
+
+OP *
+Perl_cmpchain_extend(pTHX_ I32 type, OP *ch, OP *right)
+{
+ dVAR;
+ BINOP *bop;
+ OP *op;
+
+ PERL_ARGS_ASSERT_CMPCHAIN_EXTEND;
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ if (ch->op_type != OP_NULL) {
+ UNOP *lch;
+ OP *nch, *cleft, *cright;
+ NewOp(0, lch, 1, UNOP);
+ nch = (OP*)lch;
+ OpTYPE_set(nch, OP_NULL);
+ nch->op_flags = OPf_KIDS;
+ cleft = cBINOPx(ch)->op_first;
+ cright = cBINOPx(ch)->op_last;
+ cBINOPx(ch)->op_first = NULL;
+ cBINOPx(ch)->op_last = NULL;
+ cBINOPx(ch)->op_private = 0;
+ cBINOPx(ch)->op_flags = 0;
+ cUNOPx(nch)->op_first = cright;
+ OpMORESIB_set(cright, ch);
+ OpMORESIB_set(ch, cleft);
+ OpLASTSIB_set(cleft, nch);
+ ch = nch;
+ }
+ OpMORESIB_set(right, op);
+ OpMORESIB_set(op, cUNOPx(ch)->op_first);
+ cUNOPx(ch)->op_first = right;
+ return ch;
+}
+
+OP *
+Perl_cmpchain_finish(pTHX_ OP *ch)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_CMPCHAIN_FINISH;
+ if (ch->op_type != OP_NULL) {
+ OPCODE cmpoptype = ch->op_type;
+ ch = CHECKOP(cmpoptype, ch);
+ if(!ch->op_next && ch->op_type == cmpoptype)
+ ch = fold_constants(op_integerize(op_std_init(ch)));
+ return ch;
+ } else {
+ OP *condop = NULL;
+ OP *rightarg = cUNOPx(ch)->op_first;
+ cUNOPx(ch)->op_first = OpSIBLING(rightarg);
+ OpLASTSIB_set(rightarg, NULL);
+ while (1) {
+ OP *cmpop = cUNOPx(ch)->op_first;
+ OP *leftarg = OpSIBLING(cmpop);
+ OPCODE cmpoptype = cmpop->op_type;
+ OP *nextrightarg;
+ bool is_last;
+ is_last = !(cUNOPx(ch)->op_first = OpSIBLING(leftarg));
+ OpLASTSIB_set(cmpop, NULL);
+ OpLASTSIB_set(leftarg, NULL);
+ if (is_last) {
+ ch->op_flags = 0;
+ op_free(ch);
+ nextrightarg = NULL;
+ } else {
+ nextrightarg = newUNOP(OP_CMPCHAIN_DUP, 0, leftarg);
+ leftarg = newOP(OP_NULL, 0);
+ }
+ cBINOPx(cmpop)->op_first = leftarg;
+ cBINOPx(cmpop)->op_last = rightarg;
+ OpMORESIB_set(leftarg, rightarg);
+ OpLASTSIB_set(rightarg, cmpop);
+ cmpop->op_flags = OPf_KIDS;
+ cmpop->op_private = 2;
+ cmpop = CHECKOP(cmpoptype, cmpop);
+ if(!cmpop->op_next && cmpop->op_type == cmpoptype)
+ cmpop = fold_constants(op_integerize(op_std_init(cmpop)));
+ condop = condop ? newLOGOP(OP_CMPCHAIN_AND, 0, cmpop, condop) :
+ cmpop;
+ if (!nextrightarg)
+ return condop;
+ rightarg = nextrightarg;
+ }
+ }
+}
+
/*
=for apidoc op_scope
}
/* This function exists solely to provide a scope to limit
- setjmp/longjmp() messing with auto variables.
+ setjmp/longjmp() messing with auto variables. It cannot be inlined because
+ it uses setjmp
*/
-PERL_STATIC_INLINE int
+STATIC int
S_fold_constants_eval(pTHX) {
int ret = 0;
dJMPENV;
* allocated, then copied. If the replacement for every character in every
* possible string takes up no more bytes than the the character it
* replaces, then it can be edited in place. Otherwise the replacement
- * could "grow", depending on the strings being processed. Some inputs
- * won't grow, and might even shrink under /d, but some inputs could grow,
- * so we have to assume any given one might grow. On very long inputs, the
- * temporary could eat up a lot of memory, so we want to avoid it if
- * possible. For non-UTF-8 inputs, everything is single-byte, so can be
- * edited in place, unless there is something in the pattern that could
- * force it into UTF-8. The inversion map makes it feasible to determine
- * this. Previous versions of this code pretty much punted on determining
- * if UTF-8 could be edited in place. Now, this code is rigorous in making
- * that determination.
+ * could overwrite a byte we are about to read, depending on the strings
+ * being processed. The comments and variable names here refer to this as
+ * "growing". Some inputs won't grow, and might even shrink under /d, but
+ * some inputs could grow, so we have to assume any given one might grow.
+ * On very long inputs, the temporary could eat up a lot of memory, so we
+ * want to avoid it if possible. For non-UTF-8 inputs, everything is
+ * single-byte, so can be edited in place, unless there is something in the
+ * pattern that could force it into UTF-8. The inversion map makes it
+ * feasible to determine this. Previous versions of this code pretty much
+ * punted on determining if UTF-8 could be edited in place. Now, this code
+ * is rigorous in making that determination.
*
* Another characteristic we need to know is whether the lhs and rhs are
* identical. If so, and no other flags are present, the only effect of
const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH);
const bool del = cBOOL(o->op_private & OPpTRANS_DELETE);
- /* Set to true if there is some character < 256 in the lhs that maps to >
- * 255. If so, a non-UTF-8 match string can be forced into requiring to be
- * in UTF-8 by a tr/// operation. */
+ /* Set to true if there is some character < 256 in the lhs that maps to
+ * above 255. If so, a non-UTF-8 match string can be forced into being in
+ * UTF-8 by a tr/// operation. */
bool can_force_utf8 = FALSE;
/* What is the maximum expansion factor in UTF-8 transliterations. If a
* expansion factor is 1.5. This number is used at runtime to calculate
* how much space to allocate for non-inplace transliterations. Without
* this number, the worst case is 14, which is extremely unlikely to happen
- * in real life, and would require significant memory overhead. */
+ * in real life, and could require significant memory overhead. */
NV max_expansion = 1.;
UV t_range_count, r_range_count, min_range_count;
#ifndef EBCDIC
UV PL_partition_by_byte_length[] = {
0,
- 0x80,
- (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))),
- (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))),
- ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))),
- ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))),
- ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT)))
+ 0x80, /* Below this is 1 byte representations */
+ (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))), /* 2 bytes below this */
+ (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))), /* 3 bytes below this */
+ ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))), /* 4 bytes below this */
+ ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))), /* 5 bytes below this */
+ ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT))) /* 6 bytes below this */
# ifdef UV_IS_QUAD
,
- ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT)))
+ ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT))) /* 7 bytes below this */
# endif
};
t_cp_end = MIN(IV_MAX, t_cp + span - 1);
if (r_cp == TR_SPECIAL_HANDLING) {
- r_cp_end = TR_SPECIAL_HANDLING;
+
+ /* If unmatched lhs code points map to the final map, use that
+ * value. This being set to TR_SPECIAL_HANDLING indicates that
+ * we don't have a final map: unmatched lhs code points are
+ * simply deleted */
+ r_cp_end = (del) ? TR_SPECIAL_HANDLING : final_map;
}
else {
r_cp_end = MIN(IV_MAX, r_cp + span - 1);
* we use the above sample data. The t_cp chunk must be any
* contiguous subset of M, N, O, P, and/or Q.
*
+ * In the first pass, calculate if there is any possible input
+ * string that has a character whose transliteration will be
+ * longer than it. If none, the transliteration may be done
+ * in-place, as it can't write over a so-far unread byte.
+ * Otherwise, a copy must first be made. This could be
+ * expensive for long inputs.
+ *
* In the first pass, the t_invlist has been partitioned so
* that all elements in any single range have the same number
* of bytes in their UTF-8 representations. And the r space is
&& r_cp_end != TR_SPECIAL_HANDLING
&& UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end))
{
- /* Consider tr/\xCB/\X{E000}/. The maximum expansion
- * factor is 1 byte going to 3 if the lhs is not UTF-8, but
- * 2 bytes going to 3 if it is in UTF-8. We could pass two
- * different values so doop could choose based on the
- * UTF-8ness of the target. But khw thinks (perhaps
- * wrongly) that is overkill. It is used only to make sure
- * we malloc enough space. If no target string can force
- * the result to be UTF-8, then we don't have to worry
- * about this */
+ /* Here, we will need to make a copy of the input string
+ * before doing the transliteration. The worst possible
+ * case is an expansion ratio of 14:1. This is rare, and
+ * we'd rather allocate only the necessary amount of extra
+ * memory for that copy. We can calculate the worst case
+ * for this particular transliteration is by keeping track
+ * of the expansion factor for each range.
+ *
+ * Consider tr/\xCB/\X{E000}/. The maximum expansion
+ * factor is 1 byte going to 3 if the target string is not
+ * UTF-8, but 2 bytes going to 3 if it is in UTF-8. We
+ * could pass two different values so doop could choose
+ * based on the UTF-8ness of the target. But khw thinks
+ * (perhaps wrongly) that is overkill. It is used only to
+ * make sure we malloc enough space.
+ *
+ * If no target string can force the result to be UTF-8,
+ * then we don't have to worry about the case of the target
+ * string not being UTF-8 */
NV t_size = (can_force_utf8 && t_cp < 256)
? 1
: UVCHR_SKIP(t_cp_end);
* to PL_compiling, IN_PERL_COMPILETIME/IN_PERL_RUNTIME
* will give the wrong answer.
*/
- Newx(PL_curcop, 1, COP);
- StructCopy(&PL_compiling, PL_curcop, COP);
- PL_curcop->op_slabbed = 0;
- SAVEFREEPV(PL_curcop);
+ PL_curcop = (COP*)newSTATEOP(PL_compiling.op_flags, NULL, NULL);
+ CopLINE_set(PL_curcop, CopLINE(&PL_compiling));
+ SAVEFREEOP(PL_curcop);
}
PUSHSTACKi(PERLSI_REQUIRE);
case OP_AND:
case OP_OR:
case OP_DOR:
+ case OP_CMPCHAIN_AND:
while (cLOGOP->op_other->op_type == OP_NULL)
cLOGOP->op_other = cLOGOP->op_other->op_next;
while (o->op_next && ( o->op_type == o->op_next->op_type