return slab;
}
+/* opslot_size includes the size of the slot header, and an op can't be smaller than BASEOP */
+#define OPSLOT_SIZE_BASE (SIZE_TO_PSIZE(sizeof(OP)) + OPSLOT_HEADER_P)
+#define OPSLOT_SIZE_TO_INDEX(sz) ((sz) - OPSLOT_SIZE_BASE)
+
+#define link_freed_op(slab, o) S_link_freed_op(aTHX_ slab, o)
+static void
+S_link_freed_op(pTHX_ OPSLAB *slab, OP *o) {
+ U16 sz = OpSLOT(o)->opslot_size;
+ U16 index = OPSLOT_SIZE_TO_INDEX(sz);
+
+ assert(sz >= OPSLOT_SIZE_BASE);
+ /* make sure the array is large enough to include ops this large */
+ if (!slab->opslab_freed) {
+ /* we don't have a free list array yet, make a new one */
+ slab->opslab_freed_size = index+1;
+ slab->opslab_freed = (OP**)PerlMemShared_calloc((slab->opslab_freed_size), sizeof(OP*));
+
+ if (!slab->opslab_freed)
+ croak_no_mem();
+ }
+ else if (index >= slab->opslab_freed_size) {
+ /* It's probably not worth doing exponential expansion here, the number of op sizes
+ is small.
+ */
+ /* We already have a list that isn't large enough, expand it */
+ size_t newsize = index+1;
+ OP **p = (OP **)PerlMemShared_realloc(slab->opslab_freed, newsize * sizeof(OP*));
+
+ if (!p)
+ croak_no_mem();
+
+ Zero(p+slab->opslab_freed_size, newsize - slab->opslab_freed_size, OP *);
+
+ slab->opslab_freed = p;
+ slab->opslab_freed_size = newsize;
+ }
+
+ o->op_next = slab->opslab_freed[index];
+ slab->opslab_freed[index] = o;
+}
/* Returns a sz-sized block of memory (suitable for holding an op) from
* a free slot in the chain of op slabs attached to PL_compcv.
opsz = SIZE_TO_PSIZE(sz);
sz = opsz + OPSLOT_HEADER_P;
- /* The slabs maintain a free list of OPs. In particular, constant folding
+ /* The head slab for each CV maintains a free list of OPs. In particular, constant folding
will free up OPs, so it makes sense to re-use them where possible. A
freed up slot is used in preference to a new allocation. */
- if (head_slab->opslab_freed) {
- OP **too = &head_slab->opslab_freed;
- o = *too;
- DEBUG_S_warn((aTHX_ "found free op at %p, slab %p, head slab %p",
- (void*)o,
- (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
- (void*)head_slab));
-
- while (o && OpSLOT(o)->opslot_size < sz) {
- DEBUG_S_warn((aTHX_ "Alas! too small"));
- o = *(too = &o->op_next);
- if (o) { DEBUG_S_warn((aTHX_ "found another free op at %p", (void*)o)); }
- }
- if (o) {
+ if (head_slab->opslab_freed &&
+ OPSLOT_SIZE_TO_INDEX(sz) < head_slab->opslab_freed_size) {
+ U16 base_index;
+
+ /* look for a large enough size with any freed ops */
+ for (base_index = OPSLOT_SIZE_TO_INDEX(sz);
+ base_index < head_slab->opslab_freed_size && !head_slab->opslab_freed[base_index];
+ ++base_index) {
+ }
+
+ if (base_index < head_slab->opslab_freed_size) {
+ /* found a freed op */
+ o = head_slab->opslab_freed[base_index];
+
DEBUG_S_warn((aTHX_ "realloced op at %p, slab %p, head slab %p",
(void*)o,
(I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
(void*)head_slab));
- *too = o->op_next;
+ head_slab->opslab_freed[base_index] = o->op_next;
Zero(o, opsz, I32 *);
o->op_slabbed = 1;
goto gotit;
slot = &slab2->opslab_slots;
INIT_OPSLOT(slab2->opslab_free_space);
o->op_type = OP_FREED;
- o->op_next = head_slab->opslab_freed;
- head_slab->opslab_freed = o;
+ link_freed_op(head_slab, o);
}
/* Create a new slab. Make this one twice as big. */
/* If this op is already freed, our refcount will get screwy. */
assert(o->op_type != OP_FREED);
o->op_type = OP_FREED;
- o->op_next = slab->opslab_freed;
- slab->opslab_freed = o;
+ link_freed_op(slab, o);
DEBUG_S_warn((aTHX_ "freeing op at %p, slab %p, head slab %p",
(void*)o,
(I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
PERL_UNUSED_CONTEXT;
DEBUG_S_warn((aTHX_ "freeing slab %p", (void*)slab));
assert(slab->opslab_refcnt == 1);
+ PerlMemShared_free(slab->opslab_freed);
do {
slab2 = slab->opslab_next;
#ifdef DEBUGGING
(int)n, PL_op_desc[(o)->op_type], t, OP_DESC(kid)), 0);
}
-/* remove flags var, its unused in all callers, move to to right end since gv
- and kid are always the same */
STATIC void
S_bad_type_gv(pTHX_ I32 n, GV *gv, const OP *kid, const char *t)
{
if (!(flags & SVf_UTF8 && UTF8_IS_START(name[1]))
&& isASCII(name[1])
- && (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1]))) {
+ && (!isPRINT(name[1]) || memCHRs("\t\n\r\f", name[1]))) {
/* diag_listed_as: Can't use global %s in %s */
yyerror(Perl_form(aTHX_ "Can't use global %c^%c%.*s in %s",
name[0], toCTRL(name[1]),
you to delete zero or more sequential nodes, replacing them with zero or
more different nodes. Performs the necessary op_first/op_last
housekeeping on the parent node and op_sibling manipulation on the
-children. The last deleted node will be marked as as the last node by
+children. The last deleted node will be marked as the last node by
updating the op_sibling/op_sibparent or op_moresib field as appropriate.
Note that op_next is not manipulated, and nodes are not freed; that is the
targetop = OpSIBLING(topop);
if (!targetop) /* probably some sort of syntax error */
return;
+
+ /* don't optimise away assign in 'local $foo = ....' */
+ if ( (targetop->op_private & OPpLVAL_INTRO)
+ /* these are the common ops which do 'local', but
+ * not all */
+ && ( targetop->op_type == OP_GVSV
+ || targetop->op_type == OP_RV2SV
+ || targetop->op_type == OP_AELEM
+ || targetop->op_type == OP_HELEM
+ )
+ )
+ return;
}
else if ( topop->op_type == OP_CONCAT
&& (topop->op_flags & OPf_STACKED)
}
if (targetop) {
- /* Can targetop (the LHS) if it's a padsv, be be optimised
+ /* Can targetop (the LHS) if it's a padsv, be optimised
* away and use OPpTARGET_MY instead?
*/
if ( (targetop->op_type == OP_PADSV)
* X .= Y
*
* otherwise we could be doing something like $x = "foo", which
- * if treated as as a concat, would fail to COW.
+ * if treated as a concat, would fail to COW.
*/
if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
return;
return newUNOP(OP_NOT, OPf_SPECIAL, scalar(o));
}
+OP *
+Perl_cmpchain_start(pTHX_ I32 type, OP *left, OP *right)
+{
+ dVAR;
+ BINOP *bop;
+ OP *op;
+
+ if (!left)
+ left = newOP(OP_NULL, 0);
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(left);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ cBINOPx(op)->op_flags = OPf_KIDS;
+ cBINOPx(op)->op_private = 2;
+ cBINOPx(op)->op_first = left;
+ cBINOPx(op)->op_last = right;
+ OpMORESIB_set(left, right);
+ OpLASTSIB_set(right, op);
+ return op;
+}
+
+OP *
+Perl_cmpchain_extend(pTHX_ I32 type, OP *ch, OP *right)
+{
+ dVAR;
+ BINOP *bop;
+ OP *op;
+
+ PERL_ARGS_ASSERT_CMPCHAIN_EXTEND;
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ if (ch->op_type != OP_NULL) {
+ UNOP *lch;
+ OP *nch, *cleft, *cright;
+ NewOp(0, lch, 1, UNOP);
+ nch = (OP*)lch;
+ OpTYPE_set(nch, OP_NULL);
+ nch->op_flags = OPf_KIDS;
+ cleft = cBINOPx(ch)->op_first;
+ cright = cBINOPx(ch)->op_last;
+ cBINOPx(ch)->op_first = NULL;
+ cBINOPx(ch)->op_last = NULL;
+ cBINOPx(ch)->op_private = 0;
+ cBINOPx(ch)->op_flags = 0;
+ cUNOPx(nch)->op_first = cright;
+ OpMORESIB_set(cright, ch);
+ OpMORESIB_set(ch, cleft);
+ OpLASTSIB_set(cleft, nch);
+ ch = nch;
+ }
+ OpMORESIB_set(right, op);
+ OpMORESIB_set(op, cUNOPx(ch)->op_first);
+ cUNOPx(ch)->op_first = right;
+ return ch;
+}
+
+OP *
+Perl_cmpchain_finish(pTHX_ OP *ch)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_CMPCHAIN_FINISH;
+ if (ch->op_type != OP_NULL) {
+ OPCODE cmpoptype = ch->op_type;
+ ch = CHECKOP(cmpoptype, ch);
+ if(!ch->op_next && ch->op_type == cmpoptype)
+ ch = fold_constants(op_integerize(op_std_init(ch)));
+ return ch;
+ } else {
+ OP *condop = NULL;
+ OP *rightarg = cUNOPx(ch)->op_first;
+ cUNOPx(ch)->op_first = OpSIBLING(rightarg);
+ OpLASTSIB_set(rightarg, NULL);
+ while (1) {
+ OP *cmpop = cUNOPx(ch)->op_first;
+ OP *leftarg = OpSIBLING(cmpop);
+ OPCODE cmpoptype = cmpop->op_type;
+ OP *nextrightarg;
+ bool is_last;
+ is_last = !(cUNOPx(ch)->op_first = OpSIBLING(leftarg));
+ OpLASTSIB_set(cmpop, NULL);
+ OpLASTSIB_set(leftarg, NULL);
+ if (is_last) {
+ ch->op_flags = 0;
+ op_free(ch);
+ nextrightarg = NULL;
+ } else {
+ nextrightarg = newUNOP(OP_CMPCHAIN_DUP, 0, leftarg);
+ leftarg = newOP(OP_NULL, 0);
+ }
+ cBINOPx(cmpop)->op_first = leftarg;
+ cBINOPx(cmpop)->op_last = rightarg;
+ OpMORESIB_set(leftarg, rightarg);
+ OpLASTSIB_set(rightarg, cmpop);
+ cmpop->op_flags = OPf_KIDS;
+ cmpop->op_private = 2;
+ cmpop = CHECKOP(cmpoptype, cmpop);
+ if(!cmpop->op_next && cmpop->op_type == cmpoptype)
+ cmpop = fold_constants(op_integerize(op_std_init(cmpop)));
+ condop = condop ? newLOGOP(OP_CMPCHAIN_AND, 0, cmpop, condop) :
+ cmpop;
+ if (!nextrightarg)
+ return condop;
+ rightarg = nextrightarg;
+ }
+ }
+}
+
/*
=for apidoc op_scope
bool sigil = FALSE;
/* some heuristics to detect a potential error */
- while (*s && (strchr(", \t\n", *s)))
+ while (*s && (memCHRs(", \t\n", *s)))
s++;
while (1) {
- if (*s && (strchr("@$%", *s) || (!lex && *s == '*'))
+ if (*s && (memCHRs("@$%", *s) || (!lex && *s == '*'))
&& *++s
&& (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s))) {
s++;
sigil = TRUE;
while (*s && (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s)))
s++;
- while (*s && (strchr(", \t\n", *s)))
+ while (*s && (memCHRs(", \t\n", *s)))
s++;
}
else
}
/* This function exists solely to provide a scope to limit
- setjmp/longjmp() messing with auto variables.
+ setjmp/longjmp() messing with auto variables. It cannot be inlined because
+ it uses setjmp
*/
-PERL_STATIC_INLINE int
+STATIC int
S_fold_constants_eval(pTHX) {
int ret = 0;
dJMPENV;
child of the unary op; it is consumed by this function and become part
of the constructed op tree.
+=for apidoc Amnh||OPf_KIDS
+
=cut
*/
* One of the important characteristics to know about the input is whether
* the transliteration may be done in place, or does a temporary need to be
* allocated, then copied. If the replacement for every character in every
- * possible string takes up no more bytes than the the character it
+ * possible string takes up no more bytes than the character it
* replaces, then it can be edited in place. Otherwise the replacement
- * could "grow", depending on the strings being processed. Some inputs
- * won't grow, and might even shrink under /d, but some inputs could grow,
- * so we have to assume any given one might grow. On very long inputs, the
- * temporary could eat up a lot of memory, so we want to avoid it if
- * possible. For non-UTF-8 inputs, everything is single-byte, so can be
- * edited in place, unless there is something in the pattern that could
- * force it into UTF-8. The inversion map makes it feasible to determine
- * this. Previous versions of this code pretty much punted on determining
- * if UTF-8 could be edited in place. Now, this code is rigorous in making
- * that determination.
+ * could overwrite a byte we are about to read, depending on the strings
+ * being processed. The comments and variable names here refer to this as
+ * "growing". Some inputs won't grow, and might even shrink under /d, but
+ * some inputs could grow, so we have to assume any given one might grow.
+ * On very long inputs, the temporary could eat up a lot of memory, so we
+ * want to avoid it if possible. For non-UTF-8 inputs, everything is
+ * single-byte, so can be edited in place, unless there is something in the
+ * pattern that could force it into UTF-8. The inversion map makes it
+ * feasible to determine this. Previous versions of this code pretty much
+ * punted on determining if UTF-8 could be edited in place. Now, this code
+ * is rigorous in making that determination.
*
* Another characteristic we need to know is whether the lhs and rhs are
* identical. If so, and no other flags are present, the only effect of
const U8 * r0 = (U8*)SvPV_const(rstr, rlen);
const U8 * t = t0;
const U8 * r = r0;
- Size_t t_count = 0, r_count = 0; /* Number of characters in search and
+ UV t_count = 0, r_count = 0; /* Number of characters in search and
replacement lists */
/* khw thinks some of the private flags for this op are quaintly named.
const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH);
const bool del = cBOOL(o->op_private & OPpTRANS_DELETE);
- /* Set to true if there is some character < 256 in the lhs that maps to >
- * 255. If so, a non-UTF-8 match string can be forced into requiring to be
- * in UTF-8 by a tr/// operation. */
+ /* Set to true if there is some character < 256 in the lhs that maps to
+ * above 255. If so, a non-UTF-8 match string can be forced into being in
+ * UTF-8 by a tr/// operation. */
bool can_force_utf8 = FALSE;
/* What is the maximum expansion factor in UTF-8 transliterations. If a
* expansion factor is 1.5. This number is used at runtime to calculate
* how much space to allocate for non-inplace transliterations. Without
* this number, the worst case is 14, which is extremely unlikely to happen
- * in real life, and would require significant memory overhead. */
+ * in real life, and could require significant memory overhead. */
NV max_expansion = 1.;
- SSize_t t_range_count, r_range_count, min_range_count;
+ UV t_range_count, r_range_count, min_range_count;
UV* t_array;
SV* t_invlist;
UV* r_map;
UV r_cp, t_cp;
- IV t_cp_end = -1;
+ UV t_cp_end = (UV) -1;
UV r_cp_end;
Size_t len;
AV* invmap;
* these up into smaller chunks, but doesn't merge any together. This
* makes it easy to find the instances it's looking for. A second pass is
* done after this has been determined which merges things together to
- * shrink the table for runtime. For ASCII platforms, the table is
- * trivial, given below, and uses the fundamental characteristics of UTF-8
- * to construct the values. For EBCDIC, it isn't so, and we rely on a
- * table constructed by the perl script that generates these kinds of
- * things */
-#ifndef EBCDIC
+ * shrink the table for runtime. The table below is used for both ASCII
+ * and EBCDIC platforms. On EBCDIC, the byte length is not monotonically
+ * increasing for code points below 256. To correct for that, the macro
+ * CP_ADJUST defined below converts those code points to ASCII in the first
+ * pass, and we use the ASCII partition values. That works because the
+ * growth factor will be unaffected, which is all that is calculated during
+ * the first pass. */
UV PL_partition_by_byte_length[] = {
0,
- 0x80,
- (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))),
- (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))),
- (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))),
- ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))),
- ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))),
- ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT)))
+ 0x80, /* Below this is 1 byte representations */
+ (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))), /* 2 bytes below this */
+ (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))), /* 3 bytes below this */
+ ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))), /* 4 bytes below this */
+ ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))), /* 5 bytes below this */
+ ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT))) /* 6 bytes below this */
# ifdef UV_IS_QUAD
,
- ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT)))
+ ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT))) /* 7 bytes below this */
# endif
};
-#endif
-
PERL_ARGS_ASSERT_PMTRANS;
PL_hints |= HINT_BLOCK_SCOPE;
SV * inverted_tlist = _new_invlist(tlen);
Size_t temp_len;
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "%s: %d: tstr before inversion=\n%s\n",
+ __FILE__, __LINE__, _byte_dump_string(t, tend - t, 0)));
+
while (t < tend) {
/* Non-utf8 strings don't have ranges, so each character is listed
t_invlist = _new_invlist(1);
+ /* Initialize to a single range */
+ t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
+
+ /* For the first pass, the lhs is partitioned such that the
+ * number of UTF-8 bytes required to represent a code point in each
+ * partition is the same as the number for any other code point in
+ * that partion. We copy the pre-compiled partion. */
+ len = C_ARRAY_LENGTH(PL_partition_by_byte_length);
+ invlist_extend(t_invlist, len);
+ t_array = invlist_array(t_invlist);
+ Copy(PL_partition_by_byte_length, t_array, len, UV);
+ invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist)));
+ Newx(r_map, len + 1, UV);
+
/* Parse the (potentially adjusted) input, creating the inversion map.
* This is done in two passes. The first pass is to determine if the
* transliteration can be done in place. The inversion map it creates
* output of the second pass, which starts with a more compact table and
* allows more ranges to be merged */
for (pass2 = 0; pass2 < 2; pass2++) {
-
- /* Initialize to a single range */
- t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
-
- /* In the second pass, we just have the single range */
-
if (pass2) {
+ /* Initialize to a single range */
+ t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
+
+ /* In the second pass, we just have the single range */
len = 1;
t_array = invlist_array(t_invlist);
}
- else {
- /* But in the first pass, the lhs is partitioned such that the
- * number of UTF-8 bytes required to represent a code point in each
- * partition is the same as the number for any other code point in
- * that partion. We copy the pre-compiled partion. */
- len = C_ARRAY_LENGTH(PL_partition_by_byte_length);
- invlist_extend(t_invlist, len);
- t_array = invlist_array(t_invlist);
- Copy(PL_partition_by_byte_length, t_array, len, UV);
- invlist_set_len(t_invlist,
- len,
- *(get_invlist_offset_addr(t_invlist)));
- Newx(r_map, len + 1, UV);
- }
+/* As noted earlier, we convert EBCDIC code points to Unicode in the first pass
+ * so as to get the well-behaved length 1 vs length 2 boundary. Only code
+ * points below 256 differ between the two character sets in this regard. For
+ * these, we also can't have any ranges, as they have to be individually
+ * converted. */
+#ifdef EBCDIC
+# define CP_ADJUST(x) ((pass2) ? (x) : NATIVE_TO_UNI(x))
+# define FORCE_RANGE_LEN_1(x) ((pass2) ? 0 : ((x) < 256))
+# define CP_SKIP(x) ((pass2) ? UVCHR_SKIP(x) : OFFUNISKIP(x))
+#else
+# define CP_ADJUST(x) (x)
+# define FORCE_RANGE_LEN_1(x) 0
+# define CP_SKIP(x) UVCHR_SKIP(x)
+#endif
/* And the mapping of each of the ranges is initialized. Initially,
* everything is TR_UNLISTED. */
r_count = 0;
t_range_count = r_range_count = 0;
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d:\ntstr=%s\n",
+ __FILE__, __LINE__, _byte_dump_string(t, tend - t, 0)));
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "rstr=%s\n",
+ _byte_dump_string(r, rend - r, 0)));
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "/c=%d; /s=%d; /d=%d\n",
+ complement, squash, del));
+ DEBUG_y(invmap_dump(t_invlist, r_map));
+
/* Now go through the search list constructing an inversion map. The
* input is not necessarily in any particular order. Making it an
* inversion map orders it, potentially simplifying, and makes it easy
* This routine modifies traditional inversion maps to reserve two
* mappings:
*
- * TR_UNLISTED (or -1) indicates that the no code point in the range
+ * TR_UNLISTED (or -1) indicates that no code point in the range
* is listed in the tr/// searchlist. At runtime, these are
* always passed through unchanged. In the inversion map, all
* points in the range are mapped to -1, instead of increasing,
bool merge_with_range_above = FALSE;
bool merge_with_range_below = FALSE;
- SSize_t i, span, invmap_range_length_remaining;
+ UV span, invmap_range_length_remaining;
+ SSize_t j;
+ Size_t i;
/* If we are in the middle of processing a range in the 'target'
* side, the previous iteration has set us up. Otherwise, look at
/* Here, not in the middle of a range, and not UTF-8. The
* next code point is the single byte where we're at */
- t_cp = *t;
+ t_cp = CP_ADJUST(*t);
t_range_count = 1;
t++;
}
* next code point is the next UTF-8 char in the input. We
* know the input is valid, because the toker constructed
* it */
- t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+ t_cp = CP_ADJUST(valid_utf8_to_uvchr(t, &t_char_len));
t += t_char_len;
/* UTF-8 strings (only) have been parsed in toke.c to have
* the first element of a range. If so, get the final
* element and calculate the range size. If not, the range
* size is 1 */
- if (t < tend && *t == RANGE_INDICATOR) {
+ if ( t < tend && *t == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(t_cp))
+ {
t++;
t_range_count = valid_utf8_to_uvchr(t, &t_char_len)
- t_cp + 1;
* range the same size as the lhs one. */
r_cp = TR_SPECIAL_HANDLING;
r_range_count = t_range_count;
+
+ if (! del) {
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "final_map =%" UVXf "\n", final_map));
+ }
}
else {
if (! rstr_utf8) {
- r_cp = *r;
+ r_cp = CP_ADJUST(*r);
r_range_count = 1;
r++;
}
else {
Size_t r_char_len;
- r_cp = valid_utf8_to_uvchr(r, &r_char_len);
+ r_cp = CP_ADJUST(valid_utf8_to_uvchr(r, &r_char_len));
r += r_char_len;
- if (r < rend && *r == RANGE_INDICATOR) {
+ if ( r < rend && *r == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(r_cp))
+ {
r++;
r_range_count = valid_utf8_to_uvchr(r,
&r_char_len) - r_cp + 1;
* the return value is the index into the list's array of the range
* that contains <cp>, that is, 'i' such that array[i] <= cp <
* array[i+1] */
- i = _invlist_search(t_invlist, t_cp);
- assert(i >= 0);
+ j = _invlist_search(t_invlist, t_cp);
+ assert(j >= 0);
+ i = j;
/* Here, the data structure might look like:
*
* the smallest of the first two values. The final one is
* irrelevant if the map is to the special indicator */
- invmap_range_length_remaining = ((Size_t) i + 1 < len)
+ invmap_range_length_remaining = (i + 1 < len)
? t_array[i+1] - t_cp
: IV_MAX - t_cp;
span = MAX(1, MIN(min_range_count, invmap_range_length_remaining));
t_cp_end = MIN(IV_MAX, t_cp + span - 1);
if (r_cp == TR_SPECIAL_HANDLING) {
- r_cp_end = TR_SPECIAL_HANDLING;
+
+ /* If unmatched lhs code points map to the final map, use that
+ * value. This being set to TR_SPECIAL_HANDLING indicates that
+ * we don't have a final map: unmatched lhs code points are
+ * simply deleted */
+ r_cp_end = (del) ? TR_SPECIAL_HANDLING : final_map;
}
else {
r_cp_end = MIN(IV_MAX, r_cp + span - 1);
* has been set up so that all members in it will be of the same
* ilk) */
if (r_map[i] == TR_UNLISTED) {
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "Processing %" UVxf "-%" UVxf " => %" UVxf "-%" UVxf "\n",
+ t_cp, t_cp_end, r_cp, r_cp_end));
/* This is the first definition for this chunk, hence is valid
* and needs to be processed. Here and in the comments below,
* we use the above sample data. The t_cp chunk must be any
* contiguous subset of M, N, O, P, and/or Q.
*
+ * In the first pass, calculate if there is any possible input
+ * string that has a character whose transliteration will be
+ * longer than it. If none, the transliteration may be done
+ * in-place, as it can't write over a so-far unread byte.
+ * Otherwise, a copy must first be made. This could be
+ * expensive for long inputs.
+ *
* In the first pass, the t_invlist has been partitioned so
* that all elements in any single range have the same number
* of bytes in their UTF-8 representations. And the r space is
* code point in the rhs against any code point in the lhs. */
if ( ! pass2
&& r_cp_end != TR_SPECIAL_HANDLING
- && UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end))
+ && CP_SKIP(t_cp_end) < CP_SKIP(r_cp_end))
{
- /* Consider tr/\xCB/\X{E000}/. The maximum expansion
- * factor is 1 byte going to 3 if the lhs is not UTF-8, but
- * 2 bytes going to 3 if it is in UTF-8. We could pass two
- * different values so doop could choose based on the
- * UTF-8ness of the target. But khw thinks (perhaps
- * wrongly) that is overkill. It is used only to make sure
- * we malloc enough space. If no target string can force
- * the result to be UTF-8, then we don't have to worry
- * about this */
+ /* Here, we will need to make a copy of the input string
+ * before doing the transliteration. The worst possible
+ * case is an expansion ratio of 14:1. This is rare, and
+ * we'd rather allocate only the necessary amount of extra
+ * memory for that copy. We can calculate the worst case
+ * for this particular transliteration is by keeping track
+ * of the expansion factor for each range.
+ *
+ * Consider tr/\xCB/\X{E000}/. The maximum expansion
+ * factor is 1 byte going to 3 if the target string is not
+ * UTF-8, but 2 bytes going to 3 if it is in UTF-8. We
+ * could pass two different values so doop could choose
+ * based on the UTF-8ness of the target. But khw thinks
+ * (perhaps wrongly) that is overkill. It is used only to
+ * make sure we malloc enough space.
+ *
+ * If no target string can force the result to be UTF-8,
+ * then we don't have to worry about the case of the target
+ * string not being UTF-8 */
NV t_size = (can_force_utf8 && t_cp < 256)
? 1
- : UVCHR_SKIP(t_cp_end);
- NV ratio = UVCHR_SKIP(r_cp_end) / t_size;
+ : CP_SKIP(t_cp_end);
+ NV ratio = CP_SKIP(r_cp_end) / t_size;
o->op_private |= OPpTRANS_GROWS;
* largest ratio */
if (ratio > max_expansion) {
max_expansion = ratio;
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "New expansion factor: %" NVgf "\n",
+ max_expansion));
}
}
* is if it 'grows'. But in the 2nd pass, there's no
* reason to not merge */
if ( (i > 0 && ( pass2
- || UVCHR_SKIP(t_array[i-1])
- == UVCHR_SKIP(t_cp)))
+ || CP_SKIP(t_array[i-1])
+ == CP_SKIP(t_cp)))
&& ( ( r_cp == TR_SPECIAL_HANDLING
&& r_map[i-1] == TR_SPECIAL_HANDLING)
|| ( r_cp != TR_SPECIAL_HANDLING
* it adjoins the range above, and if the map is suitable, can
* be merged with it */
if ( t_cp_end >= IV_MAX - 1
- || ( (Size_t) i + 1 < len
- && (Size_t) t_cp_end + 1 == t_array[i+1]))
+ || ( i + 1 < len
+ && t_cp_end + 1 == t_array[i+1]))
{
adjacent_to_range_above = TRUE;
- if ((Size_t) i + 1 < len)
+ if (i + 1 < len)
if ( ( pass2
- || UVCHR_SKIP(t_cp) == UVCHR_SKIP(t_array[i+1]))
+ || CP_SKIP(t_cp) == CP_SKIP(t_array[i+1]))
&& ( ( r_cp == TR_SPECIAL_HANDLING
&& r_map[i+1] == (UV) TR_SPECIAL_HANDLING)
|| ( r_cp != TR_SPECIAL_HANDLING
* [-1] Z -1 # Z => default; as do Z+1, ... infinity
*/
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "Before fixing up: len=%d, i=%d\n",
+ (int) len, (int) i));
+ DEBUG_yv(invmap_dump(t_invlist, r_map));
+
invlist_extend(t_invlist, len + 2);
t_array = invlist_array(t_invlist);
Renew(r_map, len + 2, UV);
t_array[i+2] = t_cp_end + 1;
r_map[i+2] = TR_UNLISTED;
}
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "After iteration: span=%" UVuf ", t_range_count=%"
+ UVuf " r_range_count=%" UVuf "\n",
+ span, t_range_count, r_range_count));
+ DEBUG_yv(invmap_dump(t_invlist, r_map));
} /* End of this chunk needs to be processed */
/* Done with this chunk. */
SvREFCNT_dec(inverted_tstr);
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "After everything: \n"));
+ DEBUG_y(invmap_dump(t_invlist, r_map));
+
/* We now have normalized the input into an inversion map.
*
* See if the lhs and rhs are equivalent. If so, this tr/// is a no-op
}
else {
OPtrans_map *tbl;
- Size_t i;
+ unsigned short i;
/* The OPtrans_map struct already contains one slot; hence the -1. */
SSize_t struct_size = sizeof(OPtrans_map)
: (short) rlen
? (short) final_map
: (short) TR_R_EMPTY;
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"%s: %d\n", __FILE__, __LINE__));
+ for (i = 0; i < tbl->size; i++) {
+ if (tbl->map[i] < 0) {
+ DEBUG_y(PerlIO_printf(Perl_debug_log," %02x=>%d",
+ (unsigned) i, tbl->map[i]));
+ }
+ else {
+ DEBUG_y(PerlIO_printf(Perl_debug_log," %02x=>%02x",
+ (unsigned) i, tbl->map[i]));
+ }
+ if ((i+1) % 8 == 0 || i + 1 == (short) tbl->size) {
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"\n"));
+ }
+ }
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"Final map 0x%x=>%02x\n",
+ (unsigned) tbl->size, tbl->map[tbl->size]));
SvREFCNT_dec(t_invlist);
}
else {
/* no more replacement chars than search chars */
+ }
#endif
}
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "/d=%d, /s=%d, /c=%d, identical=%d, grows=%d,"
+ " use_svop=%d, can_force_utf8=%d,\nexpansion=%" NVgf "\n",
+ del, squash, complement,
+ cBOOL(o->op_private & OPpTRANS_IDENTICAL),
+ cBOOL(o->op_private & OPpTRANS_USE_SVOP),
+ cBOOL(o->op_private & OPpTRANS_GROWS),
+ cBOOL(o->op_private & OPpTRANS_CAN_FORCE_UTF8),
+ max_expansion));
+
Safefree(r_map);
if(del && rlen != 0 && r_count == t_count) {
is_compiletime = 1;
has_code = 0;
if (expr->op_type == OP_LIST) {
- OP *o;
- for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
- if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
- has_code = 1;
- assert(!o->op_next);
- if (UNLIKELY(!OpHAS_SIBLING(o))) {
- assert(PL_parser && PL_parser->error_count);
- /* This can happen with qr/ (?{(^{})/. Just fake up
- the op we were expecting to see, to avoid crashing
- elsewhere. */
- op_sibling_splice(expr, o, 0,
- newSVOP(OP_CONST, 0, &PL_sv_no));
- }
- o->op_next = OpSIBLING(o);
- }
- else if (o->op_type != OP_CONST && o->op_type != OP_PUSHMARK)
- is_compiletime = 0;
- }
+ OP *child;
+ for (child = cLISTOPx(expr)->op_first; child; child = OpSIBLING(child)) {
+ if (child->op_type == OP_NULL && (child->op_flags & OPf_SPECIAL)) {
+ has_code = 1;
+ assert(!child->op_next);
+ if (UNLIKELY(!OpHAS_SIBLING(child))) {
+ assert(PL_parser && PL_parser->error_count);
+ /* This can happen with qr/ (?{(^{})/. Just fake up
+ the op we were expecting to see, to avoid crashing
+ elsewhere. */
+ op_sibling_splice(expr, child, 0,
+ newSVOP(OP_CONST, 0, &PL_sv_no));
+ }
+ child->op_next = OpSIBLING(child);
+ }
+ else if (child->op_type != OP_CONST && child->op_type != OP_PUSHMARK)
+ is_compiletime = 0;
+ }
}
else if (expr->op_type != OP_CONST)
is_compiletime = 0;
* also, mark any arrays as LIST/REF */
if (expr->op_type == OP_LIST) {
- OP *o;
- for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
+ OP *child;
+ for (child = cLISTOPx(expr)->op_first; child; child = OpSIBLING(child)) {
- if (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) {
- assert( !(o->op_flags & OPf_WANT));
+ if (child->op_type == OP_PADAV || child->op_type == OP_RV2AV) {
+ assert( !(child->op_flags & OPf_WANT));
/* push the array rather than its contents. The regex
* engine will retrieve and join the elements later */
- o->op_flags |= (OPf_WANT_LIST | OPf_REF);
+ child->op_flags |= (OPf_WANT_LIST | OPf_REF);
continue;
}
- if (!(o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)))
- continue;
- o->op_next = NULL; /* undo temporary hack from above */
- scalar(o);
- LINKLIST(o);
- if (cLISTOPo->op_first->op_type == OP_LEAVE) {
- LISTOP *leaveop = cLISTOPx(cLISTOPo->op_first);
- /* skip ENTER */
- assert(leaveop->op_first->op_type == OP_ENTER);
- assert(OpHAS_SIBLING(leaveop->op_first));
- o->op_next = OpSIBLING(leaveop->op_first);
- /* skip leave */
- assert(leaveop->op_flags & OPf_KIDS);
- assert(leaveop->op_last->op_next == (OP*)leaveop);
- leaveop->op_next = NULL; /* stop on last op */
- op_null((OP*)leaveop);
- }
- else {
- /* skip SCOPE */
- OP *scope = cLISTOPo->op_first;
- assert(scope->op_type == OP_SCOPE);
- assert(scope->op_flags & OPf_KIDS);
- scope->op_next = NULL; /* stop on last op */
- op_null(scope);
- }
+ if (!(child->op_type == OP_NULL && (child->op_flags & OPf_SPECIAL)))
+ continue;
+ child->op_next = NULL; /* undo temporary hack from above */
+ scalar(child);
+ LINKLIST(child);
+ if (cLISTOPx(child)->op_first->op_type == OP_LEAVE) {
+ LISTOP *leaveop = cLISTOPx(cLISTOPx(child)->op_first);
+ /* skip ENTER */
+ assert(leaveop->op_first->op_type == OP_ENTER);
+ assert(OpHAS_SIBLING(leaveop->op_first));
+ child->op_next = OpSIBLING(leaveop->op_first);
+ /* skip leave */
+ assert(leaveop->op_flags & OPf_KIDS);
+ assert(leaveop->op_last->op_next == (OP*)leaveop);
+ leaveop->op_next = NULL; /* stop on last op */
+ op_null((OP*)leaveop);
+ }
+ else {
+ /* skip SCOPE */
+ OP *scope = cLISTOPx(child)->op_first;
+ assert(scope->op_type == OP_SCOPE);
+ assert(scope->op_flags & OPf_KIDS);
+ scope->op_next = NULL; /* stop on last op */
+ op_null(scope);
+ }
/* XXX optimize_optree() must be called on o before
* CALL_PEEP(), as currently S_maybe_multiconcat() can't
* to the same optree later (where hopefully it won't do any
* harm as it can't convert an op to multiconcat if it's
* already been converted */
- optimize_optree(o);
-
- /* have to peep the DOs individually as we've removed it from
- * the op_next chain */
- CALL_PEEP(o);
- S_prune_chain_head(&(o->op_next));
- if (is_compiletime)
- /* runtime finalizes as part of finalizing whole tree */
- finalize_optree(o);
- }
+ optimize_optree(child);
+
+ /* have to peep the DOs individually as we've removed it from
+ * the op_next chain */
+ CALL_PEEP(child);
+ S_prune_chain_head(&(child->op_next));
+ if (is_compiletime)
+ /* runtime finalizes as part of finalizing whole tree */
+ finalize_optree(child);
+ }
}
else if (expr->op_type == OP_PADAV || expr->op_type == OP_RV2AV) {
assert( !(expr->op_flags & OPf_WANT));
If C<PERL_LOADMOD_DENY> is set, the module is loaded as if with C<no> rather
than C<use>.
+=for apidoc Amnh||PERL_LOADMOD_DENY
+=for apidoc Amnh||PERL_LOADMOD_NOIMPORT
+=for apidoc Amnh||PERL_LOADMOD_IMPORT_OPS
+
=cut */
void
A null pointer is returned as usual if there is no statically-determinable
subroutine.
+=for apidoc Amnh||OPpEARLY_CV
+=for apidoc Amnh||OPpENTERSUB_AMPER
+=for apidoc Amnh||RV2CVOPCV_MARK_EARLY
+=for apidoc Amnh||RV2CVOPCV_RETURN_NAME_GV
+
=cut
*/
continue;
case '_':
/* _ must be at the end */
- if (proto[1] && !strchr(";@%", proto[1]))
+ if (proto[1] && !memCHRs(";@%", proto[1]))
goto oops;
/* FALLTHROUGH */
case '$':
only the C<CALL_CHECKER_REQUIRE_GV> bit currently has a defined meaning
(for which see above). All other bits should be clear.
+=for apidoc Amnh||CALL_CHECKER_REQUIRE_GV
+
=for apidoc cv_get_call_checker
The original form of L</cv_get_call_checker_flags>, which does not return
if (CvISXSUB(cv) || !CvROOT(cv))
S_entersub_alloc_targ(aTHX_ o);
if (!namegv) {
- /* The original call checker API guarantees that a GV will be
+ /* The original call checker API guarantees that a GV will
be provided with the right name. So, if the old API was
used (or the REQUIRE_GV flag was passed), we have to reify
the CV’s GV, unless this is an anonymous sub. This is not
}
+OP *
+Perl_ck_isa(pTHX_ OP *o)
+{
+ OP *classop = cBINOPo->op_last;
+
+ PERL_ARGS_ASSERT_CK_ISA;
+
+ /* Convert barename into PV */
+ if(classop->op_type == OP_CONST && classop->op_private & OPpCONST_BARE) {
+ /* TODO: Optionally convert package to raw HV here */
+ classop->op_private &= ~(OPpCONST_BARE|OPpCONST_STRICT);
+ }
+
+ return o;
+}
+
/*
---------------------------------------------------------
}
/* if its an unrecognised, non-dangerous op, assume that it
- * it the cause of at least one safe scalar */
+ * is the cause of at least one safe scalar */
(*scalars_p)++;
flags = AAS_SAFE_SCALAR;
break;
bool next_is_hash = FALSE; /* is the next lookup to be a hash? */
bool is_last = FALSE; /* no more derefs to follow */
bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
+ UV action_word = 0; /* all actions so far */
UNOP_AUX_item *arg = arg_buf;
UNOP_AUX_item *action_ptr = arg_buf;
- if (pass)
- action_ptr->uv = 0;
- arg++;
+ arg++; /* reserve slot for first action word */
switch (action) {
case MDEREF_HV_gvsv_vivify_rv2hv_helem:
arg--;
}
- if (pass)
- action_ptr->uv |= (action << (action_ix * MDEREF_SHIFT));
+ action_word |= (action << (action_ix * MDEREF_SHIFT));
action_ix++;
action_count++;
- /* if there's no space for the next action, create a new slot
+ /* if there's no space for the next action, reserve a new slot
* for it *before* we start adding args for that action */
if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
- action_ptr = arg;
if (pass)
- arg->uv = 0;
+ action_ptr->uv = action_word;
+ action_word = 0;
+ action_ptr = arg;
arg++;
action_ix = 0;
}
/* success! */
+ if (!action_ix)
+ /* slot reserved for next action word not now needed */
+ arg--;
+ else if (pass)
+ action_ptr->uv = action_word;
+
if (pass) {
OP *mderef;
OP *p, *q;
case OP_AND:
case OP_OR:
case OP_DOR:
+ case OP_CMPCHAIN_AND:
while (cLOGOP->op_other->op_type == OP_NULL)
cLOGOP->op_other = cLOGOP->op_other->op_next;
while (o->op_next && ( o->op_type == o->op_next->op_type