#include "keywords.h"
#include "feature.h"
#include "regcomp.h"
+#include "invlist_inline.h"
#define CALL_PEEP(o) PL_peepp(aTHX_ o)
#define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
/* rounds up to nearest pointer */
#define SIZE_TO_PSIZE(x) (((x) + sizeof(I32 *) - 1)/sizeof(I32 *))
-#define DIFF(o,p) ((size_t)((I32 **)(p) - (I32**)(o)))
+
+#define DIFF(o,p) \
+ (assert(((char *)(p) - (char *)(o)) % sizeof(I32**) == 0), \
+ ((size_t)((I32 **)(p) - (I32**)(o))))
/* requires double parens and aTHX_ */
#define DEBUG_S_warn(args) \
PerlIO_printf(Perl_debug_log, "%s", SvPVx_nolen(Perl_mess args)) \
)
+/* opslot_size includes the size of the slot header, and an op can't be smaller than BASEOP */
+#define OPSLOT_SIZE_BASE (SIZE_TO_PSIZE(sizeof(OPSLOT)))
+
+/* the number of bytes to allocate for a slab with sz * sizeof(I32 **) space for op */
+#define OpSLABSizeBytes(sz) \
+ ((sz) * sizeof(I32 *) + STRUCT_OFFSET(OPSLAB, opslab_slots))
/* malloc a new op slab (suitable for attaching to PL_compcv).
- * sz is in units of pointers */
+ * sz is in units of pointers from the beginning of opslab_opslots */
static OPSLAB *
S_new_slab(pTHX_ OPSLAB *head, size_t sz)
{
OPSLAB *slab;
+ size_t sz_bytes = OpSLABSizeBytes(sz);
/* opslot_offset is only U16 */
- assert(sz < U16_MAX);
+ assert(sz < U16_MAX);
+ /* room for at least one op */
+ assert(sz >= OPSLOT_SIZE_BASE);
#ifdef PERL_DEBUG_READONLY_OPS
- slab = (OPSLAB *) mmap(0, sz * sizeof(I32 *),
+ slab = (OPSLAB *) mmap(0, sz_bytes,
PROT_READ|PROT_WRITE,
MAP_ANON|MAP_PRIVATE, -1, 0);
DEBUG_m(PerlIO_printf(Perl_debug_log, "mapped %lu at %p\n",
abort();
}
#else
- slab = (OPSLAB *)PerlMemShared_calloc(sz, sizeof(I32 *));
+ slab = (OPSLAB *)PerlMemShared_malloc(sz_bytes);
+ Zero(slab, sz_bytes, char);
#endif
slab->opslab_size = (U16)sz;
/* The context is unused in non-Windows */
PERL_UNUSED_CONTEXT;
#endif
- slab->opslab_free_space = sz - DIFF(slab, &slab->opslab_slots);
+ slab->opslab_free_space = sz;
slab->opslab_head = head ? head : slab;
DEBUG_S_warn((aTHX_ "allocated new op slab sz 0x%x, %p, head slab %p",
(unsigned int)slab->opslab_size, (void*)slab,
return slab;
}
+#define OPSLOT_SIZE_TO_INDEX(sz) ((sz) - OPSLOT_SIZE_BASE)
+
+#define link_freed_op(slab, o) S_link_freed_op(aTHX_ slab, o)
+/* Push freed op o onto the per-size free list of the head slab.
+ *
+ * slab->opslab_freed is an array of singly-linked free lists (chained
+ * through op_next), indexed by slot size via OPSLOT_SIZE_TO_INDEX()
+ * (i.e. opslot_size minus OPSLOT_SIZE_BASE; sizes appear to be in the
+ * same pointer units used elsewhere in this file — see SIZE_TO_PSIZE).
+ * The array is created lazily on the first free, and grown linearly
+ * (not exponentially — see comment below) whenever an op larger than
+ * any previously freed one arrives. */
+static void
+S_link_freed_op(pTHX_ OPSLAB *slab, OP *o) {
+ U16 sz = OpSLOT(o)->opslot_size;
+ U16 index = OPSLOT_SIZE_TO_INDEX(sz);
+
+ assert(sz >= OPSLOT_SIZE_BASE);
+ /* make sure the array is large enough to include ops this large */
+ if (!slab->opslab_freed) {
+ /* we don't have a free list array yet, make a new one */
+ slab->opslab_freed_size = index+1;
+ slab->opslab_freed = (OP**)PerlMemShared_calloc((slab->opslab_freed_size), sizeof(OP*));
+
+ if (!slab->opslab_freed)
+ croak_no_mem();
+ }
+ else if (index >= slab->opslab_freed_size) {
+ /* It's probably not worth doing exponential expansion here, the number of op sizes
+ is small.
+ */
+ /* We already have a list that isn't large enough, expand it */
+ size_t newsize = index+1;
+ OP **p = (OP **)PerlMemShared_realloc(slab->opslab_freed, newsize * sizeof(OP*));
+
+ if (!p)
+ croak_no_mem();
+
+ /* realloc doesn't zero the tail; clear the newly added entries */
+ Zero(p+slab->opslab_freed_size, newsize - slab->opslab_freed_size, OP *);
+
+ slab->opslab_freed = p;
+ slab->opslab_freed_size = newsize;
+ }
+
+ /* push onto the front of the list for this size class */
+ o->op_next = slab->opslab_freed[index];
+ slab->opslab_freed[index] = o;
+}
/* Returns a sz-sized block of memory (suitable for holding an op) from
* a free slot in the chain of op slabs attached to PL_compcv.
OPSLAB *slab2;
OPSLOT *slot;
OP *o;
- size_t opsz;
+ size_t sz_in_p; /* size in pointer units, including the OPSLOT header */
/* We only allocate ops from the slab during subroutine compilation.
We find the slab via PL_compcv, hence that must be non-NULL. It could
}
else ++(head_slab = (OPSLAB *)CvSTART(PL_compcv))->opslab_refcnt;
- opsz = SIZE_TO_PSIZE(sz);
- sz = opsz + OPSLOT_HEADER_P;
+ sz_in_p = SIZE_TO_PSIZE(sz + OPSLOT_HEADER);
- /* The slabs maintain a free list of OPs. In particular, constant folding
+ /* The head slab for each CV maintains a free list of OPs. In particular, constant folding
will free up OPs, so it makes sense to re-use them where possible. A
freed up slot is used in preference to a new allocation. */
- if (head_slab->opslab_freed) {
- OP **too = &head_slab->opslab_freed;
- o = *too;
- DEBUG_S_warn((aTHX_ "found free op at %p, slab %p, head slab %p",
- (void*)o,
- (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
- (void*)head_slab));
-
- while (o && OpSLOT(o)->opslot_size < sz) {
- DEBUG_S_warn((aTHX_ "Alas! too small"));
- o = *(too = &o->op_next);
- if (o) { DEBUG_S_warn((aTHX_ "found another free op at %p", (void*)o)); }
- }
- if (o) {
+ if (head_slab->opslab_freed &&
+ OPSLOT_SIZE_TO_INDEX(sz_in_p) < head_slab->opslab_freed_size) {
+ U16 base_index;
+
+ /* look for a large enough size with any freed ops */
+ for (base_index = OPSLOT_SIZE_TO_INDEX(sz_in_p);
+ base_index < head_slab->opslab_freed_size && !head_slab->opslab_freed[base_index];
+ ++base_index) {
+ }
+
+ if (base_index < head_slab->opslab_freed_size) {
+ /* found a freed op */
+ o = head_slab->opslab_freed[base_index];
+
DEBUG_S_warn((aTHX_ "realloced op at %p, slab %p, head slab %p",
- (void*)o,
- (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
- (void*)head_slab));
- *too = o->op_next;
- Zero(o, opsz, I32 *);
+ (void *)o, (void *)OpMySLAB(o), (void *)head_slab));
+ head_slab->opslab_freed[base_index] = o->op_next;
+ Zero(o, sz, char);
o->op_slabbed = 1;
goto gotit;
}
}
#define INIT_OPSLOT(s) \
- slot->opslot_offset = DIFF(slab2, slot) ; \
+ slot->opslot_offset = DIFF(&slab2->opslab_slots, slot) ; \
slot->opslot_size = s; \
slab2->opslab_free_space -= s; \
o = &slot->opslot_op; \
/* The partially-filled slab is next in the chain. */
slab2 = head_slab->opslab_next ? head_slab->opslab_next : head_slab;
- if (slab2->opslab_free_space < sz) {
+ if (slab2->opslab_free_space < sz_in_p) {
/* Remaining space is too small. */
/* If we can fit a BASEOP, add it to the free chain, so as not
to waste it. */
- if (slab2->opslab_free_space >= SIZE_TO_PSIZE(sizeof(OP)) + OPSLOT_HEADER_P) {
+ if (slab2->opslab_free_space >= OPSLOT_SIZE_BASE) {
slot = &slab2->opslab_slots;
INIT_OPSLOT(slab2->opslab_free_space);
o->op_type = OP_FREED;
- o->op_next = head_slab->opslab_freed;
- head_slab->opslab_freed = o;
+ DEBUG_S_warn((aTHX_ "linked unused op space at %p, slab %p, head slab %p",
+ (void *)o, (void *)slab2, (void *)head_slab));
+ link_freed_op(head_slab, o);
}
/* Create a new slab. Make this one twice as big. */
slab2->opslab_next = head_slab->opslab_next;
head_slab->opslab_next = slab2;
}
- assert(slab2->opslab_size >= sz);
+ assert(slab2->opslab_size >= sz_in_p);
/* Create a new op slot */
- slot = (OPSLOT *)
- ((I32 **)&slab2->opslab_slots
- + slab2->opslab_free_space - sz);
+ slot = OpSLOToff(slab2, slab2->opslab_free_space - sz_in_p);
assert(slot >= &slab2->opslab_slots);
- INIT_OPSLOT(sz);
+ INIT_OPSLOT(sz_in_p);
DEBUG_S_warn((aTHX_ "allocating op at %p, slab %p, head slab %p",
(void*)o, (void*)slab2, (void*)head_slab));
slab->opslab_readonly = 1;
for (; slab; slab = slab->opslab_next) {
/*DEBUG_U(PerlIO_printf(Perl_debug_log,"mprotect ->ro %lu at %p\n",
- (unsigned long) slab->opslab_size, slab));*/
- if (mprotect(slab, slab->opslab_size * sizeof(I32 *), PROT_READ))
- Perl_warn(aTHX_ "mprotect for %p %lu failed with %d", slab,
+ (unsigned long) slab->opslab_size, (void *)slab));*/
+ if (mprotect(slab, OpSLABSizeBytes(slab->opslab_size), PROT_READ))
+ Perl_warn(aTHX_ "mprotect for %p %lu failed with %d", (void *)slab,
(unsigned long)slab->opslab_size, errno);
}
}
slab2 = slab;
for (; slab2; slab2 = slab2->opslab_next) {
/*DEBUG_U(PerlIO_printf(Perl_debug_log,"mprotect ->rw %lu at %p\n",
- (unsigned long) size, slab2));*/
- if (mprotect((void *)slab2, slab2->opslab_size * sizeof(I32 *),
+ (unsigned long) size, (void *)slab2));*/
+ if (mprotect((void *)slab2, OpSLABSizeBytes(slab2->opslab_size),
PROT_READ|PROT_WRITE)) {
- Perl_warn(aTHX_ "mprotect RW for %p %lu failed with %d", slab,
+ Perl_warn(aTHX_ "mprotect RW for %p %lu failed with %d", (void *)slab,
(unsigned long)slab2->opslab_size, errno);
}
}
/* If this op is already freed, our refcount will get screwy. */
assert(o->op_type != OP_FREED);
o->op_type = OP_FREED;
- o->op_next = slab->opslab_freed;
- slab->opslab_freed = o;
+ link_freed_op(slab, o);
DEBUG_S_warn((aTHX_ "freeing op at %p, slab %p, head slab %p",
- (void*)o,
- (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset,
- (void*)slab));
+ (void*)o, (void *)OpMySLAB(o), (void*)slab));
OpslabREFCNT_dec_padok(slab);
}
PERL_UNUSED_CONTEXT;
DEBUG_S_warn((aTHX_ "freeing slab %p", (void*)slab));
assert(slab->opslab_refcnt == 1);
+ PerlMemShared_free(slab->opslab_freed);
do {
slab2 = slab->opslab_next;
#ifdef DEBUGGING
#ifdef PERL_DEBUG_READONLY_OPS
DEBUG_m(PerlIO_printf(Perl_debug_log, "Deallocate slab at %p\n",
(void*)slab));
- if (munmap(slab, slab->opslab_size * sizeof(I32 *))) {
+ if (munmap(slab, OpSLABSizeBytes(slab->opslab_size))) {
perror("munmap failed");
abort();
}
PERL_ARGS_ASSERT_OPSLAB_FORCE_FREE;
slab2 = slab;
do {
- OPSLOT *slot = (OPSLOT*)
- ((I32**)&slab2->opslab_slots + slab2->opslab_free_space);
- OPSLOT *end = (OPSLOT*)
- ((I32**)slab2 + slab2->opslab_size);
+ OPSLOT *slot = OpSLOToff(slab2, slab2->opslab_free_space);
+ OPSLOT *end = OpSLOToff(slab2, slab2->opslab_size);
for (; slot < end;
slot = (OPSLOT*) ((I32**)slot + slot->opslot_size) )
{
(int)n, PL_op_desc[(o)->op_type], t, OP_DESC(kid)), 0);
}
-/* remove flags var, its unused in all callers, move to to right end since gv
- and kid are always the same */
STATIC void
S_bad_type_gv(pTHX_ I32 n, GV *gv, const OP *kid, const char *t)
{
if (!(flags & SVf_UTF8 && UTF8_IS_START(name[1]))
&& isASCII(name[1])
- && (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1]))) {
+ && (!isPRINT(name[1]) || memCHRs("\t\n\r\f", name[1]))) {
/* diag_listed_as: Can't use global %s in %s */
yyerror(Perl_form(aTHX_ "Can't use global %c^%c%.*s in %s",
name[0], toCTRL(name[1]),
}
/*
-=head1 Optree Manipulation Functions
+=for apidoc_section $optree_manipulation
=for apidoc alloccopstash
void
Perl_op_free(pTHX_ OP *o)
{
- dVAR;
OPCODE type;
OP *top_op = o;
OP *next_op = o;
Perl_op_clear(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_OP_CLEAR;
case OP_TRANS:
case OP_TRANSR:
if ( (o->op_type == OP_TRANS || o->op_type == OP_TRANSR)
- && (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)))
+ && (o->op_private & OPpTRANS_USE_SVOP))
{
#ifdef USE_ITHREADS
if (cPADOPo->op_padix > 0) {
void
Perl_op_null(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_OP_NULL;
Perl_op_refcnt_lock(pTHX)
PERL_TSA_ACQUIRE(PL_op_mutex)
{
-#ifdef USE_ITHREADS
- dVAR;
-#endif
PERL_UNUSED_CONTEXT;
OP_REFCNT_LOCK;
}
Perl_op_refcnt_unlock(pTHX)
PERL_TSA_RELEASE(PL_op_mutex)
{
-#ifdef USE_ITHREADS
- dVAR;
-#endif
PERL_UNUSED_CONTEXT;
OP_REFCNT_UNLOCK;
}
you to delete zero or more sequential nodes, replacing them with zero or
more different nodes. Performs the necessary op_first/op_last
housekeeping on the parent node and op_sibling manipulation on the
-children. The last deleted node will be marked as as the last node by
+children. The last deleted node will be marked as the last node by
updating the op_sibling/op_sibparent or op_moresib field as appropriate.
Note that op_next is not manipulated, and nodes are not freed; that is the
LOGOP *
Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other)
{
- dVAR;
LOGOP *logop;
OP *kid = first;
NewOp(1101, logop, 1, LOGOP);
OP *
Perl_scalarvoid(pTHX_ OP *arg)
{
- dVAR;
OP *kid;
SV* sv;
OP *o = arg;
STATIC void
S_maybe_multiconcat(pTHX_ OP *o)
{
- dVAR;
OP *lastkidop; /* the right-most of any kids unshifted onto o */
OP *topop; /* the top-most op in the concat tree (often equals o,
unless there are assign/stringify ops above it */
targetop = OpSIBLING(topop);
if (!targetop) /* probably some sort of syntax error */
return;
+
+ /* don't optimise away assign in 'local $foo = ....' */
+ if ( (targetop->op_private & OPpLVAL_INTRO)
+ /* these are the common ops which do 'local', but
+ * not all */
+ && ( targetop->op_type == OP_GVSV
+ || targetop->op_type == OP_RV2SV
+ || targetop->op_type == OP_AELEM
+ || targetop->op_type == OP_HELEM
+ )
+ )
+ return;
}
else if ( topop->op_type == OP_CONCAT
&& (topop->op_flags & OPf_STACKED)
}
if (targetop) {
- /* Can targetop (the LHS) if it's a padsv, be be optimised
+ /* Can targetop (the LHS) if it's a padsv, be optimised
* away and use OPpTARGET_MY instead?
*/
if ( (targetop->op_type == OP_PADSV)
* X .= Y
*
* otherwise we could be doing something like $x = "foo", which
- * if treated as as a concat, would fail to COW.
+ * if treated as a concat, would fail to COW.
*/
if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
return;
static void
S_lvref(pTHX_ OP *o, I32 type)
{
- dVAR;
OP *kid;
OP * top_op = o;
OP *
Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags)
{
- dVAR;
OP *top_op = o;
if (!o || (PL_parser && PL_parser->error_count))
OP *
Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref)
{
- dVAR;
OP * top_op = o;
PERL_ARGS_ASSERT_DOREF;
return newUNOP(OP_NOT, OPf_SPECIAL, scalar(o));
}
+/* Begin a chained comparison: build a BINOP of relational op `type`
+ * with `left` and `right` as its two kids.  A NULL operand is replaced
+ * by an OP_NULL placeholder; both kids are forced to scalar context.
+ * op_private is set to 2, matching a two-argument BINOP.  The returned
+ * op is grown by cmpchain_extend() and completed by cmpchain_finish(). */
+OP *
+Perl_cmpchain_start(pTHX_ I32 type, OP *left, OP *right)
+{
+ BINOP *bop;
+ OP *op;
+
+ if (!left)
+ left = newOP(OP_NULL, 0);
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(left);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ cBINOPx(op)->op_flags = OPf_KIDS;
+ cBINOPx(op)->op_private = 2;
+ cBINOPx(op)->op_first = left;
+ cBINOPx(op)->op_last = right;
+ OpMORESIB_set(left, right);
+ OpLASTSIB_set(right, op);
+ return op;
+}
+
+/* Add one more comparison of op `type`, with right-hand operand `right`,
+ * to a chain begun by cmpchain_start().
+ *
+ * On the first extension `ch` is still the BINOP from cmpchain_start():
+ * it is dismantled (kids detached, flags cleared) and its right kid, the
+ * now-childless cmp op itself, and its left kid are re-linked, in that
+ * order, as the kid list of a new OP_NULL UNOP which thereafter carries
+ * the whole chain as a flat sibling list.  Subsequent calls see the
+ * OP_NULL container and skip that step.
+ *
+ * Each call then pushes the new right operand followed by a new kidless
+ * cmp op of `type` onto the front of the list, so the list reads
+ * (rightmost operand, cmp op, operand, cmp op, ..., leftmost operand).
+ * cmpchain_finish() unpicks this list into real ops. */
+OP *
+Perl_cmpchain_extend(pTHX_ I32 type, OP *ch, OP *right)
+{
+ BINOP *bop;
+ OP *op;
+
+ PERL_ARGS_ASSERT_CMPCHAIN_EXTEND;
+ if (!right)
+ right = newOP(OP_NULL, 0);
+ scalar(right);
+ NewOp(0, bop, 1, BINOP);
+ op = (OP*)bop;
+ ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP);
+ OpTYPE_set(op, type);
+ if (ch->op_type != OP_NULL) {
+ UNOP *lch;
+ OP *nch, *cleft, *cright;
+ NewOp(0, lch, 1, UNOP);
+ nch = (OP*)lch;
+ OpTYPE_set(nch, OP_NULL);
+ nch->op_flags = OPf_KIDS;
+ cleft = cBINOPx(ch)->op_first;
+ cright = cBINOPx(ch)->op_last;
+ cBINOPx(ch)->op_first = NULL;
+ cBINOPx(ch)->op_last = NULL;
+ cBINOPx(ch)->op_private = 0;
+ cBINOPx(ch)->op_flags = 0;
+ cUNOPx(nch)->op_first = cright;
+ OpMORESIB_set(cright, ch);
+ OpMORESIB_set(ch, cleft);
+ OpLASTSIB_set(cleft, nch);
+ ch = nch;
+ }
+ OpMORESIB_set(right, op);
+ OpMORESIB_set(op, cUNOPx(ch)->op_first);
+ cUNOPx(ch)->op_first = right;
+ return ch;
+}
+
+/* Finish a comparison chain built by cmpchain_start()/cmpchain_extend().
+ *
+ * If the chain holds a single comparison, `ch` is still a plain cmp
+ * BINOP: run its check routine, and constant-fold it if the checker
+ * didn't replace it.
+ *
+ * Otherwise `ch` is the OP_NULL container whose kid list (see
+ * cmpchain_extend) is (rightmost operand, cmp op, operand, cmp op, ...,
+ * leftmost operand).  Walk that list from the right, rebuilding each cmp
+ * op as a real two-kid BINOP.  An interior operand is shared by two
+ * comparisons: it is wrapped in OP_CMPCHAIN_DUP to serve as the next
+ * comparison's right operand, with an OP_NULL placeholder standing in
+ * for it as the left kid here.  The comparisons are combined with
+ * OP_CMPCHAIN_AND logops (presumably short-circuiting like `&&` —
+ * confirm against its pp function), accumulating right-to-left. */
+OP *
+Perl_cmpchain_finish(pTHX_ OP *ch)
+{
+
+ PERL_ARGS_ASSERT_CMPCHAIN_FINISH;
+ if (ch->op_type != OP_NULL) {
+ OPCODE cmpoptype = ch->op_type;
+ ch = CHECKOP(cmpoptype, ch);
+ if(!ch->op_next && ch->op_type == cmpoptype)
+ ch = fold_constants(op_integerize(op_std_init(ch)));
+ return ch;
+ } else {
+ OP *condop = NULL;
+ OP *rightarg = cUNOPx(ch)->op_first;
+ cUNOPx(ch)->op_first = OpSIBLING(rightarg);
+ OpLASTSIB_set(rightarg, NULL);
+ while (1) {
+ OP *cmpop = cUNOPx(ch)->op_first;
+ OP *leftarg = OpSIBLING(cmpop);
+ OPCODE cmpoptype = cmpop->op_type;
+ OP *nextrightarg;
+ bool is_last;
+ is_last = !(cUNOPx(ch)->op_first = OpSIBLING(leftarg));
+ OpLASTSIB_set(cmpop, NULL);
+ OpLASTSIB_set(leftarg, NULL);
+ if (is_last) {
+ /* leftmost comparison: the container is empty now; clearing
+ * op_flags (OPf_KIDS) lets op_free() release just the
+ * container */
+ ch->op_flags = 0;
+ op_free(ch);
+ nextrightarg = NULL;
+ } else {
+ /* shared interior operand: dup it for the comparison to the
+ * left, and use an OP_NULL placeholder as our left kid */
+ nextrightarg = newUNOP(OP_CMPCHAIN_DUP, 0, leftarg);
+ leftarg = newOP(OP_NULL, 0);
+ }
+ cBINOPx(cmpop)->op_first = leftarg;
+ cBINOPx(cmpop)->op_last = rightarg;
+ OpMORESIB_set(leftarg, rightarg);
+ OpLASTSIB_set(rightarg, cmpop);
+ cmpop->op_flags = OPf_KIDS;
+ cmpop->op_private = 2;
+ cmpop = CHECKOP(cmpoptype, cmpop);
+ if(!cmpop->op_next && cmpop->op_type == cmpoptype)
+ cmpop = op_integerize(op_std_init(cmpop));
+ condop = condop ? newLOGOP(OP_CMPCHAIN_AND, 0, cmpop, condop) :
+ cmpop;
+ if (!nextrightarg)
+ return condop;
+ rightarg = nextrightarg;
+ }
+ }
+}
+
/*
=for apidoc op_scope
OP *
Perl_op_scope(pTHX_ OP *o)
{
- dVAR;
if (o) {
if (o->op_flags & OPf_PARENS || PERLDB_NOOPT || TAINTING_get) {
o = op_prepend_elem(OP_LINESEQ,
}
/*
-=head1 Compile-time scope hooks
+=for apidoc_section $scope
=for apidoc blockhook_register
bool sigil = FALSE;
/* some heuristics to detect a potential error */
- while (*s && (strchr(", \t\n", *s)))
+ while (*s && (memCHRs(", \t\n", *s)))
s++;
while (1) {
- if (*s && (strchr("@$%", *s) || (!lex && *s == '*'))
+ if (*s && (memCHRs("@$%", *s) || (!lex && *s == '*'))
&& *++s
&& (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s))) {
s++;
sigil = TRUE;
while (*s && (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s)))
s++;
- while (*s && (strchr(", \t\n", *s)))
+ while (*s && (memCHRs(", \t\n", *s)))
s++;
}
else
PERL_ARGS_ASSERT_JMAYBE;
if (o->op_type == OP_LIST) {
- OP * const o2
- = newSVREF(newGVOP(OP_GV, 0, gv_fetchpvs(";", GV_ADD|GV_NOTQUAL, SVt_PV)));
- o = op_convert_list(OP_JOIN, 0, op_prepend_elem(OP_LIST, o2, o));
+ if (FEATURE_MULTIDIMENSIONAL_IS_ENABLED) {
+ OP * const o2
+ = newSVREF(newGVOP(OP_GV, 0, gv_fetchpvs(";", GV_ADD|GV_NOTQUAL, SVt_PV)));
+ o = op_convert_list(OP_JOIN, 0, op_prepend_elem(OP_LIST, o2, o));
+ }
+ else {
+ /* If the user disables this, then a warning might not be enough to alert
+ them to a possible change of behaviour here, so throw an exception.
+ */
+ yyerror("Multidimensional hash lookup is disabled");
+ }
}
return o;
}
/* integerize op. */
if ((PL_opargs[type] & OA_OTHERINT) && (PL_hints & HINT_INTEGER))
{
- dVAR;
o->op_ppaddr = PL_ppaddr[++(o->op_type)];
}
}
/* This function exists solely to provide a scope to limit
- setjmp/longjmp() messing with auto variables.
+ setjmp/longjmp() messing with auto variables. It cannot be inlined because
+ it uses setjmp
*/
-PERL_STATIC_INLINE int
+STATIC int
S_fold_constants_eval(pTHX) {
int ret = 0;
dJMPENV;
static OP *
S_fold_constants(pTHX_ OP *const o)
{
- dVAR;
OP *curop;
OP *newop;
I32 type = o->op_type;
static void
S_gen_constant_list(pTHX_ OP *o)
{
- dVAR;
OP *curop, *old_next;
SV * const oldwarnhook = PL_warnhook;
SV * const olddiehook = PL_diehook;
}
/*
-=head1 Optree Manipulation Functions
+=for apidoc_section $optree_manipulation
*/
/* List constructors */
OP *
Perl_op_convert_list(pTHX_ I32 type, I32 flags, OP *o)
{
- dVAR;
if (type < 0) type = -type, flags |= OPf_SPECIAL;
if (!o || o->op_type != OP_LIST)
o = force_list(o, 0);
/*
-=head1 Optree construction
+=for apidoc_section $optree_construction
=for apidoc newNULLLIST
OP *
Perl_newLISTOP(pTHX_ I32 type, I32 flags, OP *first, OP *last)
{
- dVAR;
LISTOP *listop;
/* Note that allocating an OP_PUSHMARK can die under Safe.pm if
* pushmark is banned. So do it now while existing ops are in a
OP *
Perl_newOP(pTHX_ I32 type, I32 flags)
{
- dVAR;
OP *o;
if (type == -OP_ENTEREVAL) {
child of the unary op; it is consumed by this function and become part
of the constructed op tree.
+=for apidoc Amnh||OPf_KIDS
+
=cut
*/
OP *
Perl_newUNOP(pTHX_ I32 type, I32 flags, OP *first)
{
- dVAR;
UNOP *unop;
if (type == -OP_ENTEREVAL) {
OP *
Perl_newUNOP_AUX(pTHX_ I32 type, I32 flags, OP *first, UNOP_AUX_item *aux)
{
- dVAR;
UNOP_AUX *unop;
assert((PL_opargs[type] & OA_CLASS_MASK) == OA_UNOP_AUX
static OP*
S_newMETHOP_internal(pTHX_ I32 type, I32 flags, OP* dynamic_meth, SV* const_meth) {
- dVAR;
METHOP *methop;
assert((PL_opargs[type] & OA_CLASS_MASK) == OA_METHOP
OP *
Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last)
{
- dVAR;
BINOP *binop;
ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP
return fold_constants(op_integerize(op_std_init((OP *)binop)));
}
-/* Helper function for S_pmtrans(): comparison function to sort an array
- * of codepoint range pairs. Sorts by start point, or if equal, by end
- * point */
-
-static int uvcompare(const void *a, const void *b)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__pure__;
-static int uvcompare(const void *a, const void *b)
+void
+Perl_invmap_dump(pTHX_ SV* invlist, UV *map)
{
- if (*((const UV *)a) < (*(const UV *)b))
- return -1;
- if (*((const UV *)a) > (*(const UV *)b))
- return 1;
- if (*((const UV *)a+1) < (*(const UV *)b+1))
- return -1;
- if (*((const UV *)a+1) > (*(const UV *)b+1))
- return 1;
- return 0;
+ const char indent[] = " ";
+
+ UV len = _invlist_len(invlist);
+ UV * array = invlist_array(invlist);
+ UV i;
+
+ PERL_ARGS_ASSERT_INVMAP_DUMP;
+
+ for (i = 0; i < len; i++) {
+ UV start = array[i];
+ UV end = (i + 1 < len) ? array[i+1] - 1 : IV_MAX;
+
+ PerlIO_printf(Perl_debug_log, "%s[%" UVuf "] 0x%04" UVXf, indent, i, start);
+ if (end == IV_MAX) {
+ PerlIO_printf(Perl_debug_log, " .. INFTY");
+ }
+ else if (end != start) {
+ PerlIO_printf(Perl_debug_log, " .. 0x%04" UVXf, end);
+ }
+ else {
+ PerlIO_printf(Perl_debug_log, " ");
+ }
+
+ PerlIO_printf(Perl_debug_log, "\t");
+
+ if (map[i] == TR_UNLISTED) {
+ PerlIO_printf(Perl_debug_log, "TR_UNLISTED\n");
+ }
+ else if (map[i] == TR_SPECIAL_HANDLING) {
+ PerlIO_printf(Perl_debug_log, "TR_SPECIAL_HANDLING\n");
+ }
+ else {
+ PerlIO_printf(Perl_debug_log, "0x%04" UVXf "\n", map[i]);
+ }
+ }
}
/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl
* OPpTRANS_SQUASH
* OPpTRANS_DELETE
* flags as appropriate; this function may add
- * OPpTRANS_FROM_UTF
- * OPpTRANS_TO_UTF
+ * OPpTRANS_USE_SVOP
+ * OPpTRANS_CAN_FORCE_UTF8
* OPpTRANS_IDENTICAL
* OPpTRANS_GROWS
* flags
static OP *
S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
{
+ /* This function compiles a tr///, from data gathered from toke.c, into a
+ * form suitable for use by do_trans() in doop.c at runtime.
+ *
+ * It first normalizes the data, while discarding extraneous inputs; then
+ * writes out the compiled data. The normalization allows for complete
+ * analysis, and avoids some false negatives and positives earlier versions
+ * of this code had.
+ *
+ * The normalization form is an inversion map (described below in detail).
+ * This is essentially the compiled form for tr///'s that require UTF-8,
+ * and it's easy to use it to write the 257-byte table for tr///'s that
+ * don't need UTF-8. That table is identical to what's been in use for
+ * many perl versions, except that it doesn't handle some edge cases that
+ * it used to, involving code points above 255. The UTF-8 form now handles
+ * these. (This could be changed with extra coding should it be shown to be
+ * desirable.)
+ *
+ * If the complement (/c) option is specified, the lhs string (tstr) is
+ * parsed into an inversion list. Complementing these is trivial. Then a
+ * complemented tstr is built from that, and used thenceforth. This hides
+ * the fact that it was complemented from almost all successive code.
+ *
+ * One of the important characteristics to know about the input is whether
+ * the transliteration may be done in place, or does a temporary need to be
+ * allocated, then copied. If the replacement for every character in every
+ * possible string takes up no more bytes than the character it
+ * replaces, then it can be edited in place. Otherwise the replacement
+ * could overwrite a byte we are about to read, depending on the strings
+ * being processed. The comments and variable names here refer to this as
+ * "growing". Some inputs won't grow, and might even shrink under /d, but
+ * some inputs could grow, so we have to assume any given one might grow.
+ * On very long inputs, the temporary could eat up a lot of memory, so we
+ * want to avoid it if possible. For non-UTF-8 inputs, everything is
+ * single-byte, so can be edited in place, unless there is something in the
+ * pattern that could force it into UTF-8. The inversion map makes it
+ * feasible to determine this. Previous versions of this code pretty much
+ * punted on determining if UTF-8 could be edited in place. Now, this code
+ * is rigorous in making that determination.
+ *
+ * Another characteristic we need to know is whether the lhs and rhs are
+ * identical. If so, and no other flags are present, the only effect of
+ * the tr/// is to count the characters present in the input that are
+ * mentioned in the lhs string. The implementation of that is easier and
+ * runs faster than the more general case. Normalizing here allows for
+ * accurate determination of this. Previously there were false negatives
+ * possible.
+ *
+ * Instead of 'transliterated', the comments here use 'unmapped' for the
+ * characters that are left unchanged by the operation; otherwise they are
+ * 'mapped'
+ *
+ * The lhs of the tr/// is here referred to as the t side.
+ * The rhs of the tr/// is here referred to as the r side.
+ */
+
SV * const tstr = ((SVOP*)expr)->op_sv;
SV * const rstr = ((SVOP*)repl)->op_sv;
STRLEN tlen;
STRLEN rlen;
- const U8 *t = (U8*)SvPV_const(tstr, tlen);
- const U8 *r = (U8*)SvPV_const(rstr, rlen);
- Size_t i, j;
- bool grows = FALSE;
- OPtrans_map *tbl;
- SSize_t struct_size; /* malloced size of table struct */
-
+ const U8 * t0 = (U8*)SvPV_const(tstr, tlen);
+ const U8 * r0 = (U8*)SvPV_const(rstr, rlen);
+ const U8 * t = t0;
+ const U8 * r = r0;
+ UV t_count = 0, r_count = 0; /* Number of characters in search and
+ replacement lists */
+
+ /* khw thinks some of the private flags for this op are quaintly named.
+ * OPpTRANS_GROWS for example is TRUE if the replacement for some lhs
+ * character when represented in UTF-8 is longer than the original
+ * character's UTF-8 representation */
const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT);
const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH);
const bool del = cBOOL(o->op_private & OPpTRANS_DELETE);
- SV* swash;
+
+ /* Set to true if there is some character < 256 in the lhs that maps to
+ * above 255. If so, a non-UTF-8 match string can be forced into being in
+ * UTF-8 by a tr/// operation. */
+ bool can_force_utf8 = FALSE;
+
+ /* What is the maximum expansion factor in UTF-8 transliterations. If a
+ * 2-byte UTF-8 encoded character is to be replaced by a 3-byte one, its
+ * expansion factor is 1.5. This number is used at runtime to calculate
+ * how much space to allocate for non-inplace transliterations. Without
+ * this number, the worst case is 14, which is extremely unlikely to happen
+ * in real life, and could require significant memory overhead. */
+ NV max_expansion = 1.;
+
+ UV t_range_count, r_range_count, min_range_count;
+ UV* t_array;
+ SV* t_invlist;
+ UV* r_map;
+ UV r_cp = 0, t_cp = 0;
+ UV t_cp_end = (UV) -1;
+ UV r_cp_end;
+ Size_t len;
+ AV* invmap;
+ UV final_map = TR_UNLISTED; /* The final character in the replacement
+ list, updated as we go along. Initialize
+ to something illegal */
+
+ bool rstr_utf8 = cBOOL(SvUTF8(rstr));
+ bool tstr_utf8 = cBOOL(SvUTF8(tstr));
+
+ const U8* tend = t + tlen;
+ const U8* rend = r + rlen;
+
+ SV * inverted_tstr = NULL;
+
+ Size_t i;
+ unsigned int pass2;
+
+ /* This routine implements detection of a transliteration having a longer
+ * UTF-8 representation than its source, by partitioning all the possible
+ * code points of the platform into equivalence classes of the same UTF-8
+ * byte length in the first pass. As it constructs the mappings, it carves
+ * these up into smaller chunks, but doesn't merge any together. This
+ * makes it easy to find the instances it's looking for. A second pass is
+ * done after this has been determined which merges things together to
+ * shrink the table for runtime. The table below is used for both ASCII
+ * and EBCDIC platforms. On EBCDIC, the byte length is not monotonically
+ * increasing for code points below 256. To correct for that, the macro
+ * CP_ADJUST defined below converts those code points to ASCII in the first
+ * pass, and we use the ASCII partition values. That works because the
+ * growth factor will be unaffected, which is all that is calculated during
+ * the first pass. */
+ UV PL_partition_by_byte_length[] = {
+ 0,
+ 0x80, /* Below this is 1 byte representations */
+ (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))), /* 2 bytes below this */
+ (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))), /* 3 bytes below this */
+ ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))), /* 4 bytes below this */
+ ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))), /* 5 bytes below this */
+ ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT))) /* 6 bytes below this */
+
+# ifdef UV_IS_QUAD
+ ,
+ ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT))) /* 7 bytes below this */
+# endif
+
+ };
PERL_ARGS_ASSERT_PMTRANS;
PL_hints |= HINT_BLOCK_SCOPE;
- if (SvUTF8(tstr))
- o->op_private |= OPpTRANS_FROM_UTF;
+ /* If /c, the search list is sorted and complemented. This is now done by
+ * creating an inversion list from it, and then trivially inverting that.
+ * The previous implementation used qsort, but creating the list
+ * automatically keeps it sorted as we go along */
+ if (complement) {
+ UV start, end;
+ SV * inverted_tlist = _new_invlist(tlen);
+ Size_t temp_len;
+
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "%s: %d: tstr before inversion=\n%s\n",
+ __FILE__, __LINE__, _byte_dump_string(t, tend - t, 0)));
+
+ while (t < tend) {
+
+ /* Non-utf8 strings don't have ranges, so each character is listed
+ * out */
+ if (! tstr_utf8) {
+ inverted_tlist = add_cp_to_invlist(inverted_tlist, *t);
+ t++;
+ }
+ else { /* But UTF-8 strings have been parsed in toke.c to have
+ * ranges if appropriate. */
+ UV t_cp;
+ Size_t t_char_len;
+
+ /* Get the first character */
+ t_cp = valid_utf8_to_uvchr(t, &t_char_len);
+ t += t_char_len;
+
+ /* If the next byte indicates that this wasn't the first
+ * element of a range, the range is just this one */
+ if (t >= tend || *t != RANGE_INDICATOR) {
+ inverted_tlist = add_cp_to_invlist(inverted_tlist, t_cp);
+ }
+ else { /* Otherwise, ignore the indicator byte, and get the
+ final element, and add the whole range */
+ t++;
+ t_cp_end = valid_utf8_to_uvchr(t, &t_char_len);
+ t += t_char_len;
+
+ inverted_tlist = _add_range_to_invlist(inverted_tlist,
+ t_cp, t_cp_end);
+ }
+ }
+ } /* End of parse through tstr */
+
+ /* The inversion list is done; now invert it */
+ _invlist_invert(inverted_tlist);
+
+ /* Now go through the inverted list and create a new tstr for the rest
+ * of the routine to use. Since the UTF-8 version can have ranges, and
+ * can be much more compact than the non-UTF-8 version, we create the
+ * string in UTF-8 even if not necessary. (This is just an intermediate
+ * value that gets thrown away anyway.) */
+ invlist_iterinit(inverted_tlist);
+ inverted_tstr = newSVpvs("");
+ while (invlist_iternext(inverted_tlist, &start, &end)) {
+ U8 temp[UTF8_MAXBYTES];
+ U8 * temp_end_pos;
+
+ /* IV_MAX keeps things from going out of bounds */
+ start = MIN(IV_MAX, start);
+ end = MIN(IV_MAX, end);
+
+ temp_end_pos = uvchr_to_utf8(temp, start);
+ sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);
+
+ if (start != end) {
+ Perl_sv_catpvf(aTHX_ inverted_tstr, "%c", RANGE_INDICATOR);
+ temp_end_pos = uvchr_to_utf8(temp, end);
+ sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);
+ }
+ }
+
+ /* Set up so the remainder of the routine uses this complement, instead
+ * of the actual input */
+ t0 = t = (U8*)SvPV_const(inverted_tstr, temp_len);
+ tend = t0 + temp_len;
+ tstr_utf8 = TRUE;
+
+ SvREFCNT_dec_NN(inverted_tlist);
+ }
+
+ /* For non-/d, an empty rhs means to use the lhs */
+ if (rlen == 0 && ! del) {
+ r0 = t0;
+ rend = tend;
+ rstr_utf8 = tstr_utf8;
+ }
+
+ t_invlist = _new_invlist(1);
+
+ /* Initialize to a single range */
+ t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
+
+ /* For the first pass, the lhs is partitioned such that the
+ * number of UTF-8 bytes required to represent a code point in each
+ * partition is the same as the number for any other code point in
+ * that partition. We copy the pre-compiled partition. */
+ len = C_ARRAY_LENGTH(PL_partition_by_byte_length);
+ invlist_extend(t_invlist, len);
+ t_array = invlist_array(t_invlist);
+ Copy(PL_partition_by_byte_length, t_array, len, UV);
+ invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist)));
+ Newx(r_map, len + 1, UV);
+
+ /* Parse the (potentially adjusted) input, creating the inversion map.
+ * This is done in two passes. The first pass is to determine if the
+ * transliteration can be done in place. The inversion map it creates
+ * could be used, but generally would be larger and slower to run than the
+ * output of the second pass, which starts with a more compact table and
+ * allows more ranges to be merged */
+ for (pass2 = 0; pass2 < 2; pass2++) {
+ if (pass2) {
+ /* Initialize to a single range */
+ t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX);
+
+ /* In the second pass, we just have the single range */
+ len = 1;
+ t_array = invlist_array(t_invlist);
+ }
- if (SvUTF8(rstr))
- o->op_private |= OPpTRANS_TO_UTF;
+/* As noted earlier, we convert EBCDIC code points to Unicode in the first pass
+ * so as to get the well-behaved length 1 vs length 2 boundary. Only code
+ * points below 256 differ between the two character sets in this regard. For
+ * these, we also can't have any ranges, as they have to be individually
+ * converted. */
+#ifdef EBCDIC
+# define CP_ADJUST(x) ((pass2) ? (x) : NATIVE_TO_UNI(x))
+# define FORCE_RANGE_LEN_1(x) ((pass2) ? 0 : ((x) < 256))
+# define CP_SKIP(x) ((pass2) ? UVCHR_SKIP(x) : OFFUNISKIP(x))
+#else
+# define CP_ADJUST(x) (x)
+# define FORCE_RANGE_LEN_1(x) 0
+# define CP_SKIP(x) UVCHR_SKIP(x)
+#endif
- if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
+ /* And the mapping of each of the ranges is initialized. Initially,
+ * everything is TR_UNLISTED. */
+ for (i = 0; i < len; i++) {
+ r_map[i] = TR_UNLISTED;
+ }
- /* for utf8 translations, op_sv will be set to point to a swash
- * containing codepoint ranges. This is done by first assembling
- * a textual representation of the ranges in listsv then compiling
- * it using swash_init(). For more details of the textual format,
- * see L<perlunicode.pod/"User-Defined Character Properties"> .
+ t = t0;
+ t_count = 0;
+ r = r0;
+ r_count = 0;
+ t_range_count = r_range_count = 0;
+
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d:\ntstr=%s\n",
+ __FILE__, __LINE__, _byte_dump_string(t, tend - t, 0)));
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "rstr=%s\n",
+ _byte_dump_string(r, rend - r, 0)));
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "/c=%d; /s=%d; /d=%d\n",
+ complement, squash, del));
+ DEBUG_y(invmap_dump(t_invlist, r_map));
+
+ /* Now go through the search list constructing an inversion map. The
+ * input is not necessarily in any particular order. Making it an
+ * inversion map orders it, potentially simplifying, and makes it easy
+ * to deal with at run time. This is the only place in core that
+ * generates an inversion map; if others were introduced, it might be
+ * better to create general purpose routines to handle them.
+ * (Inversion maps are created in perl in other places.)
+ *
+ * An inversion map consists of two parallel arrays. One is
+ * essentially an inversion list: an ordered list of code points such
+ * that each element gives the first code point of a range of
+ * consecutive code points that map to the element in the other array
+ * that has the same index as this one (in other words, the
+ * corresponding element). Thus the range extends up to (but not
+ * including) the code point given by the next higher element. In a
+ * true inversion map, the corresponding element in the other array
+ * gives the mapping of the first code point in the range, with the
+ * understanding that the next higher code point in the inversion
+ * list's range will map to the next higher code point in the map.
+ *
+ * So if at element [i], let's say we have:
+ *
+ * t_invlist r_map
+ * [i] A a
+ *
+ * This means that A => a, B => b, C => c.... Let's say that the
+ * situation is such that:
+ *
+ * [i+1] L -1
+ *
+ * This means the sequence that started at [i] stops at K => k. This
+ * illustrates that you need to look at the next element to find where
+ * a sequence stops. Except, the highest element in the inversion list
+ * begins a range that is understood to extend to the platform's
+ * infinity.
+ *
+ * This routine modifies traditional inversion maps to reserve two
+ * mappings:
+ *
+ * TR_UNLISTED (or -1) indicates that no code point in the range
+ * is listed in the tr/// searchlist. At runtime, these are
+ * always passed through unchanged. In the inversion map, all
+ * points in the range are mapped to -1, instead of increasing,
+ * like the 'L' in the example above.
+ *
+ * We start the parse with every code point mapped to this, and as
+ * we parse and find ones that are listed in the search list, we
+ * carve out ranges as we go along that override that.
+ *
+ * TR_SPECIAL_HANDLING (or -2) indicates that every code point in the
+ * range needs special handling. Again, all code points in the
+ * range are mapped to -2, instead of increasing.
+ *
+ * Under /d this value means the code point should be deleted from
+ * the transliteration when encountered.
+ *
+ * Otherwise, it marks that every code point in the range is to
+ * map to the final character in the replacement list. This
+ * happens only when the replacement list is shorter than the
+ * search one, so there are things in the search list that have no
+ * correspondence in the replacement list. For example, in
+ * tr/a-z/A/, 'A' is the final value, and the inversion map
+ * generated for this would be like this:
+ * \0 => -1
+ * a => A
+ * b-z => -2
+ * z+1 => -1
+ * 'A' appears once, then the remainder of the range maps to -2.
+ * The use of -2 isn't strictly necessary, as an inversion map is
+ * capable of representing this situation, but not nearly so
+ * compactly, and this is actually quite commonly encountered.
+ * Indeed, the original design of this code used a full inversion
+ * map for this. But things like
+ * tr/\0-\x{FFFF}/A/
+ * generated huge data structures, slowly, and the execution was
+ * also slow. So the current scheme was implemented.
+ *
+ * So, if the next element in our example is:
+ *
+ * [i+2] Q q
+ *
+ * Then all of L, M, N, O, and P map to TR_UNLISTED. If the next
+ * elements are
+ *
+ * [i+3] R z
+ * [i+4] S TR_UNLISTED
+ *
+ * Then Q => q; R => z; and S => TR_UNLISTED. If [i+4] (the 'S') is
+ * the final element in the arrays, every code point from S to infinity
+ * maps to TR_UNLISTED.
+ *
*/
+ /* Finish up range started in what otherwise would
+ * have been the final iteration */
+ while (t < tend || t_range_count > 0) {
+ bool adjacent_to_range_above = FALSE;
+ bool adjacent_to_range_below = FALSE;
+
+ bool merge_with_range_above = FALSE;
+ bool merge_with_range_below = FALSE;
+
+ UV span, invmap_range_length_remaining;
+ SSize_t j;
+ Size_t i;
+
+ /* If we are in the middle of processing a range in the 'target'
+ * side, the previous iteration has set us up. Otherwise, look at
+ * the next character in the search list */
+ if (t_range_count <= 0) {
+ if (! tstr_utf8) {
+
+ /* Here, not in the middle of a range, and not UTF-8. The
+ * next code point is the single byte where we're at */
+ t_cp = CP_ADJUST(*t);
+ t_range_count = 1;
+ t++;
+ }
+ else {
+ Size_t t_char_len;
+
+ /* Here, not in the middle of a range, and is UTF-8. The
+ * next code point is the next UTF-8 char in the input. We
+ * know the input is valid, because the toker constructed
+ * it */
+ t_cp = CP_ADJUST(valid_utf8_to_uvchr(t, &t_char_len));
+ t += t_char_len;
+
+ /* UTF-8 strings (only) have been parsed in toke.c to have
+ * ranges. See if the next byte indicates that this was
+ * the first element of a range. If so, get the final
+ * element and calculate the range size. If not, the range
+ * size is 1 */
+ if ( t < tend && *t == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(t_cp))
+ {
+ t++;
+ t_range_count = valid_utf8_to_uvchr(t, &t_char_len)
+ - t_cp + 1;
+ t += t_char_len;
+ }
+ else {
+ t_range_count = 1;
+ }
+ }
- SV* const listsv = newSVpvs("# comment\n");
- SV* transv = NULL;
- const U8* tend = t + tlen;
- const U8* rend = r + rlen;
- STRLEN ulen;
- UV tfirst = 1;
- UV tlast = 0;
- IV tdiff;
- STRLEN tcount = 0;
- UV rfirst = 1;
- UV rlast = 0;
- IV rdiff;
- STRLEN rcount = 0;
- IV diff;
- I32 none = 0;
- U32 max = 0;
- I32 bits;
- I32 havefinal = 0;
- U32 final = 0;
- const I32 from_utf = o->op_private & OPpTRANS_FROM_UTF;
- const I32 to_utf = o->op_private & OPpTRANS_TO_UTF;
- U8* tsave = NULL;
- U8* rsave = NULL;
- const U32 flags = UTF8_ALLOW_DEFAULT;
-
- if (!from_utf) {
- STRLEN len = tlen;
- t = tsave = bytes_to_utf8(t, &len);
- tend = t + len;
- }
- if (!to_utf && rlen) {
- STRLEN len = rlen;
- r = rsave = bytes_to_utf8(r, &len);
- rend = r + len;
- }
+ /* Count the total number of listed code points */
+ t_count += t_range_count;
+ }
-/* There is a snag with this code on EBCDIC: scan_const() in toke.c has
- * encoded chars in native encoding which makes ranges in the EBCDIC 0..255
- * odd. */
+ /* Similarly, get the next character in the replacement list */
+ if (r_range_count <= 0) {
+ if (r >= rend) {
- if (complement) {
- /* utf8 and /c:
- * replace t/tlen/tend with a version that has the ranges
- * complemented
- */
- U8 tmpbuf[UTF8_MAXBYTES+1];
- UV *cp;
- UV nextmin = 0;
- Newx(cp, 2*tlen, UV);
- i = 0;
- transv = newSVpvs("");
-
- /* convert search string into array of (start,end) range
- * codepoint pairs stored in cp[]. Most "ranges" will start
- * and end at the same char */
- while (t < tend) {
- cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
- t += ulen;
- /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */
- if (t < tend && *t == ILLEGAL_UTF8_BYTE) {
- t++;
- cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
- t += ulen;
- }
- else {
- cp[2*i+1] = cp[2*i];
- }
- i++;
- }
+ /* But if we've exhausted the rhs, there is nothing to map
+ * to, except the special handling one, and we make the
+ * range the same size as the lhs one. */
+ r_cp = TR_SPECIAL_HANDLING;
+ r_range_count = t_range_count;
- /* sort the ranges */
- qsort(cp, i, 2*sizeof(UV), uvcompare);
-
- /* Create a utf8 string containing the complement of the
- * codepoint ranges. For example if cp[] contains [A,B], [C,D],
- * then transv will contain the equivalent of:
- * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1,
- * B + 1, ILLEGAL_UTF8_BYTE, C - 1,
- * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff;
- * A range of a single char skips the ILLEGAL_UTF8_BYTE and
- * end cp.
- */
- for (j = 0; j < i; j++) {
- UV val = cp[2*j];
- diff = val - nextmin;
- if (diff > 0) {
- t = uvchr_to_utf8(tmpbuf,nextmin);
- sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
- if (diff > 1) {
- U8 range_mark = ILLEGAL_UTF8_BYTE;
- t = uvchr_to_utf8(tmpbuf, val - 1);
- sv_catpvn(transv, (char *)&range_mark, 1);
- sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
- }
- }
- val = cp[2*j+1];
- if (val >= nextmin)
- nextmin = val + 1;
- }
+ if (! del) {
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "final_map =%" UVXf "\n", final_map));
+ }
+ }
+ else {
+ if (! rstr_utf8) {
+ r_cp = CP_ADJUST(*r);
+ r_range_count = 1;
+ r++;
+ }
+ else {
+ Size_t r_char_len;
- t = uvchr_to_utf8(tmpbuf,nextmin);
- sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
- {
- U8 range_mark = ILLEGAL_UTF8_BYTE;
- sv_catpvn(transv, (char *)&range_mark, 1);
- }
- t = uvchr_to_utf8(tmpbuf, 0x7fffffff);
- sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
- t = (const U8*)SvPVX_const(transv);
- tlen = SvCUR(transv);
- tend = t + tlen;
- Safefree(cp);
- }
- else if (!rlen && !del) {
- r = t; rlen = tlen; rend = tend;
- }
+ r_cp = CP_ADJUST(valid_utf8_to_uvchr(r, &r_char_len));
+ r += r_char_len;
+ if ( r < rend && *r == RANGE_INDICATOR
+ && ! FORCE_RANGE_LEN_1(r_cp))
+ {
+ r++;
+ r_range_count = valid_utf8_to_uvchr(r,
+ &r_char_len) - r_cp + 1;
+ r += r_char_len;
+ }
+ else {
+ r_range_count = 1;
+ }
+ }
- if (!squash) {
- if ((!rlen && !del) || t == r ||
- (tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
- {
- o->op_private |= OPpTRANS_IDENTICAL;
- }
- }
+ if (r_cp == TR_SPECIAL_HANDLING) {
+ r_range_count = t_range_count;
+ }
- /* extract char ranges from t and r and append them to listsv */
-
- while (t < tend || tfirst <= tlast) {
- /* see if we need more "t" chars */
- if (tfirst > tlast) {
- tfirst = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
- t += ulen;
- if (t < tend && *t == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */
- t++;
- tlast = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
- t += ulen;
- }
- else
- tlast = tfirst;
- }
+ /* This is the final character so far */
+ final_map = r_cp + r_range_count - 1;
- /* now see if we need more "r" chars */
- if (rfirst > rlast) {
- if (r < rend) {
- rfirst = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
- r += ulen;
- if (r < rend && *r == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */
- r++;
- rlast = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
- r += ulen;
- }
- else
- rlast = rfirst;
- }
- else {
- if (!havefinal++)
- final = rlast;
- rfirst = rlast = 0xffffffff;
- }
- }
+ r_count += r_range_count;
+ }
+ }
+
+ /* Here, we have the next things ready in both sides. They are
+ * potentially ranges. We try to process as big a chunk as
+ * possible at once, but the lhs and rhs must be synchronized, so
+ * things like tr/A-Z/a-ij-z/ will need to be processed in 2 chunks
+ * */
+ min_range_count = MIN(t_range_count, r_range_count);
+
+ /* Search the inversion list for the entry that contains the input
+ * code point <cp>. The inversion map was initialized to cover the
+ * entire range of possible inputs, so this should not fail. So
+ * the return value is the index into the list's array of the range
+ * that contains <cp>, that is, 'i' such that array[i] <= cp <
+ * array[i+1] */
+ j = _invlist_search(t_invlist, t_cp);
+ assert(j >= 0);
+ i = j;
+
+ /* Here, the data structure might look like:
+ *
+ * index t r Meaning
+ * [i-1] J j # J-L => j-l
+ * [i] M -1 # M => default; as do N, O, P, Q
+ * [i+1] R x # R => x, S => x+1, T => x+2
+ * [i+2] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ... infinity
+ *
+ * where 'x' and 'y' above are not to be taken literally.
+ *
+ * The maximum chunk we can handle in this loop iteration, is the
+ * smallest of the three components: the lhs 't_', the rhs 'r_',
+ * and the remainder of the range in element [i]. (In pass 1, that
+ * range will have everything in it be of the same class; we can't
+ * cross into another class.) 'min_range_count' already contains
+ * the smallest of the first two values. The final one is
+ * irrelevant if the map is to the special indicator */
+
+ invmap_range_length_remaining = (i + 1 < len)
+ ? t_array[i+1] - t_cp
+ : IV_MAX - t_cp;
+ span = MAX(1, MIN(min_range_count, invmap_range_length_remaining));
+
+ /* The end point of this chunk is where we are, plus the span, but
+ * never larger than the platform's infinity */
+ t_cp_end = MIN(IV_MAX, t_cp + span - 1);
+
+ if (r_cp == TR_SPECIAL_HANDLING) {
+
+ /* If unmatched lhs code points map to the final map, use that
+ * value. This being set to TR_SPECIAL_HANDLING indicates that
+ * we don't have a final map: unmatched lhs code points are
+ * simply deleted */
+ r_cp_end = (del) ? TR_SPECIAL_HANDLING : final_map;
+ }
+ else {
+ r_cp_end = MIN(IV_MAX, r_cp + span - 1);
+
+ /* If something on the lhs is below 256, and something on the
+ * rhs is above, there is a potential mapping here across that
+ * boundary. Indeed the only way there isn't is if both sides
+ * start at the same point. That means they both cross at the
+ * same time. But otherwise one crosses before the other */
+ if (t_cp < 256 && r_cp_end > 255 && r_cp != t_cp) {
+ can_force_utf8 = TRUE;
+ }
+ }
- /* now see which range will peter out first, if either. */
- tdiff = tlast - tfirst;
- rdiff = rlast - rfirst;
- tcount += tdiff + 1;
- rcount += rdiff + 1;
+ /* If a character appears in the search list more than once, the
+ * 2nd and succeeding occurrences are ignored, so only do this
+ * range if haven't already processed this character. (The range
+ * has been set up so that all members in it will be of the same
+ * ilk) */
+ if (r_map[i] == TR_UNLISTED) {
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "Processing %" UVxf "-%" UVxf " => %" UVxf "-%" UVxf "\n",
+ t_cp, t_cp_end, r_cp, r_cp_end));
+
+ /* This is the first definition for this chunk, hence is valid
+ * and needs to be processed. Here and in the comments below,
+ * we use the above sample data. The t_cp chunk must be any
+ * contiguous subset of M, N, O, P, and/or Q.
+ *
+ * In the first pass, calculate if there is any possible input
+ * string that has a character whose transliteration will be
+ * longer than it. If none, the transliteration may be done
+ * in-place, as it can't write over a so-far unread byte.
+ * Otherwise, a copy must first be made. This could be
+ * expensive for long inputs.
+ *
+ * In the first pass, the t_invlist has been partitioned so
+ * that all elements in any single range have the same number
+ * of bytes in their UTF-8 representations. And the r space is
+ * either a single byte, or a range of strictly monotonically
+ * increasing code points. So the final element in the range
+ * will be represented by no fewer bytes than the initial one.
+ * That means that if the final code point in the t range has
+ * at least as many bytes as the final code point in the r,
+ * then all code points in the t range have at least as many
+ * bytes as their corresponding r range element. But if that's
+ * not true, the transliteration of at least the final code
+ * point grows in length. As an example, suppose we had
+ * tr/\x{fff0}-\x{fff1}/\x{ffff}-\x{10000}/
+ * The UTF-8 for all but 10000 occupies 3 bytes on ASCII
+ * platforms. We have deliberately set up the data structure
+ * so that any range in the lhs gets split into chunks for
+ * processing, such that every code point in a chunk has the
+ * same number of UTF-8 bytes. We only have to check the final
+ * code point in the rhs against any code point in the lhs. */
+ if ( ! pass2
+ && r_cp_end != TR_SPECIAL_HANDLING
+ && CP_SKIP(t_cp_end) < CP_SKIP(r_cp_end))
+ {
+ /* Here, we will need to make a copy of the input string
+ * before doing the transliteration. The worst possible
+ * case is an expansion ratio of 14:1. This is rare, and
+ * we'd rather allocate only the necessary amount of extra
+ * memory for that copy. We can calculate the worst case
+ * for this particular transliteration is by keeping track
+ * of the expansion factor for each range.
+ *
+ * Consider tr/\xCB/\x{E000}/. The maximum expansion
+ * factor is 1 byte going to 3 if the target string is not
+ * UTF-8, but 2 bytes going to 3 if it is in UTF-8. We
+ * could pass two different values so doop could choose
+ * based on the UTF-8ness of the target. But khw thinks
+ * (perhaps wrongly) that is overkill. It is used only to
+ * make sure we malloc enough space.
+ *
+ * If no target string can force the result to be UTF-8,
+ * then we don't have to worry about the case of the target
+ * string not being UTF-8 */
+ NV t_size = (can_force_utf8 && t_cp < 256)
+ ? 1
+ : CP_SKIP(t_cp_end);
+ NV ratio = CP_SKIP(r_cp_end) / t_size;
+
+ o->op_private |= OPpTRANS_GROWS;
+
+ /* Now that we know it grows, we can keep track of the
+ * largest ratio */
+ if (ratio > max_expansion) {
+ max_expansion = ratio;
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "New expansion factor: %" NVgf "\n",
+ max_expansion));
+ }
+ }
- if (tdiff <= rdiff)
- diff = tdiff;
- else
- diff = rdiff;
+ /* The very first range is marked as adjacent to the
+ * non-existent range below it, as it causes things to "just
+ * work" (TradeMark)
+ *
+ * If the lowest code point in this chunk is M, it adjoins the
+ * J-L range */
+ if (t_cp == t_array[i]) {
+ adjacent_to_range_below = TRUE;
+
+ /* And if the map has the same offset from the beginning of
+ * the range as does this new code point (or both are for
+ * TR_SPECIAL_HANDLING), this chunk can be completely
+ * merged with the range below. EXCEPT, in the first pass,
+ * we don't merge ranges whose UTF-8 byte representations
+ * have different lengths, so that we can more easily
+ * detect if a replacement is longer than the source, that
+ * is if it 'grows'. But in the 2nd pass, there's no
+ * reason to not merge */
+ if ( (i > 0 && ( pass2
+ || CP_SKIP(t_array[i-1])
+ == CP_SKIP(t_cp)))
+ && ( ( r_cp == TR_SPECIAL_HANDLING
+ && r_map[i-1] == TR_SPECIAL_HANDLING)
+ || ( r_cp != TR_SPECIAL_HANDLING
+ && r_cp - r_map[i-1] == t_cp - t_array[i-1])))
+ {
+ merge_with_range_below = TRUE;
+ }
+ }
- if (rfirst == 0xffffffff) {
- diff = tdiff; /* oops, pretend rdiff is infinite */
- if (diff > 0)
- Perl_sv_catpvf(aTHX_ listsv, "%04lx\t%04lx\tXXXX\n",
- (long)tfirst, (long)tlast);
- else
- Perl_sv_catpvf(aTHX_ listsv, "%04lx\t\tXXXX\n", (long)tfirst);
- }
- else {
- if (diff > 0)
- Perl_sv_catpvf(aTHX_ listsv, "%04lx\t%04lx\t%04lx\n",
- (long)tfirst, (long)(tfirst + diff),
- (long)rfirst);
- else
- Perl_sv_catpvf(aTHX_ listsv, "%04lx\t\t%04lx\n",
- (long)tfirst, (long)rfirst);
-
- if (rfirst + diff > max)
- max = rfirst + diff;
- if (!grows)
- grows = (tfirst < rfirst &&
- UVCHR_SKIP(tfirst) < UVCHR_SKIP(rfirst + diff));
- rfirst += diff + 1;
- }
- tfirst += diff + 1;
- }
+ /* Similarly, if the highest code point in this chunk is 'Q',
+ * it adjoins the range above, and if the map is suitable, can
+ * be merged with it */
+ if ( t_cp_end >= IV_MAX - 1
+ || ( i + 1 < len
+ && t_cp_end + 1 == t_array[i+1]))
+ {
+ adjacent_to_range_above = TRUE;
+ if (i + 1 < len)
+ if ( ( pass2
+ || CP_SKIP(t_cp) == CP_SKIP(t_array[i+1]))
+ && ( ( r_cp == TR_SPECIAL_HANDLING
+ && r_map[i+1] == (UV) TR_SPECIAL_HANDLING)
+ || ( r_cp != TR_SPECIAL_HANDLING
+ && r_cp_end == r_map[i+1] - 1)))
+ {
+ merge_with_range_above = TRUE;
+ }
+ }
- /* compile listsv into a swash and attach to o */
+ if (merge_with_range_below && merge_with_range_above) {
+
+ /* Here the new chunk looks like M => m, ... Q => q; and
+ * the range above is like R => r, .... Thus, the [i-1]
+ * and [i+1] ranges should be seamlessly melded so the
+ * result looks like
+ *
+ * [i-1] J j # J-T => j-t
+ * [i] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ... infinity
+ */
+ Move(t_array + i + 2, t_array + i, len - i - 2, UV);
+ Move(r_map + i + 2, r_map + i, len - i - 2, UV);
+ len -= 2;
+ invlist_set_len(t_invlist,
+ len,
+ *(get_invlist_offset_addr(t_invlist)));
+ }
+ else if (merge_with_range_below) {
+
+ /* Here the new chunk looks like M => m, .... But either
+ * (or both) it doesn't extend all the way up through Q; or
+ * the range above doesn't start with R => r. */
+ if (! adjacent_to_range_above) {
+
+ /* In the first case, let's say the new chunk extends
+ * through O. We then want:
+ *
+ * [i-1] J j # J-O => j-o
+ * [i] P -1 # P => -1, Q => -1
+ * [i+1] R x # R => x, S => x+1, T => x+2
+ * [i+2] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ...
+ * infinity
+ */
+ t_array[i] = t_cp_end + 1;
+ r_map[i] = TR_UNLISTED;
+ }
+ else { /* Adjoins the range above, but can't merge with it
+ (because 'x' is not the next map after q) */
+ /*
+ * [i-1] J j # J-Q => j-q
+ * [i] R x # R => x, S => x+1, T => x+2
+ * [i+1] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ...
+ * infinity
+ */
- none = ++max;
- if (del)
- ++max;
+ Move(t_array + i + 1, t_array + i, len - i - 1, UV);
+ Move(r_map + i + 1, r_map + i, len - i - 1, UV);
+ len--;
+ invlist_set_len(t_invlist, len,
+ *(get_invlist_offset_addr(t_invlist)));
+ }
+ }
+ else if (merge_with_range_above) {
+
+ /* Here the new chunk ends with Q => q, and the range above
+ * must start with R => r, so the two can be merged. But
+ * either (or both) the new chunk doesn't extend all the
+ * way down to M; or the mapping of the final code point
+ * range below isn't m */
+ if (! adjacent_to_range_below) {
+
+ /* In the first case, let's assume the new chunk starts
+ * with P => p. Then, because it's merge-able with the
+ * range above, that range must be R => r. We want:
+ *
+ * [i-1] J j # J-L => j-l
+ * [i] M -1 # M => -1, N => -1
+ * [i+1] P p # P-T => p-t
+ * [i+2] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ...
+ * infinity
+ */
+ t_array[i+1] = t_cp;
+ r_map[i+1] = r_cp;
+ }
+ else { /* Adjoins the range below, but can't merge with it
+ */
+ /*
+ * [i-1] J j # J-L => j-l
+ * [i] M x # M-T => x-5 .. x+2
+ * [i+1] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ...
+ * infinity
+ */
+ Move(t_array + i + 1, t_array + i, len - i - 1, UV);
+ Move(r_map + i + 1, r_map + i, len - i - 1, UV);
+ len--;
+ t_array[i] = t_cp;
+ r_map[i] = r_cp;
+ invlist_set_len(t_invlist, len,
+ *(get_invlist_offset_addr(t_invlist)));
+ }
+ }
+ else if (adjacent_to_range_below && adjacent_to_range_above) {
+ /* The new chunk completely fills the gap between the
+ * ranges on either side, but can't merge with either of
+ * them.
+ *
+ * [i-1] J j # J-L => j-l
+ * [i] M z # M => z, N => z+1 ... Q => z+4
+ * [i+1] R x # R => x, S => x+1, T => x+2
+ * [i+2] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ... infinity
+ */
+ r_map[i] = r_cp;
+ }
+ else if (adjacent_to_range_below) {
+ /* The new chunk adjoins the range below, but not the range
+ * above, and can't merge. Let's assume the chunk ends at
+ * O.
+ *
+ * [i-1] J j # J-L => j-l
+ * [i] M z # M => z, N => z+1, O => z+2
+ * [i+1] P -1 # P => -1, Q => -1
+ * [i+2] R x # R => x, S => x+1, T => x+2
+ * [i+3] U y # U => y, V => y+1, ...
+ * ...
+ * [-w] Z -1 # Z => default; as do Z+1, ... infinity
+ */
+ invlist_extend(t_invlist, len + 1);
+ t_array = invlist_array(t_invlist);
+ Renew(r_map, len + 1, UV);
+
+ Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV);
+ Move(r_map + i + 1, r_map + i + 2, len - i - 1, UV);
+ r_map[i] = r_cp;
+ t_array[i+1] = t_cp_end + 1;
+ r_map[i+1] = TR_UNLISTED;
+ len++;
+ invlist_set_len(t_invlist, len,
+ *(get_invlist_offset_addr(t_invlist)));
+ }
+ else if (adjacent_to_range_above) {
+ /* The new chunk adjoins the range above, but not the range
+ * below, and can't merge. Let's assume the new chunk
+ * starts at O
+ *
+ * [i-1] J j # J-L => j-l
+ * [i] M -1 # M => default, N => default
+ * [i+1] O z # O => z, P => z+1, Q => z+2
+ * [i+2] R x # R => x, S => x+1, T => x+2
+ * [i+3] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ... infinity
+ */
+ invlist_extend(t_invlist, len + 1);
+ t_array = invlist_array(t_invlist);
+ Renew(r_map, len + 1, UV);
+
+ Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV);
+ Move(r_map + i + 1, r_map + i + 2, len - i - 1, UV);
+ t_array[i+1] = t_cp;
+ r_map[i+1] = r_cp;
+ len++;
+ invlist_set_len(t_invlist, len,
+ *(get_invlist_offset_addr(t_invlist)));
+ }
+ else {
+ /* The new chunk adjoins neither the range above, nor the
+ * range below. Let's assume it is N..P => n..p
+ *
+ * [i-1] J j # J-L => j-l
+ * [i] M -1 # M => default
+ * [i+1] N n # N..P => n..p
+ * [i+2] Q -1 # Q => default
+ * [i+3] R x # R => x, S => x+1, T => x+2
+ * [i+4] U y # U => y, V => y+1, ...
+ * ...
+ * [-1] Z -1 # Z => default; as do Z+1, ... infinity
+ */
- if (max > 0xffff)
- bits = 32;
- else if (max > 0xff)
- bits = 16;
- else
- bits = 8;
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "Before fixing up: len=%d, i=%d\n",
+ (int) len, (int) i));
+ DEBUG_yv(invmap_dump(t_invlist, r_map));
- swash = MUTABLE_SV(swash_init("utf8", "", listsv, bits, none));
-#ifdef USE_ITHREADS
- cPADOPo->op_padix = pad_alloc(OP_TRANS, SVf_READONLY);
- SvREFCNT_dec(PAD_SVl(cPADOPo->op_padix));
- PAD_SETSV(cPADOPo->op_padix, swash);
- SvPADTMP_on(swash);
- SvREADONLY_on(swash);
-#else
- cSVOPo->op_sv = swash;
-#endif
- SvREFCNT_dec(listsv);
- SvREFCNT_dec(transv);
+ invlist_extend(t_invlist, len + 2);
+ t_array = invlist_array(t_invlist);
+ Renew(r_map, len + 2, UV);
+
+ Move(t_array + i + 1,
+ t_array + i + 2 + 1, len - i - (2 - 1), UV);
+ Move(r_map + i + 1,
+ r_map + i + 2 + 1, len - i - (2 - 1), UV);
+
+ len += 2;
+ invlist_set_len(t_invlist, len,
+ *(get_invlist_offset_addr(t_invlist)));
+
+ t_array[i+1] = t_cp;
+ r_map[i+1] = r_cp;
+
+ t_array[i+2] = t_cp_end + 1;
+ r_map[i+2] = TR_UNLISTED;
+ }
+ DEBUG_yv(PerlIO_printf(Perl_debug_log,
+ "After iteration: span=%" UVuf ", t_range_count=%"
+ UVuf " r_range_count=%" UVuf "\n",
+ span, t_range_count, r_range_count));
+ DEBUG_yv(invmap_dump(t_invlist, r_map));
+ } /* End of this chunk needs to be processed */
+
+ /* Done with this chunk. */
+ t_cp += span;
+ if (t_cp >= IV_MAX) {
+ break;
+ }
+ t_range_count -= span;
+ if (r_cp != TR_SPECIAL_HANDLING) {
+ r_cp += span;
+ r_range_count -= span;
+ }
+ else {
+ r_range_count = 0;
+ }
+
+ } /* End of loop through the search list */
+
+ /* We don't need an exact count, but we do need to know if there is
+ * anything left over in the replacement list. So, just assume it's
+ * one byte per character */
+ if (rend > r) {
+ r_count++;
+ }
+ } /* End of passes */
+
+ SvREFCNT_dec(inverted_tstr);
- if (!del && havefinal && rlen)
- (void)hv_store(MUTABLE_HV(SvRV(swash)), "FINAL", 5,
- newSVuv((UV)final), 0);
+ DEBUG_y(PerlIO_printf(Perl_debug_log, "After everything: \n"));
+ DEBUG_y(invmap_dump(t_invlist, r_map));
- Safefree(tsave);
- Safefree(rsave);
+ /* We now have normalized the input into an inversion map.
+ *
+ * See if the lhs and rhs are equivalent. If so, this tr/// is a no-op
+ * except for the count, and streamlined runtime code can be used */
+ if (!del && !squash) {
+
+ /* They are identical if they point to same address, or if everything
+ * maps to UNLISTED or to itself. This catches things that not looking
+ * at the normalized inversion map doesn't catch, like tr/aa/ab/ or
+ * tr/\x{100}-\x{104}/\x{100}-\x{102}\x{103}-\x{104} */
+ if (r0 != t0) {
+ for (i = 0; i < len; i++) {
+ if (r_map[i] != TR_UNLISTED && r_map[i] != t_array[i]) {
+ goto done_identical_check;
+ }
+ }
+ }
- tlen = tcount;
- rlen = rcount;
- if (r < rend)
- rlen++;
- else if (rlast == 0xffffffff)
- rlen = 0;
+ /* Here we have gone through the entire list, and didn't find any
+ * non-identical mappings */
+ o->op_private |= OPpTRANS_IDENTICAL;
- goto warnins;
+ done_identical_check: ;
}
+ t_array = invlist_array(t_invlist);
+
+ /* If has components above 255, we generally need to use the inversion map
+ * implementation */
+ if ( can_force_utf8
+ || ( len > 0
+ && t_array[len-1] > 255
+ /* If the final range is 0x100-INFINITY and is a special
+ * mapping, the table implementation can handle it */
+ && ! ( t_array[len-1] == 256
+ && ( r_map[len-1] == TR_UNLISTED
+ || r_map[len-1] == TR_SPECIAL_HANDLING))))
+ {
+ SV* r_map_sv;
+
+ /* A UTF-8 op is generated, indicated by this flag. This op is an
+ * sv_op */
+ o->op_private |= OPpTRANS_USE_SVOP;
+
+ if (can_force_utf8) {
+ o->op_private |= OPpTRANS_CAN_FORCE_UTF8;
+ }
+
+ /* The inversion map is pushed; first the list. */
+ invmap = MUTABLE_AV(newAV());
+ av_push(invmap, t_invlist);
+
+ /* 2nd is the mapping */
+ r_map_sv = newSVpvn((char *) r_map, len * sizeof(UV));
+ av_push(invmap, r_map_sv);
+
+ /* 3rd is the max possible expansion factor */
+ av_push(invmap, newSVnv(max_expansion));
+
+ /* Characters that are in the search list, but not in the replacement
+ * list are mapped to the final character in the replacement list */
+ if (! del && r_count < t_count) {
+ av_push(invmap, newSVuv(final_map));
+ }
+
+#ifdef USE_ITHREADS
+ cPADOPo->op_padix = pad_alloc(OP_TRANS, SVf_READONLY);
+ SvREFCNT_dec(PAD_SVl(cPADOPo->op_padix));
+ PAD_SETSV(cPADOPo->op_padix, (SV *) invmap);
+ SvPADTMP_on(invmap);
+ SvREADONLY_on(invmap);
+#else
+ cSVOPo->op_sv = (SV *) invmap;
+#endif
+
+ }
+ else {
+ OPtrans_map *tbl;
+ unsigned short i;
+
+ /* The OPtrans_map struct already contains one slot; hence the -1. */
+ SSize_t struct_size = sizeof(OPtrans_map)
+ + (256 - 1 + 1)*sizeof(short);
+
/* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup
- * table. Entries with the value -1 indicate chars not to be
- * translated, while -2 indicates a search char without a
+ * table. Entries with the value TR_UNMAPPED indicate chars not to be
+ * translated, while TR_DELETE indicates a search char without a
* corresponding replacement char under /d.
*
- * Normally, the table has 256 slots. However, in the presence of
- * /c, the search charlist has an implicit \x{100}-\x{7fffffff}
- * added, and if there are enough replacement chars to start pairing
- * with the \x{100},... search chars, then a larger (> 256) table
- * is allocated.
- *
- * In addition, regardless of whether under /c, an extra slot at the
- * end is used to store the final repeating char, or -3 under an empty
- * replacement list, or -2 under /d; which makes the runtime code
- * easier.
- *
- * The toker will have already expanded char ranges in t and r.
+ * In addition, an extra slot at the end is used to store the final
+ * repeating char, or TR_R_EMPTY under an empty replacement list, or
+ * TR_DELETE under /d; which makes the runtime code easier.
*/
- /* Initially allocate 257-slot table: 256 for basic (non /c) usage,
- * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0.
- * The OPtrans_map struct already contains one slot; hence the -1.
- */
- struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short);
+ /* Indicate this is an op_pv */
+ o->op_private &= ~OPpTRANS_USE_SVOP;
+
tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1);
tbl->size = 256;
cPVOPo->op_pv = (char*)tbl;
- if (complement) {
- Size_t excess;
-
- /* in this branch, j is a count of 'consumed' (i.e. paired off
- * with a search char) replacement chars (so j <= rlen always)
- */
- for (i = 0; i < tlen; i++)
- tbl->map[t[i]] = -1;
-
- for (i = 0, j = 0; i < 256; i++) {
- if (!tbl->map[i]) {
- if (j == rlen) {
- if (del)
- tbl->map[i] = -2;
- else if (rlen)
- tbl->map[i] = r[j-1];
- else
- tbl->map[i] = (short)i;
- }
- else {
- tbl->map[i] = r[j++];
- }
- if ( tbl->map[i] >= 0
- && UVCHR_IS_INVARIANT((UV)i)
- && !UVCHR_IS_INVARIANT((UV)(tbl->map[i]))
- )
- grows = TRUE;
- }
- }
+ for (i = 0; i < len; i++) {
+ STATIC_ASSERT_DECL(TR_SPECIAL_HANDLING == TR_DELETE);
+ short upper = i >= len - 1 ? 256 : (short) t_array[i+1];
+ short to = (short) r_map[i];
+ short j;
+ bool do_increment = TRUE;
- ASSUME(j <= rlen);
- excess = rlen - j;
+ /* Any code points above our limit should be irrelevant */
+ if (t_array[i] >= tbl->size) break;
- if (excess) {
- /* More replacement chars than search chars:
- * store excess replacement chars at end of main table.
- */
+ /* Set up the map */
+ if (to == (short) TR_SPECIAL_HANDLING && ! del) {
+ to = (short) final_map;
+ do_increment = FALSE;
+ }
+ else if (to < 0) {
+ do_increment = FALSE;
+ }
- struct_size += excess;
- tbl = (OPtrans_map*)PerlMemShared_realloc(tbl,
- struct_size + excess * sizeof(short));
- tbl->size += excess;
- cPVOPo->op_pv = (char*)tbl;
+ /* Create a map for everything in this range. The value increases
+ * except for the special cases */
+ for (j = (short) t_array[i]; j < upper; j++) {
+ tbl->map[j] = to;
+ if (do_increment) to++;
+ }
+ }
- for (i = 0; i < excess; i++)
- tbl->map[i + 256] = r[j+i];
+ tbl->map[tbl->size] = del
+ ? (short) TR_DELETE
+ : (short) rlen
+ ? (short) final_map
+ : (short) TR_R_EMPTY;
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"%s: %d\n", __FILE__, __LINE__));
+ for (i = 0; i < tbl->size; i++) {
+ if (tbl->map[i] < 0) {
+ DEBUG_y(PerlIO_printf(Perl_debug_log," %02x=>%d",
+ (unsigned) i, tbl->map[i]));
}
else {
- /* no more replacement chars than search chars */
- if (!rlen && !del && !squash)
- o->op_private |= OPpTRANS_IDENTICAL;
+ DEBUG_y(PerlIO_printf(Perl_debug_log," %02x=>%02x",
+ (unsigned) i, tbl->map[i]));
+ }
+ if ((i+1) % 8 == 0 || i + 1 == (short) tbl->size) {
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"\n"));
}
+ }
+ DEBUG_y(PerlIO_printf(Perl_debug_log,"Final map 0x%x=>%02x\n",
+ (unsigned) tbl->size, tbl->map[tbl->size]));
+
+ SvREFCNT_dec(t_invlist);
+
+#if 0 /* code that added excess above-255 chars at the end of the table, in
+ case we ever want to not use the inversion map implementation for
+ this */
+
+ ASSUME(j <= rlen);
+ excess = rlen - j;
+
+ if (excess) {
+ /* More replacement chars than search chars:
+ * store excess replacement chars at end of main table.
+ */
+
+ struct_size += excess;
+ tbl = (OPtrans_map*)PerlMemShared_realloc(tbl,
+ struct_size + excess * sizeof(short));
+ tbl->size += excess;
+ cPVOPo->op_pv = (char*)tbl;
- tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3;
+ for (i = 0; i < excess; i++)
+ tbl->map[i + 256] = r[j+i];
}
else {
- if (!rlen && !del) {
- r = t; rlen = tlen;
- if (!squash)
- o->op_private |= OPpTRANS_IDENTICAL;
- }
- else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) {
- o->op_private |= OPpTRANS_IDENTICAL;
- }
-
- for (i = 0; i < 256; i++)
- tbl->map[i] = -1;
- for (i = 0, j = 0; i < tlen; i++,j++) {
- if (j >= rlen) {
- if (del) {
- if (tbl->map[t[i]] == -1)
- tbl->map[t[i]] = -2;
- continue;
- }
- --j;
- }
- if (tbl->map[t[i]] == -1) {
- if ( UVCHR_IS_INVARIANT(t[i])
- && ! UVCHR_IS_INVARIANT(r[j]))
- grows = TRUE;
- tbl->map[t[i]] = r[j];
- }
- }
- tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3;
+ /* no more replacement chars than search chars */
}
+#endif
+
+ }
- /* both non-utf8 and utf8 code paths end up here */
+ DEBUG_y(PerlIO_printf(Perl_debug_log,
+ "/d=%d, /s=%d, /c=%d, identical=%d, grows=%d,"
+ " use_svop=%d, can_force_utf8=%d,\nexpansion=%" NVgf "\n",
+ del, squash, complement,
+ cBOOL(o->op_private & OPpTRANS_IDENTICAL),
+ cBOOL(o->op_private & OPpTRANS_USE_SVOP),
+ cBOOL(o->op_private & OPpTRANS_GROWS),
+ cBOOL(o->op_private & OPpTRANS_CAN_FORCE_UTF8),
+ max_expansion));
- warnins:
- if(del && rlen == tlen) {
+ Safefree(r_map);
+
+ if(del && rlen != 0 && r_count == t_count) {
Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator");
- } else if(rlen > tlen && !complement) {
+ } else if(r_count > t_count) {
Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Replacement list is longer than search list");
}
- if (grows)
- o->op_private |= OPpTRANS_GROWS;
op_free(expr);
op_free(repl);
OP *
Perl_newPMOP(pTHX_ I32 type, I32 flags)
{
- dVAR;
PMOP *pmop;
assert((PL_opargs[type] & OA_CLASS_MASK) == OA_PMOP
} else {
SV * const repointer = &PL_sv_undef;
av_push(PL_regex_padav, repointer);
- pmop->op_pmoffset = av_tindex(PL_regex_padav);
+ pmop->op_pmoffset = av_top_index(PL_regex_padav);
PL_regex_pad = AvARRAY(PL_regex_padav);
}
#endif
is_compiletime = 1;
has_code = 0;
if (expr->op_type == OP_LIST) {
- OP *o;
- for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
- if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
- has_code = 1;
- assert(!o->op_next);
- if (UNLIKELY(!OpHAS_SIBLING(o))) {
- assert(PL_parser && PL_parser->error_count);
- /* This can happen with qr/ (?{(^{})/. Just fake up
- the op we were expecting to see, to avoid crashing
- elsewhere. */
- op_sibling_splice(expr, o, 0,
- newSVOP(OP_CONST, 0, &PL_sv_no));
- }
- o->op_next = OpSIBLING(o);
- }
- else if (o->op_type != OP_CONST && o->op_type != OP_PUSHMARK)
- is_compiletime = 0;
- }
+ OP *child;
+ for (child = cLISTOPx(expr)->op_first; child; child = OpSIBLING(child)) {
+ if (child->op_type == OP_NULL && (child->op_flags & OPf_SPECIAL)) {
+ has_code = 1;
+ assert(!child->op_next);
+ if (UNLIKELY(!OpHAS_SIBLING(child))) {
+ assert(PL_parser && PL_parser->error_count);
+ /* This can happen with qr/ (?{(^{})/. Just fake up
+ the op we were expecting to see, to avoid crashing
+ elsewhere. */
+ op_sibling_splice(expr, child, 0,
+ newSVOP(OP_CONST, 0, &PL_sv_no));
+ }
+ child->op_next = OpSIBLING(child);
+ }
+ else if (child->op_type != OP_CONST && child->op_type != OP_PUSHMARK)
+ is_compiletime = 0;
+ }
}
else if (expr->op_type != OP_CONST)
is_compiletime = 0;
* also, mark any arrays as LIST/REF */
if (expr->op_type == OP_LIST) {
- OP *o;
- for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
+ OP *child;
+ for (child = cLISTOPx(expr)->op_first; child; child = OpSIBLING(child)) {
- if (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) {
- assert( !(o->op_flags & OPf_WANT));
+ if (child->op_type == OP_PADAV || child->op_type == OP_RV2AV) {
+ assert( !(child->op_flags & OPf_WANT));
/* push the array rather than its contents. The regex
* engine will retrieve and join the elements later */
- o->op_flags |= (OPf_WANT_LIST | OPf_REF);
+ child->op_flags |= (OPf_WANT_LIST | OPf_REF);
continue;
}
- if (!(o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)))
- continue;
- o->op_next = NULL; /* undo temporary hack from above */
- scalar(o);
- LINKLIST(o);
- if (cLISTOPo->op_first->op_type == OP_LEAVE) {
- LISTOP *leaveop = cLISTOPx(cLISTOPo->op_first);
- /* skip ENTER */
- assert(leaveop->op_first->op_type == OP_ENTER);
- assert(OpHAS_SIBLING(leaveop->op_first));
- o->op_next = OpSIBLING(leaveop->op_first);
- /* skip leave */
- assert(leaveop->op_flags & OPf_KIDS);
- assert(leaveop->op_last->op_next == (OP*)leaveop);
- leaveop->op_next = NULL; /* stop on last op */
- op_null((OP*)leaveop);
- }
- else {
- /* skip SCOPE */
- OP *scope = cLISTOPo->op_first;
- assert(scope->op_type == OP_SCOPE);
- assert(scope->op_flags & OPf_KIDS);
- scope->op_next = NULL; /* stop on last op */
- op_null(scope);
- }
+ if (!(child->op_type == OP_NULL && (child->op_flags & OPf_SPECIAL)))
+ continue;
+ child->op_next = NULL; /* undo temporary hack from above */
+ scalar(child);
+ LINKLIST(child);
+ if (cLISTOPx(child)->op_first->op_type == OP_LEAVE) {
+ LISTOP *leaveop = cLISTOPx(cLISTOPx(child)->op_first);
+ /* skip ENTER */
+ assert(leaveop->op_first->op_type == OP_ENTER);
+ assert(OpHAS_SIBLING(leaveop->op_first));
+ child->op_next = OpSIBLING(leaveop->op_first);
+ /* skip leave */
+ assert(leaveop->op_flags & OPf_KIDS);
+ assert(leaveop->op_last->op_next == (OP*)leaveop);
+ leaveop->op_next = NULL; /* stop on last op */
+ op_null((OP*)leaveop);
+ }
+ else {
+ /* skip SCOPE */
+ OP *scope = cLISTOPx(child)->op_first;
+ assert(scope->op_type == OP_SCOPE);
+ assert(scope->op_flags & OPf_KIDS);
+ scope->op_next = NULL; /* stop on last op */
+ op_null(scope);
+ }
/* XXX optimize_optree() must be called on o before
* CALL_PEEP(), as currently S_maybe_multiconcat() can't
* to the same optree later (where hopefully it won't do any
* harm as it can't convert an op to multiconcat if it's
* already been converted */
- optimize_optree(o);
-
- /* have to peep the DOs individually as we've removed it from
- * the op_next chain */
- CALL_PEEP(o);
- S_prune_chain_head(&(o->op_next));
- if (is_compiletime)
- /* runtime finalizes as part of finalizing whole tree */
- finalize_optree(o);
- }
+ optimize_optree(child);
+
+ /* have to peep the DOs individually as we've removed it from
+ * the op_next chain */
+ CALL_PEEP(child);
+ S_prune_chain_head(&(child->op_next));
+ if (is_compiletime)
+ /* runtime finalizes as part of finalizing whole tree */
+ finalize_optree(child);
+ }
}
else if (expr->op_type == OP_PADAV || expr->op_type == OP_RV2AV) {
assert( !(expr->op_flags & OPf_WANT));
OP *
Perl_newSVOP(pTHX_ I32 type, I32 flags, SV *sv)
{
- dVAR;
SVOP *svop;
PERL_ARGS_ASSERT_NEWSVOP;
OP *
Perl_newPADOP(pTHX_ I32 type, I32 flags, SV *sv)
{
- dVAR;
PADOP *padop;
PERL_ARGS_ASSERT_NEWPADOP;
OP *
Perl_newPVOP(pTHX_ I32 type, I32 flags, char *pv)
{
- dVAR;
const bool utf8 = cBOOL(flags & SVf_UTF8);
PVOP *pvop;
}
/*
-=head1 Embedding Functions
+=for apidoc_section $embedding
=for apidoc load_module
If C<PERL_LOADMOD_DENY> is set, the module is loaded as if with C<no> rather
than C<use>.
+=for apidoc Amnh||PERL_LOADMOD_DENY
+=for apidoc Amnh||PERL_LOADMOD_NOIMPORT
+=for apidoc Amnh||PERL_LOADMOD_IMPORT_OPS
+
+=for apidoc vload_module
+Like C<L</load_module>> but the arguments are an encapsulated argument list.
+
+=for apidoc load_module_nocontext
+Like C<L</load_module>> but does not take a thread context (C<aTHX>) parameter,
+so is used in situations where the caller doesn't already have the thread
+context.
+
=cut */
void
}
/*
-=head1 Optree construction
+=for apidoc_section $optree_construction
=for apidoc newSLICEOP
static OP *
S_newONCEOP(pTHX_ OP *initop, OP *padop)
{
- dVAR;
const PADOFFSET target = padop->op_targ;
OP *const other = newOP(OP_PADSV,
padop->op_flags
OP *
Perl_newSTATEOP(pTHX_ I32 flags, char *label, OP *o)
{
- dVAR;
const U32 seq = intro_my();
const U32 utf8 = flags & SVf_UTF8;
COP *cop;
STATIC OP *
S_new_logop(pTHX_ I32 type, I32 flags, OP** firstp, OP** otherp)
{
- dVAR;
LOGOP *logop;
OP *o;
OP *first;
OP *
Perl_newCONDOP(pTHX_ I32 flags, OP *first, OP *trueop, OP *falseop)
{
- dVAR;
LOGOP *logop;
OP *start;
OP *o;
Perl_newWHILEOP(pTHX_ I32 flags, I32 debuggable, LOOP *loop,
OP *expr, OP *block, OP *cont, I32 has_my)
{
- dVAR;
OP *redo;
OP *next = NULL;
OP *listop;
OP *
Perl_newFOROP(pTHX_ I32 flags, OP *sv, OP *expr, OP *block, OP *cont)
{
- dVAR;
LOOP *loop;
OP *wop;
PADOFFSET padoff = 0;
* keep it in-place if there's space */
if (loop->op_slabbed
&& OpSLOT(loop)->opslot_size
- < SIZE_TO_PSIZE(sizeof(LOOP)) + OPSLOT_HEADER_P)
+ < SIZE_TO_PSIZE(sizeof(LOOP) + OPSLOT_HEADER))
{
/* no space; allocate new op */
LOOP *tmp;
I32 enter_opcode, I32 leave_opcode,
PADOFFSET entertarg)
{
- dVAR;
LOGOP *enterop;
OP *o;
/*
-=head1 Optree Manipulation Functions
+=for apidoc_section $optree_manipulation
=for apidoc cv_const_sv
if (CvNAMED(*spot))
hek = CvNAME_HEK(*spot);
else {
- dVAR;
U32 hash;
PERL_HASH(hash, PadnamePV(name)+1, PadnameLEN(name)-1);
CvNAME_HEK_set(*spot, hek =
if (!CvNAME_HEK(cv)) {
if (hek) (void)share_hek_hek(hek);
else {
- dVAR;
U32 hash;
PERL_HASH(hash, PadnamePV(name)+1, PadnameLEN(name)-1);
hek = share_hek(PadnamePV(name)+1,
If C<o_is_gv> is false and C<o> is null, then the subroutine will
be anonymous. If C<o_is_gv> is false and C<o> is non-null, then C<o>
-must point to a C<const> op, which will be consumed by this function,
+must point to a C<const> OP, which will be consumed by this function,
and its string value supplies a name for the subroutine. The name may
be qualified or unqualified, and if it is unqualified then a default
stash will be selected in some manner. If C<o_is_gv> is true, then C<o>
any use of the returned pointer. It is the caller's responsibility to
ensure that it knows which of these situations applies.
+=for apidoc newATTRSUB
+Construct a Perl subroutine, also performing some surrounding jobs.
+
+This is the same as L<perlintern/C<newATTRSUB_x>> with its C<o_is_gv> parameter set to
+FALSE. This means that if C<o> is null, the new sub will be anonymous; otherwise
+the name will be derived from C<o> in the way described (as with all other
+details) in L<perlintern/C<newATTRSUB_x>>.
+
+=for apidoc newSUB
+Like C<L</newATTRSUB>>, but without attributes.
+
=cut
*/
assert(CvGV(cv) == gv);
}
else {
- dVAR;
U32 hash;
PERL_HASH(hash, name, namlen);
CvNAME_HEK_set(cv,
if (isGV(gv))
CvGV_set(cv, gv);
else {
- dVAR;
U32 hash;
PERL_HASH(hash, name, namlen);
CvNAME_HEK_set(cv, share_hek(name,
OP *
Perl_oopsAV(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_OOPSAV;
OP *
Perl_oopsHV(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_OOPSHV;
OP *
Perl_newAVREF(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_NEWAVREF;
OP *
Perl_newHVREF(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_NEWHVREF;
Perl_newCVREF(pTHX_ I32 flags, OP *o)
{
if (o->op_type == OP_PADANY) {
- dVAR;
OpTYPE_set(o, OP_PADCV);
}
return newUNOP(OP_RV2CV, flags, scalar(o));
OP *
Perl_newSVREF(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_NEWSVREF;
OP *
Perl_ck_spair(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_CK_SPAIR;
OP *
Perl_ck_eval(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_CK_EVAL;
OP *
Perl_ck_rvconst(pTHX_ OP *o)
{
- dVAR;
SVOP * const kid = (SVOP*)cUNOPo->op_first;
PERL_ARGS_ASSERT_CK_RVCONST;
OP *
Perl_ck_ftst(pTHX_ OP *o)
{
- dVAR;
const I32 type = o->op_type;
PERL_ARGS_ASSERT_CK_FTST;
OP *
Perl_ck_smartmatch(pTHX_ OP *o)
{
- dVAR;
PERL_ARGS_ASSERT_CK_SMARTMATCH;
if (0 == (o->op_flags & OPf_SPECIAL)) {
OP *first = cBINOPo->op_first;
OP *
Perl_ck_sassign(pTHX_ OP *o)
{
- dVAR;
OP * const kid = cBINOPo->op_first;
PERL_ARGS_ASSERT_CK_SASSIGN;
SV * const sv = kid->op_sv;
U32 const was_readonly = SvREADONLY(sv);
if (kid->op_private & OPpCONST_BARE) {
- dVAR;
const char *end;
HEK *hek;
SvREFCNT_dec_NN(sv);
}
else {
- dVAR;
HEK *hek;
if (was_readonly) SvREADONLY_off(sv);
PERL_HASH(hash, s, len);
OP *
Perl_ck_select(pTHX_ OP *o)
{
- dVAR;
OP* kid;
PERL_ARGS_ASSERT_CK_SELECT;
OP *
Perl_ck_split(pTHX_ OP *o)
{
- dVAR;
OP *kid;
OP *sibs;
A null pointer is returned as usual if there is no statically-determinable
subroutine.
+=for apidoc Amnh||OPpEARLY_CV
+=for apidoc Amnh||OPpENTERSUB_AMPER
+=for apidoc Amnh||RV2CVOPCV_MARK_EARLY
+=for apidoc Amnh||RV2CVOPCV_RETURN_NAME_GV
+
=cut
*/
continue;
case '_':
/* _ must be at the end */
- if (proto[1] && !strchr(";@%", proto[1]))
+ if (proto[1] && !memCHRs(";@%", proto[1]))
goto oops;
/* FALLTHROUGH */
case '$':
only the C<CALL_CHECKER_REQUIRE_GV> bit currently has a defined meaning
(for which see above). All other bits should be clear.
+=for apidoc Amnh||CALL_CHECKER_REQUIRE_GV
+
=for apidoc cv_get_call_checker
The original form of L</cv_get_call_checker_flags>, which does not return
if (CvISXSUB(cv) || !CvROOT(cv))
S_entersub_alloc_targ(aTHX_ o);
if (!namegv) {
- /* The original call checker API guarantees that a GV will be
+ /* The original call checker API guarantees that a GV will
be provided with the right name. So, if the old API was
used (or the REQUIRE_GV flag was passed), we have to reify
the CV’s GV, unless this is an anonymous sub. This is not
OP *
Perl_ck_each(pTHX_ OP *o)
{
- dVAR;
OP *kid = o->op_flags & OPf_KIDS ? cUNOPo->op_first : NULL;
const unsigned orig_type = o->op_type;
}
+OP *
+Perl_ck_isa(pTHX_ OP *o)
+{
+ OP *classop = cBINOPo->op_last;
+
+ PERL_ARGS_ASSERT_CK_ISA;
+
+ /* Convert barename into PV */
+ if(classop->op_type == OP_CONST && classop->op_private & OPpCONST_BARE) {
+ /* TODO: Optionally convert package to raw HV here */
+ classop->op_private &= ~(OPpCONST_BARE|OPpCONST_STRICT);
+ }
+
+ return o;
+}
+
/*
---------------------------------------------------------
goto do_next;
case OP_UNDEF:
- /* undef counts as a scalar on the RHS:
- * (undef, $x) = ...; # only 1 scalar on LHS: always safe
+ /* undef on LHS following a var is significant, e.g.
+ * my $x = 1;
+ * @a = (($x, undef) = (2 => $x));
+ * # @a should be (2,1) not (2,2)
+ *
+ * undef on RHS counts as a scalar:
* ($x, $y) = (undef, $x); # 2 scalars on RHS: unsafe
*/
- if (rhs)
+ if ((!rhs && *scalars_p) || rhs)
(*scalars_p)++;
flags = AAS_SAFE_SCALAR;
break;
}
/* if its an unrecognised, non-dangerous op, assume that it
- * it the cause of at least one safe scalar */
+ * is the cause of at least one safe scalar */
(*scalars_p)++;
flags = AAS_SAFE_SCALAR;
break;
STATIC void
S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints)
{
- dVAR;
int pass;
UNOP_AUX_item *arg_buf = NULL;
bool reset_start_targ = FALSE; /* start->op_targ needs zeroing */
bool next_is_hash = FALSE; /* is the next lookup to be a hash? */
bool is_last = FALSE; /* no more derefs to follow */
bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
+ UV action_word = 0; /* all actions so far */
UNOP_AUX_item *arg = arg_buf;
UNOP_AUX_item *action_ptr = arg_buf;
- if (pass)
- action_ptr->uv = 0;
- arg++;
+ arg++; /* reserve slot for first action word */
switch (action) {
case MDEREF_HV_gvsv_vivify_rv2hv_helem:
arg--;
}
- if (pass)
- action_ptr->uv |= (action << (action_ix * MDEREF_SHIFT));
+ action_word |= (action << (action_ix * MDEREF_SHIFT));
action_ix++;
action_count++;
- /* if there's no space for the next action, create a new slot
+ /* if there's no space for the next action, reserve a new slot
* for it *before* we start adding args for that action */
if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
- action_ptr = arg;
if (pass)
- arg->uv = 0;
+ action_ptr->uv = action_word;
+ action_word = 0;
+ action_ptr = arg;
arg++;
action_ix = 0;
}
/* success! */
+ if (!action_ix)
+ /* slot reserved for next action word not now needed */
+ arg--;
+ else if (pass)
+ action_ptr->uv = action_word;
+
if (pass) {
OP *mderef;
OP *p, *q;
void
Perl_rpeep(pTHX_ OP *o)
{
- dVAR;
OP* oldop = NULL;
OP* oldoldop = NULL;
OP** defer_queue[MAX_DEFERRED]; /* small queue of deferred branches */
case OP_AND:
case OP_OR:
case OP_DOR:
+ case OP_CMPCHAIN_AND:
while (cLOGOP->op_other->op_type == OP_NULL)
cLOGOP->op_other = cLOGOP->op_other->op_next;
while (o->op_next && ( o->op_type == o->op_next->op_type
}
/*
-=head1 Custom Operators
+=for apidoc_section $custom
=for apidoc Perl_custom_op_xop
Return the XOP structure for a given custom op. This macro should be
else
xop = INT2PTR(XOP *, SvIV(HeVAL(he)));
}
+
{
XOPRETANY any;
if(field == XOPe_xop_ptr) {
any.xop_peep = xop->xop_peep;
break;
default:
- NOT_REACHED; /* NOTREACHED */
+ field_panic:
+ Perl_croak(aTHX_
+ "panic: custom_op_get_field(): invalid field %d\n",
+ (int)field);
break;
}
} else {
any.xop_peep = XOPd_xop_peep;
break;
default:
- NOT_REACHED; /* NOTREACHED */
+ goto field_panic;
break;
}
}
}
- /* On some platforms (HP-UX, IA64) gcc emits a warning for this function:
- * op.c: In function 'Perl_custom_op_get_field':
- * op.c:...: warning: 'any.xop_name' may be used uninitialized in this function [-Wmaybe-uninitialized]
- * This is because on those platforms (with -DEBUGGING) NOT_REACHED
- * expands to assert(0), which expands to ((0) ? (void)0 :
- * __assert(...)), and gcc doesn't know that __assert can never return. */
return any;
}
}
}
/*
-=head1 Hook manipulation
+=for apidoc_section $hook
These functions provide convenient and thread-safe means of manipulating
hook variables.
Perl_wrap_op_checker(pTHX_ Optype opcode,
Perl_check_t new_checker, Perl_check_t *old_checker_p)
{
- dVAR;
PERL_UNUSED_CONTEXT;
PERL_ARGS_ASSERT_WRAP_OP_CHECKER;