3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
38 /* Missing proto on LynxOS */
39 char *gconvert(double, int, int, char *);
42 #ifdef PERL_UTF8_CACHE_ASSERT
43 /* if adding more checks watch out for the following tests:
44 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
45 * lib/utf8.t lib/Unicode/Collate/t/index.t
48 # define ASSERT_UTF8_CACHE(cache) \
49 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
50 assert((cache)[2] <= (cache)[3]); \
51 assert((cache)[3] <= (cache)[1]);} \
54 # define ASSERT_UTF8_CACHE(cache) NOOP
57 #ifdef PERL_OLD_COPY_ON_WRITE
58 #define SV_COW_NEXT_SV(sv) INT2PTR(SV *,SvUVX(sv))
59 #define SV_COW_NEXT_SV_SET(current,next) SvUV_set(current, PTR2UV(next))
60 /* This is a pessimistic view. Scalar must be purely a read-write PV to copy-
64 /* ============================================================================
66 =head1 Allocation and deallocation of SVs.
68 An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
69 sv, av, hv...) contains type and reference count information, and for
70 many types, a pointer to the body (struct xrv, xpv, xpviv...), which
71 contains fields specific to each type. Some types store all they need
72 in the head, so don't have a body.
74 In all but the most memory-paranoid configuations (ex: PURIFY), heads
75 and bodies are allocated out of arenas, which by default are
76 approximately 4K chunks of memory parcelled up into N heads or bodies.
77 Sv-bodies are allocated by their sv-type, guaranteeing size
78 consistency needed to allocate safely from arrays.
80 For SV-heads, the first slot in each arena is reserved, and holds a
81 link to the next arena, some flags, and a note of the number of slots.
82 Snaked through each arena chain is a linked list of free items; when
83 this becomes empty, an extra arena is allocated and divided up into N
84 items which are threaded into the free list.
86 SV-bodies are similar, but they use arena-sets by default, which
87 separate the link and info from the arena itself, and reclaim the 1st
88 slot in the arena. SV-bodies are further described later.
90 The following global variables are associated with arenas:
92 PL_sv_arenaroot pointer to list of SV arenas
93 PL_sv_root pointer to list of free SV structures
95 PL_body_arenas head of linked-list of body arenas
96 PL_body_roots[] array of pointers to list of free bodies of svtype
97 arrays are indexed by the svtype needed
99 A few special SV heads are not allocated from an arena, but are
100 instead directly created in the interpreter structure, eg PL_sv_undef.
101 The size of arenas can be changed from the default by setting
102 PERL_ARENA_SIZE appropriately at compile time.
104 The SV arena serves the secondary purpose of allowing still-live SVs
105 to be located and destroyed during final cleanup.
107 At the lowest level, the macros new_SV() and del_SV() grab and free
108 an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
109 to return the SV to the free list with error checking.) new_SV() calls
110 more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
111 SVs in the free list have their SvTYPE field set to all ones.
113 At the time of very final cleanup, sv_free_arenas() is called from
114 perl_destruct() to physically free all the arenas allocated since the
115 start of the interpreter.
117 The function visit() scans the SV arenas list, and calls a specified
118 function for each SV it finds which is still live - ie which has an SvTYPE
119 other than all 1's, and a non-zero SvREFCNT. visit() is used by the
120 following functions (specified as [function that calls visit()] / [function
121 called by visit() for each SV]):
123 sv_report_used() / do_report_used()
124 dump all remaining SVs (debugging aid)
126 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
127 do_clean_named_io_objs()
128 Attempt to free all objects pointed to by RVs,
129 and, unless DISABLE_DESTRUCTOR_KLUDGE is defined,
130 try to do the same for all objects indirectly
131 referenced by typeglobs too. Called once from
132 perl_destruct(), prior to calling sv_clean_all()
135 sv_clean_all() / do_clean_all()
136 SvREFCNT_dec(sv) each remaining SV, possibly
137 triggering an sv_free(). It also sets the
138 SVf_BREAK flag on the SV to indicate that the
139 refcnt has been artificially lowered, and thus
140 stopping sv_free() from giving spurious warnings
141 about SVs which unexpectedly have a refcnt
142 of zero. called repeatedly from perl_destruct()
143 until there are no SVs left.
145 =head2 Arena allocator API Summary
147 Private API to rest of sv.c
151 new_XPVNV(), del_XPVGV(),
156 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
160 * ========================================================================= */
163 * "A time to plant, and a time to uproot what was planted..."
167 # define MEM_LOG_NEW_SV(sv, file, line, func) \
168 Perl_mem_log_new_sv(sv, file, line, func)
169 # define MEM_LOG_DEL_SV(sv, file, line, func) \
170 Perl_mem_log_del_sv(sv, file, line, func)
172 # define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
173 # define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
176 #ifdef DEBUG_LEAKING_SCALARS
177 # define FREE_SV_DEBUG_FILE(sv) Safefree((sv)->sv_debug_file)
178 # define DEBUG_SV_SERIAL(sv) \
179 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \
180 PTR2UV(sv), (long)(sv)->sv_debug_serial))
182 # define FREE_SV_DEBUG_FILE(sv)
183 # define DEBUG_SV_SERIAL(sv) NOOP
187 # define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
188 # define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
189 /* Whilst I'd love to do this, it seems that things like to check on
191 # define POSION_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
193 # define POSION_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
194 PoisonNew(&SvREFCNT(sv), 1, U32)
196 # define SvARENA_CHAIN(sv) SvANY(sv)
197 # define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
198 # define POSION_SV_HEAD(sv)
201 /* Mark an SV head as unused, and add to free list.
203 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
204 * its refcount artificially decremented during global destruction, so
205 * there may be dangling pointers to it. The last thing we want in that
206 * case is for it to be reused. */
208 #define plant_SV(p) \
210 const U32 old_flags = SvFLAGS(p); \
211 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
212 DEBUG_SV_SERIAL(p); \
213 FREE_SV_DEBUG_FILE(p); \
215 SvFLAGS(p) = SVTYPEMASK; \
216 if (!(old_flags & SVf_BREAK)) { \
217 SvARENA_CHAIN_SET(p, PL_sv_root); \
223 #define uproot_SV(p) \
226 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
231 /* make some more SVs by adding another arena */
238 char *chunk; /* must use New here to match call to */
239 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
240 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
245 /* new_SV(): return a new, empty SV head */
247 #ifdef DEBUG_LEAKING_SCALARS
248 /* provide a real function for a debugger to play with */
250 S_new_SV(pTHX_ const char *file, int line, const char *func)
257 sv = S_more_sv(aTHX);
261 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
262 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
268 sv->sv_debug_inpad = 0;
269 sv->sv_debug_parent = NULL;
270 sv->sv_debug_file = PL_curcop ? savepv(CopFILE(PL_curcop)): NULL;
272 sv->sv_debug_serial = PL_sv_serial++;
274 MEM_LOG_NEW_SV(sv, file, line, func);
275 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n",
276 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
280 # define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
288 (p) = S_more_sv(aTHX); \
292 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
297 /* del_SV(): return an empty SV head to the free list */
310 S_del_sv(pTHX_ SV *p)
314 PERL_ARGS_ASSERT_DEL_SV;
319 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
320 const SV * const sv = sva + 1;
321 const SV * const svend = &sva[SvREFCNT(sva)];
322 if (p >= sv && p < svend) {
328 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
329 "Attempt to free non-arena SV: 0x%"UVxf
330 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
337 #else /* ! DEBUGGING */
339 #define del_SV(p) plant_SV(p)
341 #endif /* DEBUGGING */
345 =head1 SV Manipulation Functions
347 =for apidoc sv_add_arena
349 Given a chunk of memory, link it to the head of the list of arenas,
350 and split it into a list of free SVs.
356 S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
359 SV *const sva = MUTABLE_SV(ptr);
363 PERL_ARGS_ASSERT_SV_ADD_ARENA;
365 /* The first SV in an arena isn't an SV. */
366 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
367 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
368 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
370 PL_sv_arenaroot = sva;
371 PL_sv_root = sva + 1;
373 svend = &sva[SvREFCNT(sva) - 1];
376 SvARENA_CHAIN_SET(sv, (sv + 1));
380 /* Must always set typemask because it's always checked in on cleanup
381 when the arenas are walked looking for objects. */
382 SvFLAGS(sv) = SVTYPEMASK;
385 SvARENA_CHAIN_SET(sv, 0);
389 SvFLAGS(sv) = SVTYPEMASK;
392 /* visit(): call the named function for each non-free SV in the arenas
393 * whose flags field matches the flags/mask args. */
396 S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
402 PERL_ARGS_ASSERT_VISIT;
404 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
405 register const SV * const svend = &sva[SvREFCNT(sva)];
407 for (sv = sva + 1; sv < svend; ++sv) {
408 if (SvTYPE(sv) != SVTYPEMASK
409 && (sv->sv_flags & mask) == flags
422 /* called by sv_report_used() for each live SV */
425 do_report_used(pTHX_ SV *const sv)
427 if (SvTYPE(sv) != SVTYPEMASK) {
428 PerlIO_printf(Perl_debug_log, "****\n");
435 =for apidoc sv_report_used
437 Dump the contents of all SVs not yet freed. (Debugging aid).
443 Perl_sv_report_used(pTHX)
446 visit(do_report_used, 0, 0);
452 /* called by sv_clean_objs() for each live SV */
455 do_clean_objs(pTHX_ SV *const ref)
460 SV * const target = SvRV(ref);
461 if (SvOBJECT(target)) {
462 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
463 if (SvWEAKREF(ref)) {
464 sv_del_backref(target, ref);
470 SvREFCNT_dec(target);
475 /* XXX Might want to check arrays, etc. */
479 #ifndef DISABLE_DESTRUCTOR_KLUDGE
481 /* clear any slots in a GV which hold objects - except IO;
482 * called by sv_clean_objs() for each live GV */
485 do_clean_named_objs(pTHX_ SV *const sv)
489 assert(SvTYPE(sv) == SVt_PVGV);
490 assert(isGV_with_GP(sv));
494 /* freeing GP entries may indirectly free the current GV;
495 * hold onto it while we mess with the GP slots */
498 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
499 DEBUG_D((PerlIO_printf(Perl_debug_log,
500 "Cleaning named glob SV object:\n "), sv_dump(obj)));
504 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
505 DEBUG_D((PerlIO_printf(Perl_debug_log,
506 "Cleaning named glob AV object:\n "), sv_dump(obj)));
510 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
511 DEBUG_D((PerlIO_printf(Perl_debug_log,
512 "Cleaning named glob HV object:\n "), sv_dump(obj)));
516 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
517 DEBUG_D((PerlIO_printf(Perl_debug_log,
518 "Cleaning named glob CV object:\n "), sv_dump(obj)));
522 SvREFCNT_dec(sv); /* undo the inc above */
525 /* clear any IO slots in a GV which hold objects (except stderr, defout);
526 * called by sv_clean_objs() for each live GV */
529 do_clean_named_io_objs(pTHX_ SV *const sv)
533 assert(SvTYPE(sv) == SVt_PVGV);
534 assert(isGV_with_GP(sv));
535 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
539 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
540 DEBUG_D((PerlIO_printf(Perl_debug_log,
541 "Cleaning named glob IO object:\n "), sv_dump(obj)));
545 SvREFCNT_dec(sv); /* undo the inc above */
550 =for apidoc sv_clean_objs
552 Attempt to destroy all objects not yet freed
558 Perl_sv_clean_objs(pTHX)
562 PL_in_clean_objs = TRUE;
563 visit(do_clean_objs, SVf_ROK, SVf_ROK);
564 #ifndef DISABLE_DESTRUCTOR_KLUDGE
565 /* Some barnacles may yet remain, clinging to typeglobs.
566 * Run the non-IO destructors first: they may want to output
567 * error messages, close files etc */
568 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
569 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
570 olddef = PL_defoutgv;
571 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
572 if (olddef && isGV_with_GP(olddef))
573 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
574 olderr = PL_stderrgv;
575 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
576 if (olderr && isGV_with_GP(olderr))
577 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
578 SvREFCNT_dec(olddef);
580 PL_in_clean_objs = FALSE;
583 /* called by sv_clean_all() for each live SV */
586 do_clean_all(pTHX_ SV *const sv)
589 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
590 /* don't clean pid table and strtab */
593 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) ));
594 SvFLAGS(sv) |= SVf_BREAK;
599 =for apidoc sv_clean_all
601 Decrement the refcnt of each remaining SV, possibly triggering a
602 cleanup. This function may have to be called multiple times to free
603 SVs which are in complex self-referential hierarchies.
609 Perl_sv_clean_all(pTHX)
613 PL_in_clean_all = TRUE;
614 cleaned = visit(do_clean_all, 0,0);
619 ARENASETS: a meta-arena implementation which separates arena-info
620 into struct arena_set, which contains an array of struct
621 arena_descs, each holding info for a single arena. By separating
622 the meta-info from the arena, we recover the 1st slot, formerly
623 borrowed for list management. The arena_set is about the size of an
624 arena, avoiding the needless malloc overhead of a naive linked-list.
626 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
627 memory in the last arena-set (1/2 on average). In trade, we get
628 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
629 smaller types). The recovery of the wasted space allows use of
630 small arenas for large, rare body types, by changing array* fields
631 in body_details_by_type[] below.
634 char *arena; /* the raw storage, allocated aligned */
635 size_t size; /* its size ~4k typ */
636 svtype utype; /* bodytype stored in arena */
641 /* Get the maximum number of elements in set[] such that struct arena_set
642 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
643 therefore likely to be 1 aligned memory page. */
645 #define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
646 - 2 * sizeof(int)) / sizeof (struct arena_desc))
649 struct arena_set* next;
650 unsigned int set_size; /* ie ARENAS_PER_SET */
651 unsigned int curr; /* index of next available arena-desc */
652 struct arena_desc set[ARENAS_PER_SET];
656 =for apidoc sv_free_arenas
658 Deallocate the memory used by all arenas. Note that all the individual SV
659 heads and bodies within the arenas must already have been freed.
664 Perl_sv_free_arenas(pTHX)
671 /* Free arenas here, but be careful about fake ones. (We assume
672 contiguity of the fake ones with the corresponding real ones.) */
674 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
675 svanext = MUTABLE_SV(SvANY(sva));
676 while (svanext && SvFAKE(svanext))
677 svanext = MUTABLE_SV(SvANY(svanext));
684 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
687 struct arena_set *current = aroot;
690 assert(aroot->set[i].arena);
691 Safefree(aroot->set[i].arena);
699 i = PERL_ARENA_ROOTS_SIZE;
701 PL_body_roots[i] = 0;
708 Here are mid-level routines that manage the allocation of bodies out
709 of the various arenas. There are 5 kinds of arenas:
711 1. SV-head arenas, which are discussed and handled above
712 2. regular body arenas
713 3. arenas for reduced-size bodies
716 Arena types 2 & 3 are chained by body-type off an array of
717 arena-root pointers, which is indexed by svtype. Some of the
718 larger/less used body types are malloced singly, since a large
719 unused block of them is wasteful. Also, several svtypes dont have
720 bodies; the data fits into the sv-head itself. The arena-root
721 pointer thus has a few unused root-pointers (which may be hijacked
722 later for arena types 4,5)
724 3 differs from 2 as an optimization; some body types have several
725 unused fields in the front of the structure (which are kept in-place
726 for consistency). These bodies can be allocated in smaller chunks,
727 because the leading fields arent accessed. Pointers to such bodies
728 are decremented to point at the unused 'ghost' memory, knowing that
729 the pointers are used with offsets to the real memory.
732 =head1 SV-Body Allocation
734 Allocation of SV-bodies is similar to SV-heads, differing as follows;
735 the allocation mechanism is used for many body types, so is somewhat
736 more complicated, it uses arena-sets, and has no need for still-live
739 At the outermost level, (new|del)_X*V macros return bodies of the
740 appropriate type. These macros call either (new|del)_body_type or
741 (new|del)_body_allocated macro pairs, depending on specifics of the
742 type. Most body types use the former pair, the latter pair is used to
743 allocate body types with "ghost fields".
745 "ghost fields" are fields that are unused in certain types, and
746 consequently don't need to actually exist. They are declared because
747 they're part of a "base type", which allows use of functions as
748 methods. The simplest examples are AVs and HVs, 2 aggregate types
749 which don't use the fields which support SCALAR semantics.
751 For these types, the arenas are carved up into appropriately sized
752 chunks, we thus avoid wasted memory for those unaccessed members.
753 When bodies are allocated, we adjust the pointer back in memory by the
754 size of the part not allocated, so it's as if we allocated the full
755 structure. (But things will all go boom if you write to the part that
756 is "not there", because you'll be overwriting the last members of the
757 preceding structure in memory.)
759 We calculate the correction using the STRUCT_OFFSET macro on the first
760 member present. If the allocated structure is smaller (no initial NV
761 actually allocated) then the net effect is to subtract the size of the NV
762 from the pointer, to return a new pointer as if an initial NV were actually
763 allocated. (We were using structures named *_allocated for this, but
764 this turned out to be a subtle bug, because a structure without an NV
765 could have a lower alignment constraint, but the compiler is allowed to
766 optimised accesses based on the alignment constraint of the actual pointer
767 to the full structure, for example, using a single 64 bit load instruction
768 because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
770 This is the same trick as was used for NV and IV bodies. Ironically it
771 doesn't need to be used for NV bodies any more, because NV is now at
772 the start of the structure. IV bodies don't need it either, because
773 they are no longer allocated.
775 In turn, the new_body_* allocators call S_new_body(), which invokes
776 new_body_inline macro, which takes a lock, and takes a body off the
777 linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
778 necessary to refresh an empty list. Then the lock is released, and
779 the body is returned.
781 Perl_more_bodies allocates a new arena, and carves it up into an array of N
782 bodies, which it strings into a linked list. It looks up arena-size
783 and body-size from the body_details table described below, thus
784 supporting the multiple body-types.
786 If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
787 the (new|del)_X*V macros are mapped directly to malloc/free.
789 For each sv-type, struct body_details bodies_by_type[] carries
790 parameters which control these aspects of SV handling:
792 Arena_size determines whether arenas are used for this body type, and if
793 so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
794 zero, forcing individual mallocs and frees.
796 Body_size determines how big a body is, and therefore how many fit into
797 each arena. Offset carries the body-pointer adjustment needed for
798 "ghost fields", and is used in *_allocated macros.
800 But its main purpose is to parameterize info needed in
801 Perl_sv_upgrade(). The info here dramatically simplifies the function
802 vs the implementation in 5.8.8, making it table-driven. All fields
803 are used for this, except for arena_size.
805 For the sv-types that have no bodies, arenas are not used, so those
806 PL_body_roots[sv_type] are unused, and can be overloaded. In
807 something of a special case, SVt_NULL is borrowed for HE arenas;
808 PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
809 bodies_by_type[SVt_NULL] slot is not used, as the table is not
814 struct body_details {
815 U8 body_size; /* Size to allocate */
816 U8 copy; /* Size of structure to copy (may be shorter) */
818 unsigned int type : 4; /* We have space for a sanity check. */
819 unsigned int cant_upgrade : 1; /* Cannot upgrade this type */
820 unsigned int zero_nv : 1; /* zero the NV when upgrading from this */
821 unsigned int arena : 1; /* Allocated from an arena */
822 size_t arena_size; /* Size of arena to allocate */
830 /* With -DPURFIY we allocate everything directly, and don't use arenas.
831 This seems a rather elegant way to simplify some of the code below. */
832 #define HASARENA FALSE
834 #define HASARENA TRUE
836 #define NOARENA FALSE
838 /* Size the arenas to exactly fit a given number of bodies. A count
839 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
840 simplifying the default. If count > 0, the arena is sized to fit
841 only that many bodies, allowing arenas to be used for large, rare
842 bodies (XPVFM, XPVIO) without undue waste. The arena size is
843 limited by PERL_ARENA_SIZE, so we can safely oversize the
846 #define FIT_ARENA0(body_size) \
847 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
848 #define FIT_ARENAn(count,body_size) \
849 ( count * body_size <= PERL_ARENA_SIZE) \
850 ? count * body_size \
851 : FIT_ARENA0 (body_size)
852 #define FIT_ARENA(count,body_size) \
854 ? FIT_ARENAn (count, body_size) \
855 : FIT_ARENA0 (body_size)
857 /* Calculate the length to copy. Specifically work out the length less any
858 final padding the compiler needed to add. See the comment in sv_upgrade
859 for why copying the padding proved to be a bug. */
861 #define copy_length(type, last_member) \
862 STRUCT_OFFSET(type, last_member) \
863 + sizeof (((type*)SvANY((const SV *)0))->last_member)
865 static const struct body_details bodies_by_type[] = {
866 /* HEs use this offset for their arena. */
867 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
869 /* The bind placeholder pretends to be an RV for now.
870 Also it's marked as "can't upgrade" to stop anyone using it before it's
872 { 0, 0, 0, SVt_BIND, TRUE, NONV, NOARENA, 0 },
874 /* IVs are in the head, so the allocation size is 0. */
876 sizeof(IV), /* This is used to copy out the IV body. */
877 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
878 NOARENA /* IVS don't need an arena */, 0
881 /* 8 bytes on most ILP32 with IEEE doubles */
882 { sizeof(NV), sizeof(NV),
883 STRUCT_OFFSET(XPVNV, xnv_u),
884 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
886 /* 8 bytes on most ILP32 with IEEE doubles */
887 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
888 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
889 + STRUCT_OFFSET(XPV, xpv_cur),
890 SVt_PV, FALSE, NONV, HASARENA,
891 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
894 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
895 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
896 + STRUCT_OFFSET(XPV, xpv_cur),
897 SVt_PVIV, FALSE, NONV, HASARENA,
898 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
901 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
902 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
903 + STRUCT_OFFSET(XPV, xpv_cur),
904 SVt_PVNV, FALSE, HADNV, HASARENA,
905 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
908 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
909 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
915 SVt_REGEXP, FALSE, NONV, HASARENA,
916 FIT_ARENA(0, sizeof(regexp))
920 { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
921 HASARENA, FIT_ARENA(0, sizeof(XPVGV)) },
924 { sizeof(XPVLV), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
925 HASARENA, FIT_ARENA(0, sizeof(XPVLV)) },
928 copy_length(XPVAV, xav_alloc),
930 SVt_PVAV, TRUE, NONV, HASARENA,
931 FIT_ARENA(0, sizeof(XPVAV)) },
934 copy_length(XPVHV, xhv_max),
936 SVt_PVHV, TRUE, NONV, HASARENA,
937 FIT_ARENA(0, sizeof(XPVHV)) },
943 SVt_PVCV, TRUE, NONV, HASARENA,
944 FIT_ARENA(0, sizeof(XPVCV)) },
949 SVt_PVFM, TRUE, NONV, NOARENA,
950 FIT_ARENA(20, sizeof(XPVFM)) },
952 /* XPVIO is 84 bytes, fits 48x */
956 SVt_PVIO, TRUE, NONV, HASARENA,
957 FIT_ARENA(24, sizeof(XPVIO)) },
960 #define new_body_allocated(sv_type) \
961 (void *)((char *)S_new_body(aTHX_ sv_type) \
962 - bodies_by_type[sv_type].offset)
964 /* return a thing to the free list */
966 #define del_body(thing, root) \
968 void ** const thing_copy = (void **)thing; \
969 *thing_copy = *root; \
970 *root = (void*)thing_copy; \
975 #define new_XNV() safemalloc(sizeof(XPVNV))
976 #define new_XPVNV() safemalloc(sizeof(XPVNV))
977 #define new_XPVMG() safemalloc(sizeof(XPVMG))
979 #define del_XPVGV(p) safefree(p)
983 #define new_XNV() new_body_allocated(SVt_NV)
984 #define new_XPVNV() new_body_allocated(SVt_PVNV)
985 #define new_XPVMG() new_body_allocated(SVt_PVMG)
987 #define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
988 &PL_body_roots[SVt_PVGV])
992 /* no arena for you! */
994 #define new_NOARENA(details) \
995 safemalloc((details)->body_size + (details)->offset)
996 #define new_NOARENAZ(details) \
997 safecalloc((details)->body_size + (details)->offset, 1)
1000 Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1001 const size_t arena_size)
1004 void ** const root = &PL_body_roots[sv_type];
1005 struct arena_desc *adesc;
1006 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1010 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
1011 #if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE)
1012 static bool done_sanity_check;
1014 /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global
1015 * variables like done_sanity_check. */
1016 if (!done_sanity_check) {
1017 unsigned int i = SVt_LAST;
1019 done_sanity_check = TRUE;
1022 assert (bodies_by_type[i].type == i);
1028 /* may need new arena-set to hold new arena */
1029 if (!aroot || aroot->curr >= aroot->set_size) {
1030 struct arena_set *newroot;
1031 Newxz(newroot, 1, struct arena_set);
1032 newroot->set_size = ARENAS_PER_SET;
1033 newroot->next = aroot;
1035 PL_body_arenas = (void *) newroot;
1036 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1039 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1040 curr = aroot->curr++;
1041 adesc = &(aroot->set[curr]);
1042 assert(!adesc->arena);
1044 Newx(adesc->arena, good_arena_size, char);
1045 adesc->size = good_arena_size;
1046 adesc->utype = sv_type;
1047 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n",
1048 curr, (void*)adesc->arena, (UV)good_arena_size));
1050 start = (char *) adesc->arena;
1052 /* Get the address of the byte after the end of the last body we can fit.
1053 Remember, this is integer division: */
1054 end = start + good_arena_size / body_size * body_size;
1056 /* computed count doesnt reflect the 1st slot reservation */
1057 #if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1058 DEBUG_m(PerlIO_printf(Perl_debug_log,
1059 "arena %p end %p arena-size %d (from %d) type %d "
1061 (void*)start, (void*)end, (int)good_arena_size,
1062 (int)arena_size, sv_type, (int)body_size,
1063 (int)good_arena_size / (int)body_size));
1065 DEBUG_m(PerlIO_printf(Perl_debug_log,
1066 "arena %p end %p arena-size %d type %d size %d ct %d\n",
1067 (void*)start, (void*)end,
1068 (int)arena_size, sv_type, (int)body_size,
1069 (int)good_arena_size / (int)body_size));
1071 *root = (void *)start;
1074 /* Where the next body would start: */
1075 char * const next = start + body_size;
1078 /* This is the last body: */
1079 assert(next == end);
1081 *(void **)start = 0;
1085 *(void**) start = (void *)next;
1090 /* grab a new thing from the free list, allocating more if necessary.
1091 The inline version is used for speed in hot routines, and the
1092 function using it serves the rest (unless PURIFY).
1094 #define new_body_inline(xpv, sv_type) \
1096 void ** const r3wt = &PL_body_roots[sv_type]; \
1097 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1098 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
1099 bodies_by_type[sv_type].body_size,\
1100 bodies_by_type[sv_type].arena_size)); \
1101 *(r3wt) = *(void**)(xpv); \
1107 S_new_body(pTHX_ const svtype sv_type)
1111 new_body_inline(xpv, sv_type);
1117 static const struct body_details fake_rv =
1118 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1121 =for apidoc sv_upgrade
1123 Upgrade an SV to a more complex form. Generally adds a new body type to the
1124 SV, then copies across as much information as possible from the old body.
1125 You generally want to use the C<SvUPGRADE> macro wrapper. See also C<svtype>.
1131 Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type)
1136 const svtype old_type = SvTYPE(sv);
1137 const struct body_details *new_type_details;
1138 const struct body_details *old_type_details
1139 = bodies_by_type + old_type;
1140 SV *referant = NULL;
1142 PERL_ARGS_ASSERT_SV_UPGRADE;
1144 if (old_type == new_type)
1147 /* This clause was purposefully added ahead of the early return above to
1148 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1149 inference by Nick I-S that it would fix other troublesome cases. See
1150 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1152 Given that shared hash key scalars are no longer PVIV, but PV, there is
1153 no longer need to unshare so as to free up the IVX slot for its proper
1154 purpose. So it's safe to move the early return earlier. */
1156 if (new_type != SVt_PV && SvIsCOW(sv)) {
1157 sv_force_normal_flags(sv, 0);
1160 old_body = SvANY(sv);
1162 /* Copying structures onto other structures that have been neatly zeroed
1163 has a subtle gotcha. Consider XPVMG
1165 +------+------+------+------+------+-------+-------+
1166 | NV | CUR | LEN | IV | MAGIC | STASH |
1167 +------+------+------+------+------+-------+-------+
1168 0 4 8 12 16 20 24 28
1170 where NVs are aligned to 8 bytes, so that sizeof that structure is
1171 actually 32 bytes long, with 4 bytes of padding at the end:
1173 +------+------+------+------+------+-------+-------+------+
1174 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1175 +------+------+------+------+------+-------+-------+------+
1176 0 4 8 12 16 20 24 28 32
1178 so what happens if you allocate memory for this structure:
1180 +------+------+------+------+------+-------+-------+------+------+...
1181 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1182 +------+------+------+------+------+-------+-------+------+------+...
1183 0 4 8 12 16 20 24 28 32 36
1185 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1186 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1187 started out as zero once, but it's quite possible that it isn't. So now,
1188 rather than a nicely zeroed GP, you have it pointing somewhere random.
1191 (In fact, GP ends up pointing at a previous GP structure, because the
1192 principle cause of the padding in XPVMG getting garbage is a copy of
1193 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1194 this happens to be moot because XPVGV has been re-ordered, with GP
1195 no longer after STASH)
1197 So we are careful and work out the size of used parts of all the
1205 referant = SvRV(sv);
1206 old_type_details = &fake_rv;
1207 if (new_type == SVt_NV)
1208 new_type = SVt_PVNV;
1210 if (new_type < SVt_PVIV) {
1211 new_type = (new_type == SVt_NV)
1212 ? SVt_PVNV : SVt_PVIV;
1217 if (new_type < SVt_PVNV) {
1218 new_type = SVt_PVNV;
1222 assert(new_type > SVt_PV);
1223 assert(SVt_IV < SVt_PV);
1224 assert(SVt_NV < SVt_PV);
1231 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1232 there's no way that it can be safely upgraded, because perl.c
1233 expects to Safefree(SvANY(PL_mess_sv)) */
1234 assert(sv != PL_mess_sv);
1235 /* This flag bit is used to mean other things in other scalar types.
1236 Given that it only has meaning inside the pad, it shouldn't be set
1237 on anything that can get upgraded. */
1238 assert(!SvPAD_TYPED(sv));
1241 if (old_type_details->cant_upgrade)
1242 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1243 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
1246 if (old_type > new_type)
1247 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1248 (int)old_type, (int)new_type);
1250 new_type_details = bodies_by_type + new_type;
1252 SvFLAGS(sv) &= ~SVTYPEMASK;
1253 SvFLAGS(sv) |= new_type;
1255 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1256 the return statements above will have triggered. */
1257 assert (new_type != SVt_NULL);
1260 assert(old_type == SVt_NULL);
1261 SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv));
1265 assert(old_type == SVt_NULL);
1266 SvANY(sv) = new_XNV();
1271 assert(new_type_details->body_size);
1274 assert(new_type_details->arena);
1275 assert(new_type_details->arena_size);
1276 /* This points to the start of the allocated area. */
1277 new_body_inline(new_body, new_type);
1278 Zero(new_body, new_type_details->body_size, char);
1279 new_body = ((char *)new_body) - new_type_details->offset;
1281 /* We always allocated the full length item with PURIFY. To do this
1282 we fake things so that arena is false for all 16 types.. */
1283 new_body = new_NOARENAZ(new_type_details);
1285 SvANY(sv) = new_body;
1286 if (new_type == SVt_PVAV) {
1290 if (old_type_details->body_size) {
1293 /* It will have been zeroed when the new body was allocated.
1294 Lets not write to it, in case it confuses a write-back
1300 #ifndef NODEFAULT_SHAREKEYS
1301 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1303 HvMAX(sv) = 7; /* (start with 8 buckets) */
1306 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1307 The target created by newSVrv also is, and it can have magic.
1308 However, it never has SvPVX set.
1310 if (old_type == SVt_IV) {
1312 } else if (old_type >= SVt_PV) {
1313 assert(SvPVX_const(sv) == 0);
1316 if (old_type >= SVt_PVMG) {
1317 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
1318 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
1320 sv->sv_u.svu_array = NULL; /* or svu_hash */
1326 /* This ensures that SvTHINKFIRST(sv) is true, and hence that
1327 sv_force_normal_flags(sv) is called. */
1330 /* XXX Is this still needed? Was it ever needed? Surely as there is
1331 no route from NV to PVIV, NOK can never be true */
1332 assert(!SvNOKp(sv));
1343 assert(new_type_details->body_size);
1344 /* We always allocated the full length item with PURIFY. To do this
1345 we fake things so that arena is false for all 16 types.. */
1346 if(new_type_details->arena) {
1347 /* This points to the start of the allocated area. */
1348 new_body_inline(new_body, new_type);
1349 Zero(new_body, new_type_details->body_size, char);
1350 new_body = ((char *)new_body) - new_type_details->offset;
1352 new_body = new_NOARENAZ(new_type_details);
1354 SvANY(sv) = new_body;
1356 if (old_type_details->copy) {
1357 /* There is now the potential for an upgrade from something without
1358 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1359 int offset = old_type_details->offset;
1360 int length = old_type_details->copy;
1362 if (new_type_details->offset > old_type_details->offset) {
1363 const int difference
1364 = new_type_details->offset - old_type_details->offset;
1365 offset += difference;
1366 length -= difference;
1368 assert (length >= 0);
1370 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1374 #ifndef NV_ZERO_IS_ALLBITS_ZERO
1375 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
1376 * correct 0.0 for us. Otherwise, if the old body didn't have an
1377 * NV slot, but the new one does, then we need to initialise the
1378 * freshly created NV slot with whatever the correct bit pattern is
1380 if (old_type_details->zero_nv && !new_type_details->zero_nv
1381 && !isGV_with_GP(sv))
1385 if (new_type == SVt_PVIO) {
1386 IO * const io = MUTABLE_IO(sv);
1387 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
1390 /* Clear the stashcache because a new IO could overrule a package
1392 hv_clear(PL_stashcache);
1394 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
1395 IoPAGE_LEN(sv) = 60;
1397 if (old_type < SVt_PV) {
1398 /* referant will be NULL unless the old type was SVt_IV emulating
1400 sv->sv_u.svu_rv = referant;
1404 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1405 (unsigned long)new_type);
1408 if (old_type > SVt_IV) {
1412 /* Note that there is an assumption that all bodies of types that
1413 can be upgraded came from arenas. Only the more complex non-
1414 upgradable types are allowed to be directly malloc()ed. */
1415 assert(old_type_details->arena);
1416 del_body((void*)((char*)old_body + old_type_details->offset),
1417 &PL_body_roots[old_type]);
1423 =for apidoc sv_backoff
1425 Remove any string offset. You should normally use the C<SvOOK_off> macro
1432 Perl_sv_backoff(pTHX_ register SV *const sv)
1435 const char * const s = SvPVX_const(sv);
1437 PERL_ARGS_ASSERT_SV_BACKOFF;
1438 PERL_UNUSED_CONTEXT;
1441 assert(SvTYPE(sv) != SVt_PVHV);
1442 assert(SvTYPE(sv) != SVt_PVAV);
1444 SvOOK_offset(sv, delta);
1446 SvLEN_set(sv, SvLEN(sv) + delta);
1447 SvPV_set(sv, SvPVX(sv) - delta);
1448 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1449 SvFLAGS(sv) &= ~SVf_OOK;
1456 Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1457 upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1458 Use the C<SvGROW> wrapper instead.
1464 Perl_sv_grow(pTHX_ register SV *const sv, register STRLEN newlen)
1468 PERL_ARGS_ASSERT_SV_GROW;
1470 if (PL_madskills && newlen >= 0x100000) {
1471 PerlIO_printf(Perl_debug_log,
1472 "Allocation too large: %"UVxf"\n", (UV)newlen);
1474 #ifdef HAS_64K_LIMIT
1475 if (newlen >= 0x10000) {
1476 PerlIO_printf(Perl_debug_log,
1477 "Allocation too large: %"UVxf"\n", (UV)newlen);
1480 #endif /* HAS_64K_LIMIT */
1483 if (SvTYPE(sv) < SVt_PV) {
1484 sv_upgrade(sv, SVt_PV);
1485 s = SvPVX_mutable(sv);
1487 else if (SvOOK(sv)) { /* pv is offset? */
1489 s = SvPVX_mutable(sv);
1490 if (newlen > SvLEN(sv))
1491 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
1492 #ifdef HAS_64K_LIMIT
1493 if (newlen >= 0x10000)
1498 s = SvPVX_mutable(sv);
1500 if (newlen > SvLEN(sv)) { /* need more room? */
1501 STRLEN minlen = SvCUR(sv);
1502 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
1503 if (newlen < minlen)
1505 #ifndef Perl_safesysmalloc_size
1506 newlen = PERL_STRLEN_ROUNDUP(newlen);
1508 if (SvLEN(sv) && s) {
1509 s = (char*)saferealloc(s, newlen);
1512 s = (char*)safemalloc(newlen);
1513 if (SvPVX_const(sv) && SvCUR(sv)) {
1514 Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char);
1518 #ifdef Perl_safesysmalloc_size
1519 /* Do this here, do it once, do it right, and then we will never get
1520 called back into sv_grow() unless there really is some growing
1522 SvLEN_set(sv, Perl_safesysmalloc_size(s));
1524 SvLEN_set(sv, newlen);
1531 =for apidoc sv_setiv
1533 Copies an integer into the given SV, upgrading first if necessary.
1534 Does not handle 'set' magic. See also C<sv_setiv_mg>.
1540 Perl_sv_setiv(pTHX_ register SV *const sv, const IV i)
1544 PERL_ARGS_ASSERT_SV_SETIV;
1546 SV_CHECK_THINKFIRST_COW_DROP(sv);
1547 switch (SvTYPE(sv)) {
1550 sv_upgrade(sv, SVt_IV);
1553 sv_upgrade(sv, SVt_PVIV);
1557 if (!isGV_with_GP(sv))
1564 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1568 (void)SvIOK_only(sv); /* validate number */
1574 =for apidoc sv_setiv_mg
1576 Like C<sv_setiv>, but also handles 'set' magic.
1582 Perl_sv_setiv_mg(pTHX_ register SV *const sv, const IV i)
1584 PERL_ARGS_ASSERT_SV_SETIV_MG;
1591 =for apidoc sv_setuv
1593 Copies an unsigned integer into the given SV, upgrading first if necessary.
1594 Does not handle 'set' magic. See also C<sv_setuv_mg>.
1600 Perl_sv_setuv(pTHX_ register SV *const sv, const UV u)
1602 PERL_ARGS_ASSERT_SV_SETUV;
1604 /* With these two if statements:
1605 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
1608 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1610 If you wish to remove them, please benchmark to see what the effect is
1612 if (u <= (UV)IV_MAX) {
1613 sv_setiv(sv, (IV)u);
1622 =for apidoc sv_setuv_mg
1624 Like C<sv_setuv>, but also handles 'set' magic.
1630 Perl_sv_setuv_mg(pTHX_ register SV *const sv, const UV u)
1632 PERL_ARGS_ASSERT_SV_SETUV_MG;
1639 =for apidoc sv_setnv
1641 Copies a double into the given SV, upgrading first if necessary.
1642 Does not handle 'set' magic. See also C<sv_setnv_mg>.
1648 Perl_sv_setnv(pTHX_ register SV *const sv, const NV num)
1652 PERL_ARGS_ASSERT_SV_SETNV;
1654 SV_CHECK_THINKFIRST_COW_DROP(sv);
1655 switch (SvTYPE(sv)) {
1658 sv_upgrade(sv, SVt_NV);
1662 sv_upgrade(sv, SVt_PVNV);
1666 if (!isGV_with_GP(sv))
1673 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
1678 (void)SvNOK_only(sv); /* validate number */
1683 =for apidoc sv_setnv_mg
1685 Like C<sv_setnv>, but also handles 'set' magic.
1691 Perl_sv_setnv_mg(pTHX_ register SV *const sv, const NV num)
1693 PERL_ARGS_ASSERT_SV_SETNV_MG;
1699 /* Print an "isn't numeric" warning, using a cleaned-up,
1700 * printable version of the offending string
1704 S_not_a_number(pTHX_ SV *const sv)
1711 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1714 dsv = newSVpvs_flags("", SVs_TEMP);
1715 pv = sv_uni_display(dsv, sv, 10, 0);
1718 const char * const limit = tmpbuf + sizeof(tmpbuf) - 8;
1719 /* each *s can expand to 4 chars + "...\0",
1720 i.e. need room for 8 chars */
1722 const char *s = SvPVX_const(sv);
1723 const char * const end = s + SvCUR(sv);
1724 for ( ; s < end && d < limit; s++ ) {
1726 if (ch & 128 && !isPRINT_LC(ch)) {
1735 else if (ch == '\r') {
1739 else if (ch == '\f') {
1743 else if (ch == '\\') {
1747 else if (ch == '\0') {
1751 else if (isPRINT_LC(ch))
1768 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1769 "Argument \"%s\" isn't numeric in %s", pv,
1772 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1773 "Argument \"%s\" isn't numeric", pv);
1777 =for apidoc looks_like_number
1779 Test if the content of an SV looks like a number (or is a number).
1780 C<Inf> and C<Infinity> are treated as numbers (so will not issue a
1781 non-numeric warning), even if your atof() doesn't grok them.
1787 Perl_looks_like_number(pTHX_ SV *const sv)
1789 register const char *sbegin;
1792 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1795 sbegin = SvPVX_const(sv);
1798 else if (SvPOKp(sv))
1799 sbegin = SvPV_const(sv, len);
1801 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
1802 return grok_number(sbegin, len, NULL);
1806 S_glob_2number(pTHX_ GV * const gv)
1808 const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
1809 SV *const buffer = sv_newmortal();
1811 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1813 /* FAKE globs can get coerced, so need to turn this off temporarily if it
1816 gv_efullname3(buffer, gv, "*");
1817 SvFLAGS(gv) |= wasfake;
1819 /* We know that all GVs stringify to something that is not-a-number,
1820 so no need to test that. */
1821 if (ckWARN(WARN_NUMERIC))
1822 not_a_number(buffer);
1823 /* We just want something true to return, so that S_sv_2iuv_common
1824 can tail call us and return true. */
1828 /* Actually, ISO C leaves conversion of UV to IV undefined, but
1829 until proven guilty, assume that things are not that bad... */
1834 As 64 bit platforms often have an NV that doesn't preserve all bits of
1835 an IV (an assumption perl has been based on to date) it becomes necessary
1836 to remove the assumption that the NV always carries enough precision to
1837 recreate the IV whenever needed, and that the NV is the canonical form.
1838 Instead, IV/UV and NV need to be given equal rights. So as to not lose
1839 precision as a side effect of conversion (which would lead to insanity
1840 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
1841 1) to distinguish between IV/UV/NV slots that have cached a valid
1842 conversion where precision was lost and IV/UV/NV slots that have a
1843 valid conversion which has lost no precision
1844 2) to ensure that if a numeric conversion to one form is requested that
1845 would lose precision, the precise conversion (or differently
1846 imprecise conversion) is also performed and cached, to prevent
1847 requests for different numeric formats on the same SV causing
1848 lossy conversion chains. (lossless conversion chains are perfectly
1853 SvIOKp is true if the IV slot contains a valid value
1854 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1855 SvNOKp is true if the NV slot contains a valid value
1856 SvNOK is true only if the NV value is accurate
1859 while converting from PV to NV, check to see if converting that NV to an
1860 IV(or UV) would lose accuracy over a direct conversion from PV to
1861 IV(or UV). If it would, cache both conversions, return NV, but mark
1862 SV as IOK NOKp (ie not NOK).
1864 While converting from PV to IV, check to see if converting that IV to an
1865 NV would lose accuracy over a direct conversion from PV to NV. If it
1866 would, cache both conversions, flag similarly.
1868 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1869 correctly because if IV & NV were set NV *always* overruled.
1870 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1871 changes - now IV and NV together means that the two are interchangeable:
1872 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
1874 The benefit of this is that operations such as pp_add know that if
1875 SvIOK is true for both left and right operands, then integer addition
1876 can be used instead of floating point (for cases where the result won't
1877 overflow). Before, floating point was always used, which could lead to
1878 loss of precision compared with integer addition.
1880 * making IV and NV equal status should make maths accurate on 64 bit
1882 * may speed up maths somewhat if pp_add and friends start to use
1883 integers when possible instead of fp. (Hopefully the overhead in
1884 looking for SvIOK and checking for overflow will not outweigh the
1885 fp to integer speedup)
1886 * will slow down integer operations (callers of SvIV) on "inaccurate"
1887 values, as the change from SvIOK to SvIOKp will cause a call into
1888 sv_2iv each time rather than a macro access direct to the IV slot
1889 * should speed up number->string conversion on integers as IV is
1890 favoured when IV and NV are equally accurate
1892 ####################################################################
1893 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1894 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1895 On the other hand, SvUOK is true iff UV.
1896 ####################################################################
1898 Your mileage will vary depending your CPU's relative fp to integer
1902 #ifndef NV_PRESERVES_UV
1903 # define IS_NUMBER_UNDERFLOW_IV 1
1904 # define IS_NUMBER_UNDERFLOW_UV 2
1905 # define IS_NUMBER_IV_AND_UV 2
1906 # define IS_NUMBER_OVERFLOW_IV 4
1907 # define IS_NUMBER_OVERFLOW_UV 5
1909 /* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
1911 /* For sv_2nv these three cases are "SvNOK and don't bother casting" */
1913 S_sv_2iuv_non_preserve(pTHX_ register SV *const sv
1921 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
1923 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
1924 if (SvNVX(sv) < (NV)IV_MIN) {
1925 (void)SvIOKp_on(sv);
1927 SvIV_set(sv, IV_MIN);
1928 return IS_NUMBER_UNDERFLOW_IV;
1930 if (SvNVX(sv) > (NV)UV_MAX) {
1931 (void)SvIOKp_on(sv);
1934 SvUV_set(sv, UV_MAX);
1935 return IS_NUMBER_OVERFLOW_UV;
1937 (void)SvIOKp_on(sv);
1939 /* Can't use strtol etc to convert this string. (See truth table in
1941 if (SvNVX(sv) <= (UV)IV_MAX) {
1942 SvIV_set(sv, I_V(SvNVX(sv)));
1943 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
1944 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
1946 /* Integer is imprecise. NOK, IOKp */
1948 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
1951 SvUV_set(sv, U_V(SvNVX(sv)));
1952 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
1953 if (SvUVX(sv) == UV_MAX) {
1954 /* As we know that NVs don't preserve UVs, UV_MAX cannot
1955 possibly be preserved by NV. Hence, it must be overflow.
1957 return IS_NUMBER_OVERFLOW_UV;
1959 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
1961 /* Integer is imprecise. NOK, IOKp */
1963 return IS_NUMBER_OVERFLOW_IV;
1965 #endif /* !NV_PRESERVES_UV*/
1968 S_sv_2iuv_common(pTHX_ SV *const sv)
1972 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
1975 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
1976 * without also getting a cached IV/UV from it at the same time
1977 * (ie PV->NV conversion should detect loss of accuracy and cache
1978 * IV or UV at same time to avoid this. */
1979 /* IV-over-UV optimisation - choose to cache IV if possible */
1981 if (SvTYPE(sv) == SVt_NV)
1982 sv_upgrade(sv, SVt_PVNV);
1984 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
1985 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
1986 certainly cast into the IV range at IV_MAX, whereas the correct
1987 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
1989 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1990 if (Perl_isnan(SvNVX(sv))) {
1996 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
1997 SvIV_set(sv, I_V(SvNVX(sv)));
1998 if (SvNVX(sv) == (NV) SvIVX(sv)
1999 #ifndef NV_PRESERVES_UV
2000 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2001 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2002 /* Don't flag it as "accurately an integer" if the number
2003 came from a (by definition imprecise) NV operation, and
2004 we're outside the range of NV integer precision */
2008 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2010 /* scalar has trailing garbage, eg "42a" */
2012 DEBUG_c(PerlIO_printf(Perl_debug_log,
2013 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n",
2019 /* IV not precise. No need to convert from PV, as NV
2020 conversion would already have cached IV if it detected
2021 that PV->IV would be better than PV->NV->IV
2022 flags already correct - don't set public IOK. */
2023 DEBUG_c(PerlIO_printf(Perl_debug_log,
2024 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n",
2029 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2030 but the cast (NV)IV_MIN rounds to a the value less (more
2031 negative) than IV_MIN which happens to be equal to SvNVX ??
2032 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2033 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2034 (NV)UVX == NVX are both true, but the values differ. :-(
2035 Hopefully for 2s complement IV_MIN is something like
2036 0x8000000000000000 which will be exact. NWC */
2039 SvUV_set(sv, U_V(SvNVX(sv)));
2041 (SvNVX(sv) == (NV) SvUVX(sv))
2042 #ifndef NV_PRESERVES_UV
2043 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2044 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2045 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2046 /* Don't flag it as "accurately an integer" if the number
2047 came from a (by definition imprecise) NV operation, and
2048 we're outside the range of NV integer precision */
2054 DEBUG_c(PerlIO_printf(Perl_debug_log,
2055 "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n",
2061 else if (SvPOKp(sv) && SvLEN(sv)) {
2063 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2064 /* We want to avoid a possible problem when we cache an IV/ a UV which
2065 may be later translated to an NV, and the resulting NV is not
2066 the same as the direct translation of the initial string
2067 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2068 be careful to ensure that the value with the .456 is around if the
2069 NV value is requested in the future).
2071 This means that if we cache such an IV/a UV, we need to cache the
2072 NV as well. Moreover, we trade speed for space, and do not
2073 cache the NV if we are sure it's not needed.
2076 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2077 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2078 == IS_NUMBER_IN_UV) {
2079 /* It's definitely an integer, only upgrade to PVIV */
2080 if (SvTYPE(sv) < SVt_PVIV)
2081 sv_upgrade(sv, SVt_PVIV);
2083 } else if (SvTYPE(sv) < SVt_PVNV)
2084 sv_upgrade(sv, SVt_PVNV);
2086 /* If NVs preserve UVs then we only use the UV value if we know that
2087 we aren't going to call atof() below. If NVs don't preserve UVs
2088 then the value returned may have more precision than atof() will
2089 return, even though value isn't perfectly accurate. */
2090 if ((numtype & (IS_NUMBER_IN_UV
2091 #ifdef NV_PRESERVES_UV
2094 )) == IS_NUMBER_IN_UV) {
2095 /* This won't turn off the public IOK flag if it was set above */
2096 (void)SvIOKp_on(sv);
2098 if (!(numtype & IS_NUMBER_NEG)) {
2100 if (value <= (UV)IV_MAX) {
2101 SvIV_set(sv, (IV)value);
2103 /* it didn't overflow, and it was positive. */
2104 SvUV_set(sv, value);
2108 /* 2s complement assumption */
2109 if (value <= (UV)IV_MIN) {
2110 SvIV_set(sv, -(IV)value);
2112 /* Too negative for an IV. This is a double upgrade, but
2113 I'm assuming it will be rare. */
2114 if (SvTYPE(sv) < SVt_PVNV)
2115 sv_upgrade(sv, SVt_PVNV);
2119 SvNV_set(sv, -(NV)value);
2120 SvIV_set(sv, IV_MIN);
2124 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2125 will be in the previous block to set the IV slot, and the next
2126 block to set the NV slot. So no else here. */
2128 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2129 != IS_NUMBER_IN_UV) {
2130 /* It wasn't an (integer that doesn't overflow the UV). */
2131 SvNV_set(sv, Atof(SvPVX_const(sv)));
2133 if (! numtype && ckWARN(WARN_NUMERIC))
2136 #if defined(USE_LONG_DOUBLE)
2137 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
2138 PTR2UV(sv), SvNVX(sv)));
2140 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
2141 PTR2UV(sv), SvNVX(sv)));
2144 #ifdef NV_PRESERVES_UV
2145 (void)SvIOKp_on(sv);
2147 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2148 SvIV_set(sv, I_V(SvNVX(sv)));
2149 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2152 NOOP; /* Integer is imprecise. NOK, IOKp */
2154 /* UV will not work better than IV */
2156 if (SvNVX(sv) > (NV)UV_MAX) {
2158 /* Integer is inaccurate. NOK, IOKp, is UV */
2159 SvUV_set(sv, UV_MAX);
2161 SvUV_set(sv, U_V(SvNVX(sv)));
2162 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2163 NV preservse UV so can do correct comparison. */
2164 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2167 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
2172 #else /* NV_PRESERVES_UV */
2173 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2174 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
2175 /* The IV/UV slot will have been set from value returned by
2176 grok_number above. The NV slot has just been set using
2179 assert (SvIOKp(sv));
2181 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2182 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2183 /* Small enough to preserve all bits. */
2184 (void)SvIOKp_on(sv);
2186 SvIV_set(sv, I_V(SvNVX(sv)));
2187 if ((NV)(SvIVX(sv)) == SvNVX(sv))
2189 /* Assumption: first non-preserved integer is < IV_MAX,
2190 this NV is in the preserved range, therefore: */
2191 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2193 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
2197 0 0 already failed to read UV.
2198 0 1 already failed to read UV.
2199 1 0 you won't get here in this case. IV/UV
2200 slot set, public IOK, Atof() unneeded.
2201 1 1 already read UV.
2202 so there's no point in sv_2iuv_non_preserve() attempting
2203 to use atol, strtol, strtoul etc. */
2205 sv_2iuv_non_preserve (sv, numtype);
2207 sv_2iuv_non_preserve (sv);
2211 #endif /* NV_PRESERVES_UV */
2212 /* It might be more code efficient to go through the entire logic above
2213 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2214 gets complex and potentially buggy, so more programmer efficient
2215 to do it this way, by turning off the public flags: */
2217 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
2221 if (isGV_with_GP(sv))
2222 return glob_2number(MUTABLE_GV(sv));
2224 if (!(SvFLAGS(sv) & SVs_PADTMP)) {
2225 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2228 if (SvTYPE(sv) < SVt_IV)
2229 /* Typically the caller expects that sv_any is not NULL now. */
2230 sv_upgrade(sv, SVt_IV);
2231 /* Return 0 from the caller. */
2238 =for apidoc sv_2iv_flags
2240 Return the integer value of an SV, doing any necessary string
2241 conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2242 Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2248 Perl_sv_2iv_flags(pTHX_ register SV *const sv, const I32 flags)
2253 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2254 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
2255 cache IVs just in case. In practice it seems that they never
2256 actually anywhere accessible by user Perl code, let alone get used
2257 in anything other than a string context. */
2258 if (flags & SV_GMAGIC)
2263 return I_V(SvNVX(sv));
2265 if (SvPOKp(sv) && SvLEN(sv)) {
2268 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2270 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2271 == IS_NUMBER_IN_UV) {
2272 /* It's definitely an integer */
2273 if (numtype & IS_NUMBER_NEG) {
2274 if (value < (UV)IV_MIN)
2277 if (value < (UV)IV_MAX)
2282 if (ckWARN(WARN_NUMERIC))
2285 return I_V(Atof(SvPVX_const(sv)));
2290 assert(SvTYPE(sv) >= SVt_PVMG);
2291 /* This falls through to the report_uninit inside S_sv_2iuv_common. */
2292 } else if (SvTHINKFIRST(sv)) {
2297 if (flags & SV_SKIP_OVERLOAD)
2299 tmpstr=AMG_CALLun(sv,numer);
2300 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2301 return SvIV(tmpstr);
2304 return PTR2IV(SvRV(sv));
2307 sv_force_normal_flags(sv, 0);
2309 if (SvREADONLY(sv) && !SvOK(sv)) {
2310 if (ckWARN(WARN_UNINITIALIZED))
2316 if (S_sv_2iuv_common(aTHX_ sv))
2319 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n",
2320 PTR2UV(sv),SvIVX(sv)));
2321 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
2325 =for apidoc sv_2uv_flags
2327 Return the unsigned integer value of an SV, doing any necessary string
2328 conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2329 Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
2335 Perl_sv_2uv_flags(pTHX_ register SV *const sv, const I32 flags)
2340 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2341 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
2342 cache IVs just in case. */
2343 if (flags & SV_GMAGIC)
2348 return U_V(SvNVX(sv));
2349 if (SvPOKp(sv) && SvLEN(sv)) {
2352 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2354 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2355 == IS_NUMBER_IN_UV) {
2356 /* It's definitely an integer */
2357 if (!(numtype & IS_NUMBER_NEG))
2361 if (ckWARN(WARN_NUMERIC))
2364 return U_V(Atof(SvPVX_const(sv)));
2369 assert(SvTYPE(sv) >= SVt_PVMG);
2370 /* This falls through to the report_uninit inside S_sv_2iuv_common. */
2371 } else if (SvTHINKFIRST(sv)) {
2376 if (flags & SV_SKIP_OVERLOAD)
2378 tmpstr = AMG_CALLun(sv,numer);
2379 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2380 return SvUV(tmpstr);
2383 return PTR2UV(SvRV(sv));
2386 sv_force_normal_flags(sv, 0);
2388 if (SvREADONLY(sv) && !SvOK(sv)) {
2389 if (ckWARN(WARN_UNINITIALIZED))
2395 if (S_sv_2iuv_common(aTHX_ sv))
2399 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n",
2400 PTR2UV(sv),SvUVX(sv)));
2401 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
2405 =for apidoc sv_2nv_flags
2407 Return the num value of an SV, doing any necessary string or integer
2408 conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2409 Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
2415 Perl_sv_2nv_flags(pTHX_ register SV *const sv, const I32 flags)
2420 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2421 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
2422 cache IVs just in case. */
2423 if (flags & SV_GMAGIC)
2427 if ((SvPOKp(sv) && SvLEN(sv)) && !SvIOKp(sv)) {
2428 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
2429 !grok_number(SvPVX_const(sv), SvCUR(sv), NULL))
2431 return Atof(SvPVX_const(sv));
2435 return (NV)SvUVX(sv);
2437 return (NV)SvIVX(sv);
2442 assert(SvTYPE(sv) >= SVt_PVMG);
2443 /* This falls through to the report_uninit near the end of the
2445 } else if (SvTHINKFIRST(sv)) {
2450 if (flags & SV_SKIP_OVERLOAD)
2452 tmpstr = AMG_CALLun(sv,numer);
2453 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2454 return SvNV(tmpstr);
2457 return PTR2NV(SvRV(sv));
2460 sv_force_normal_flags(sv, 0);
2462 if (SvREADONLY(sv) && !SvOK(sv)) {
2463 if (ckWARN(WARN_UNINITIALIZED))
2468 if (SvTYPE(sv) < SVt_NV) {
2469 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2470 sv_upgrade(sv, SVt_NV);
2471 #ifdef USE_LONG_DOUBLE
2473 STORE_NUMERIC_LOCAL_SET_STANDARD();
2474 PerlIO_printf(Perl_debug_log,
2475 "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
2476 PTR2UV(sv), SvNVX(sv));
2477 RESTORE_NUMERIC_LOCAL();
2481 STORE_NUMERIC_LOCAL_SET_STANDARD();
2482 PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
2483 PTR2UV(sv), SvNVX(sv));
2484 RESTORE_NUMERIC_LOCAL();
2488 else if (SvTYPE(sv) < SVt_PVNV)
2489 sv_upgrade(sv, SVt_PVNV);
2494 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
2495 #ifdef NV_PRESERVES_UV
2501 /* Only set the public NV OK flag if this NV preserves the IV */
2502 /* Check it's not 0xFFFFFFFFFFFFFFFF */
2504 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
2505 : (SvIVX(sv) == I_V(SvNVX(sv))))
2511 else if (SvPOKp(sv) && SvLEN(sv)) {
2513 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2514 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
2516 #ifdef NV_PRESERVES_UV
2517 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2518 == IS_NUMBER_IN_UV) {
2519 /* It's definitely an integer */
2520 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
2522 SvNV_set(sv, Atof(SvPVX_const(sv)));
2528 SvNV_set(sv, Atof(SvPVX_const(sv)));
2529 /* Only set the public NV OK flag if this NV preserves the value in
2530 the PV at least as well as an IV/UV would.
2531 Not sure how to do this 100% reliably. */
2532 /* if that shift count is out of range then Configure's test is
2533 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2535 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2536 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2537 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2538 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2539 /* Can't use strtol etc to convert this string, so don't try.
2540 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
2543 /* value has been set. It may not be precise. */
2544 if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) {
2545 /* 2s complement assumption for (UV)IV_MIN */
2546 SvNOK_on(sv); /* Integer is too negative. */
2551 if (numtype & IS_NUMBER_NEG) {
2552 SvIV_set(sv, -(IV)value);
2553 } else if (value <= (UV)IV_MAX) {
2554 SvIV_set(sv, (IV)value);
2556 SvUV_set(sv, value);
2560 if (numtype & IS_NUMBER_NOT_INT) {
2561 /* I believe that even if the original PV had decimals,
2562 they are lost beyond the limit of the FP precision.
2563 However, neither is canonical, so both only get p
2564 flags. NWC, 2000/11/25 */
2565 /* Both already have p flags, so do nothing */
2567 const NV nv = SvNVX(sv);
2568 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2569 if (SvIVX(sv) == I_V(nv)) {
2572 /* It had no "." so it must be integer. */
2576 /* between IV_MAX and NV(UV_MAX).
2577 Could be slightly > UV_MAX */
2579 if (numtype & IS_NUMBER_NOT_INT) {
2580 /* UV and NV both imprecise. */
2582 const UV nv_as_uv = U_V(nv);
2584 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2593 /* It might be more code efficient to go through the entire logic above
2594 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2595 gets complex and potentially buggy, so more programmer efficient
2596 to do it this way, by turning off the public flags: */
2598 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
2599 #endif /* NV_PRESERVES_UV */
2602 if (isGV_with_GP(sv)) {
2603 glob_2number(MUTABLE_GV(sv));
2607 if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED))
2609 assert (SvTYPE(sv) >= SVt_NV);
2610 /* Typically the caller expects that sv_any is not NULL now. */
2611 /* XXX Ilya implies that this is a bug in callers that assume this
2612 and ideally should be fixed. */
2615 #if defined(USE_LONG_DOUBLE)
2617 STORE_NUMERIC_LOCAL_SET_STANDARD();
2618 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
2619 PTR2UV(sv), SvNVX(sv));
2620 RESTORE_NUMERIC_LOCAL();
2624 STORE_NUMERIC_LOCAL_SET_STANDARD();
2625 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
2626 PTR2UV(sv), SvNVX(sv));
2627 RESTORE_NUMERIC_LOCAL();
2636 Return an SV with the numeric value of the source SV, doing any necessary
2637 reference or overload conversion. You must use the C<SvNUM(sv)> macro to
2638 access this function.
2644 Perl_sv_2num(pTHX_ register SV *const sv)
2646 PERL_ARGS_ASSERT_SV_2NUM;
2651 SV * const tmpsv = AMG_CALLun(sv,numer);
2652 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
2653 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2654 return sv_2num(tmpsv);
2656 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2659 /* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2660 * UV as a string towards the end of buf, and return pointers to start and
2663 * We assume that buf is at least TYPE_CHARS(UV) long.
2667 S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
2669 char *ptr = buf + TYPE_CHARS(UV);
2670 char * const ebuf = ptr;
2673 PERL_ARGS_ASSERT_UIV_2BUF;
2685 *--ptr = '0' + (char)(uv % 10);
2694 =for apidoc sv_2pv_flags
2696 Returns a pointer to the string value of an SV, and sets *lp to its length.
2697 If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a string
2699 Normally invoked via the C<SvPV_flags> macro. C<sv_2pv()> and C<sv_2pv_nomg>
2700 usually end up here too.
2706 Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags)
2716 if (SvGMAGICAL(sv)) {
2717 if (flags & SV_GMAGIC)
2722 if (flags & SV_MUTABLE_RETURN)
2723 return SvPVX_mutable(sv);
2724 if (flags & SV_CONST_RETURN)
2725 return (char *)SvPVX_const(sv);
2728 if (SvIOKp(sv) || SvNOKp(sv)) {
2729 char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */
2734 ? my_snprintf(tbuf, sizeof(tbuf), "%"UVuf, (UV)SvUVX(sv))
2735 : my_snprintf(tbuf, sizeof(tbuf), "%"IVdf, (IV)SvIVX(sv));
2737 Gconvert(SvNVX(sv), NV_DIG, 0, tbuf);
2744 #ifdef FIXNEGATIVEZERO
2745 if (len == 2 && tbuf[0] == '-' && tbuf[1] == '0') {
2751 SvUPGRADE(sv, SVt_PV);
2754 s = SvGROW_mutable(sv, len + 1);
2757 return (char*)memcpy(s, tbuf, len + 1);
2763 assert(SvTYPE(sv) >= SVt_PVMG);
2764 /* This falls through to the report_uninit near the end of the
2766 } else if (SvTHINKFIRST(sv)) {
2771 if (flags & SV_SKIP_OVERLOAD)
2773 tmpstr = AMG_CALLun(sv,string);
2774 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2775 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2777 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2781 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2782 if (flags & SV_CONST_RETURN) {
2783 pv = (char *) SvPVX_const(tmpstr);
2785 pv = (flags & SV_MUTABLE_RETURN)
2786 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
2789 *lp = SvCUR(tmpstr);
2791 pv = sv_2pv_flags(tmpstr, lp, flags);
2804 SV *const referent = SvRV(sv);
2808 retval = buffer = savepvn("NULLREF", len);
2809 } else if (SvTYPE(referent) == SVt_REGEXP) {
2810 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2815 /* If the regex is UTF-8 we want the containing scalar to
2816 have an UTF-8 flag too */
2822 if ((seen_evals = RX_SEEN_EVALS(re)))
2823 PL_reginterp_cnt += seen_evals;
2826 *lp = RX_WRAPLEN(re);
2828 return RX_WRAPPED(re);
2830 const char *const typestr = sv_reftype(referent, 0);
2831 const STRLEN typelen = strlen(typestr);
2832 UV addr = PTR2UV(referent);
2833 const char *stashname = NULL;
2834 STRLEN stashnamelen = 0; /* hush, gcc */
2835 const char *buffer_end;
2837 if (SvOBJECT(referent)) {
2838 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
2841 stashname = HEK_KEY(name);
2842 stashnamelen = HEK_LEN(name);
2844 if (HEK_UTF8(name)) {
2850 stashname = "__ANON__";
2853 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
2854 + 2 * sizeof(UV) + 2 /* )\0 */;
2856 len = typelen + 3 /* (0x */
2857 + 2 * sizeof(UV) + 2 /* )\0 */;
2860 Newx(buffer, len, char);
2861 buffer_end = retval = buffer + len;
2863 /* Working backwards */
2867 *--retval = PL_hexdigit[addr & 15];
2868 } while (addr >>= 4);
2874 memcpy(retval, typestr, typelen);
2878 retval -= stashnamelen;
2879 memcpy(retval, stashname, stashnamelen);
2881 /* retval may not neccesarily have reached the start of the
2883 assert (retval >= buffer);
2885 len = buffer_end - retval - 1; /* -1 for that \0 */
2893 if (SvREADONLY(sv) && !SvOK(sv)) {
2896 if (flags & SV_UNDEF_RETURNS_NULL)
2898 if (ckWARN(WARN_UNINITIALIZED))
2903 if (SvIOK(sv) || ((SvIOKp(sv) && !SvNOKp(sv)))) {
2904 /* I'm assuming that if both IV and NV are equally valid then
2905 converting the IV is going to be more efficient */
2906 const U32 isUIOK = SvIsUV(sv);
2907 char buf[TYPE_CHARS(UV)];
2911 if (SvTYPE(sv) < SVt_PVIV)
2912 sv_upgrade(sv, SVt_PVIV);
2913 ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
2915 /* inlined from sv_setpvn */
2916 s = SvGROW_mutable(sv, len + 1);
2917 Move(ptr, s, len, char);
2921 else if (SvNOKp(sv)) {
2923 if (SvTYPE(sv) < SVt_PVNV)
2924 sv_upgrade(sv, SVt_PVNV);
2925 /* The +20 is pure guesswork. Configure test needed. --jhi */
2926 s = SvGROW_mutable(sv, NV_DIG + 20);
2927 /* some Xenix systems wipe out errno here */
2929 if (SvNVX(sv) == 0.0)
2930 my_strlcpy(s, "0", SvLEN(sv));
2934 Gconvert(SvNVX(sv), NV_DIG, 0, s);
2937 #ifdef FIXNEGATIVEZERO
2938 if (*s == '-' && s[1] == '0' && !s[2]) {
2950 if (isGV_with_GP(sv)) {
2951 GV *const gv = MUTABLE_GV(sv);
2952 const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
2953 SV *const buffer = sv_newmortal();
2955 /* FAKE globs can get coerced, so need to turn this off temporarily
2958 gv_efullname3(buffer, gv, "*");
2959 SvFLAGS(gv) |= wasfake;
2961 if (SvPOK(buffer)) {
2963 *lp = SvCUR(buffer);
2965 return SvPVX(buffer);
2976 if (flags & SV_UNDEF_RETURNS_NULL)
2978 if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED))
2980 if (SvTYPE(sv) < SVt_PV)
2981 /* Typically the caller expects that sv_any is not NULL now. */
2982 sv_upgrade(sv, SVt_PV);
2986 const STRLEN len = s - SvPVX_const(sv);
2992 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
2993 PTR2UV(sv),SvPVX_const(sv)));
2994 if (flags & SV_CONST_RETURN)
2995 return (char *)SvPVX_const(sv);
2996 if (flags & SV_MUTABLE_RETURN)
2997 return SvPVX_mutable(sv);
3002 =for apidoc sv_copypv
3004 Copies a stringified representation of the source SV into the
3005 destination SV. Automatically performs any necessary mg_get and
3006 coercion of numeric values into strings. Guaranteed to preserve
3007 UTF8 flag even from overloaded objects. Similar in nature to
3008 sv_2pv[_flags] but operates directly on an SV instead of just the
3009 string. Mostly uses sv_2pv_flags to do its work, except when that
3010 would lose the UTF-8'ness of the PV.
3016 Perl_sv_copypv(pTHX_ SV *const dsv, register SV *const ssv)
3019 const char * const s = SvPV_const(ssv,len);
3021 PERL_ARGS_ASSERT_SV_COPYPV;
3023 sv_setpvn(dsv,s,len);
3031 =for apidoc sv_2pvbyte
3033 Return a pointer to the byte-encoded representation of the SV, and set *lp
3034 to its length. May cause the SV to be downgraded from UTF-8 as a
3037 Usually accessed via the C<SvPVbyte> macro.
3043 Perl_sv_2pvbyte(pTHX_ register SV *const sv, STRLEN *const lp)
3045 PERL_ARGS_ASSERT_SV_2PVBYTE;
3047 sv_utf8_downgrade(sv,0);
3048 return lp ? SvPV(sv,*lp) : SvPV_nolen(sv);
3052 =for apidoc sv_2pvutf8
3054 Return a pointer to the UTF-8-encoded representation of the SV, and set *lp
3055 to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3057 Usually accessed via the C<SvPVutf8> macro.
3063 Perl_sv_2pvutf8(pTHX_ register SV *const sv, STRLEN *const lp)
3065 PERL_ARGS_ASSERT_SV_2PVUTF8;
3067 sv_utf8_upgrade(sv);
3068 return lp ? SvPV(sv,*lp) : SvPV_nolen(sv);
3073 =for apidoc sv_2bool
3075 This function is only called on magical items, and is only used by
3076 sv_true() or its macro equivalent.
3082 Perl_sv_2bool(pTHX_ register SV *const sv)
3086 PERL_ARGS_ASSERT_SV_2BOOL;
3094 SV * const tmpsv = AMG_CALLun(sv,bool_);
3095 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
3096 return cBOOL(SvTRUE(tmpsv));
3098 return SvRV(sv) != 0;
3101 register XPV* const Xpvtmp = (XPV*)SvANY(sv);
3103 (*sv->sv_u.svu_pv > '0' ||
3104 Xpvtmp->xpv_cur > 1 ||
3105 (Xpvtmp->xpv_cur && *sv->sv_u.svu_pv != '0')))
3112 return SvIVX(sv) != 0;
3115 return SvNVX(sv) != 0.0;
3117 if (isGV_with_GP(sv))
3127 =for apidoc sv_utf8_upgrade
3129 Converts the PV of an SV to its UTF-8-encoded form.
3130 Forces the SV to string form if it is not already.
3131 Will C<mg_get> on C<sv> if appropriate.
3132 Always sets the SvUTF8 flag to avoid future validity checks even
3133 if the whole string is the same in UTF-8 as not.
3134 Returns the number of bytes in the converted string
3136 This is not as a general purpose byte encoding to Unicode interface:
3137 use the Encode extension for that.
3139 =for apidoc sv_utf8_upgrade_nomg
3141 Like sv_utf8_upgrade, but doesn't do magic on C<sv>
3143 =for apidoc sv_utf8_upgrade_flags
3145 Converts the PV of an SV to its UTF-8-encoded form.
3146 Forces the SV to string form if it is not already.
3147 Always sets the SvUTF8 flag to avoid future validity checks even
3148 if all the bytes are invariant in UTF-8. If C<flags> has C<SV_GMAGIC> bit set,
3149 will C<mg_get> on C<sv> if appropriate, else not.
3150 Returns the number of bytes in the converted string
3151 C<sv_utf8_upgrade> and
3152 C<sv_utf8_upgrade_nomg> are implemented in terms of this function.
3154 This is not as a general purpose byte encoding to Unicode interface:
3155 use the Encode extension for that.
3159 The grow version is currently not externally documented. It adds a parameter,
3160 extra, which is the number of unused bytes the string of 'sv' is guaranteed to
3161 have free after it upon return. This allows the caller to reserve extra space
3162 that it intends to fill, to avoid extra grows.
3164 Also externally undocumented for the moment is the flag SV_FORCE_UTF8_UPGRADE,
3165 which can be used to tell this function to not first check to see if there are
3166 any characters that are different in UTF-8 (variant characters) which would
3167 force it to allocate a new string to sv, but to assume there are. Typically
3168 this flag is used by a routine that has already parsed the string to find that
3169 there are such characters, and passes this information on so that the work
3170 doesn't have to be repeated.
3172 (One might think that the calling routine could pass in the position of the
3173 first such variant, so it wouldn't have to be found again. But that is not the
3174 case, because typically when the caller is likely to use this flag, it won't be
3175 calling this routine unless it finds something that won't fit into a byte.
3176 Otherwise it tries to not upgrade and just use bytes. But some things that
3177 do fit into a byte are variants in utf8, and the caller may not have been
3178 keeping track of these.)
3180 If the routine itself changes the string, it adds a trailing NUL. Such a NUL
3181 isn't guaranteed due to having other routines do the work in some input cases,
3182 or if the input is already flagged as being in utf8.
3184 The speed of this could perhaps be improved for many cases if someone wanted to
3185 write a fast function that counts the number of variant characters in a string,
3186 especially if it could return the position of the first one.
3191 Perl_sv_utf8_upgrade_flags_grow(pTHX_ register SV *const sv, const I32 flags, STRLEN extra)
3195 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
3197 if (sv == &PL_sv_undef)
3201 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3202 (void) sv_2pv_flags(sv,&len, flags);
3204 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3208 (void) SvPV_force(sv,len);
3213 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3218 sv_force_normal_flags(sv, 0);
3221 if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) {
3222 sv_recode_to_utf8(sv, PL_encoding);
3223 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3227 if (SvCUR(sv) == 0) {
3228 if (extra) SvGROW(sv, extra);
3229 } else { /* Assume Latin-1/EBCDIC */
3230 /* This function could be much more efficient if we
3231 * had a FLAG in SVs to signal if there are any variant
3232 * chars in the PV. Given that there isn't such a flag
3233 * make the loop as fast as possible (although there are certainly ways
3234 * to speed this up, eg. through vectorization) */
3235 U8 * s = (U8 *) SvPVX_const(sv);
3236 U8 * e = (U8 *) SvEND(sv);
3238 STRLEN two_byte_count = 0;
3240 if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
3242 /* See if really will need to convert to utf8. We mustn't rely on our
3243 * incoming SV being well formed and having a trailing '\0', as certain
3244 * code in pp_formline can send us partially built SVs. */
3248 if (NATIVE_IS_INVARIANT(ch)) continue;
3250 t--; /* t already incremented; re-point to first variant */
3255 /* utf8 conversion not needed because all are invariants. Mark as
3256 * UTF-8 even if no variant - saves scanning loop */
3262 /* Here, the string should be converted to utf8, either because of an
3263 * input flag (two_byte_count = 0), or because a character that
3264 * requires 2 bytes was found (two_byte_count = 1). t points either to
3265 * the beginning of the string (if we didn't examine anything), or to
3266 * the first variant. In either case, everything from s to t - 1 will
3267 * occupy only 1 byte each on output.
3269 * There are two main ways to convert. One is to create a new string
3270 * and go through the input starting from the beginning, appending each
3271 * converted value onto the new string as we go along. It's probably
3272 * best to allocate enough space in the string for the worst possible
3273 * case rather than possibly running out of space and having to
3274 * reallocate and then copy what we've done so far. Since everything
3275 * from s to t - 1 is invariant, the destination can be initialized
3276 * with these using a fast memory copy
3278 * The other way is to figure out exactly how big the string should be
3279 * by parsing the entire input. Then you don't have to make it big
3280 * enough to handle the worst possible case, and more importantly, if
3281 * the string you already have is large enough, you don't have to
3282 * allocate a new string, you can copy the last character in the input
3283 * string to the final position(s) that will be occupied by the
3284 * converted string and go backwards, stopping at t, since everything
3285 * before that is invariant.
3287 * There are advantages and disadvantages to each method.
3289 * In the first method, we can allocate a new string, do the memory
3290 * copy from the s to t - 1, and then proceed through the rest of the
3291 * string byte-by-byte.
3293 * In the second method, we proceed through the rest of the input
3294 * string just calculating how big the converted string will be. Then
3295 * there are two cases:
3296 * 1) if the string has enough extra space to handle the converted
3297 * value. We go backwards through the string, converting until we
3298 * get to the position we are at now, and then stop. If this
3299 * position is far enough along in the string, this method is
3300 * faster than the other method. If the memory copy were the same
3301 * speed as the byte-by-byte loop, that position would be about
3302 * half-way, as at the half-way mark, parsing to the end and back
3303 * is one complete string's parse, the same amount as starting
3304 * over and going all the way through. Actually, it would be
3305 * somewhat less than half-way, as it's faster to just count bytes
3306 * than to also copy, and we don't have the overhead of allocating
3307 * a new string, changing the scalar to use it, and freeing the
3308 * existing one. But if the memory copy is fast, the break-even
3309 * point is somewhere after half way. The counting loop could be
3310 * sped up by vectorization, etc, to move the break-even point
3311 * further towards the beginning.
3312 * 2) if the string doesn't have enough space to handle the converted
3313 * value. A new string will have to be allocated, and one might
3314 * as well, given that, start from the beginning doing the first
3315 * method. We've spent extra time parsing the string and in
3316 * exchange all we've gotten is that we know precisely how big to
3317 * make the new one. Perl is more optimized for time than space,
3318 * so this case is a loser.
3319 * So what I've decided to do is not use the 2nd method unless it is
3320 * guaranteed that a new string won't have to be allocated, assuming
3321 * the worst case. I also decided not to put any more conditions on it
3322 * than this, for now. It seems likely that, since the worst case is
3323 * twice as big as the unknown portion of the string (plus 1), we won't
3324 * be guaranteed enough space, causing us to go to the first method,
3325 * unless the string is short, or the first variant character is near
3326 * the end of it. In either of these cases, it seems best to use the
3327 * 2nd method. The only circumstance I can think of where this would
3328 * be really slower is if the string had once had much more data in it
3329 * than it does now, but there is still a substantial amount in it */
3332 STRLEN invariant_head = t - s;
3333 STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
3334 if (SvLEN(sv) < size) {
3336 /* Here, have decided to allocate a new string */
3341 Newx(dst, size, U8);
3343 /* If no known invariants at the beginning of the input string,
3344 * set so starts from there. Otherwise, can use memory copy to
3345 * get up to where we are now, and then start from here */
3347 if (invariant_head <= 0) {
3350 Copy(s, dst, invariant_head, char);
3351 d = dst + invariant_head;
3355 const UV uv = NATIVE8_TO_UNI(*t++);
3356 if (UNI_IS_INVARIANT(uv))
3357 *d++ = (U8)UNI_TO_NATIVE(uv);
3359 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
3360 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
3364 SvPV_free(sv); /* No longer using pre-existing string */
3365 SvPV_set(sv, (char*)dst);
3366 SvCUR_set(sv, d - dst);
3367 SvLEN_set(sv, size);
3370 /* Here, have decided to get the exact size of the string.
3371 * Currently this happens only when we know that there is
3372 * guaranteed enough space to fit the converted string, so
3373 * don't have to worry about growing. If two_byte_count is 0,
3374 * then t points to the first byte of the string which hasn't
3375 * been examined yet. Otherwise two_byte_count is 1, and t
3376 * points to the first byte in the string that will expand to
3377 * two. Depending on this, start examining at t or 1 after t.
3380 U8 *d = t + two_byte_count;
3383 /* Count up the remaining bytes that expand to two */
3386 const U8 chr = *d++;
3387 if (! NATIVE_IS_INVARIANT(chr)) two_byte_count++;
3390 /* The string will expand by just the number of bytes that
3391 * occupy two positions. But we are one afterwards because of
3392 * the increment just above. This is the place to put the
3393 * trailing NUL, and to set the length before we decrement */
3395 d += two_byte_count;
3396 SvCUR_set(sv, d - s);
3400 /* Having decremented d, it points to the position to put the
3401 * very last byte of the expanded string. Go backwards through
3402 * the string, copying and expanding as we go, stopping when we
3403 * get to the part that is invariant the rest of the way down */
3407 const U8 ch = NATIVE8_TO_UNI(*e--);
3408 if (UNI_IS_INVARIANT(ch)) {
3409 *d-- = UNI_TO_NATIVE(ch);
3411 *d-- = (U8)UTF8_EIGHT_BIT_LO(ch);
3412 *d-- = (U8)UTF8_EIGHT_BIT_HI(ch);
3419 /* Mark as UTF-8 even if no variant - saves scanning loop */
3425 =for apidoc sv_utf8_downgrade
3427 Attempts to convert the PV of an SV from characters to bytes.
3428 If the PV contains a character that cannot fit
3429 in a byte, this conversion will fail;
3430 in this case, either returns false or, if C<fail_ok> is not
3433 This is not as a general purpose Unicode to byte encoding interface:
3434 use the Encode extension for that.
3440 Perl_sv_utf8_downgrade(pTHX_ register SV *const sv, const bool fail_ok)
3444 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE;
3446 if (SvPOKp(sv) && SvUTF8(sv)) {
3452 sv_force_normal_flags(sv, 0);
3454 s = (U8 *) SvPV(sv, len);
3455 if (!utf8_to_bytes(s, &len)) {
3460 Perl_croak(aTHX_ "Wide character in %s",
3463 Perl_croak(aTHX_ "Wide character");
3474 =for apidoc sv_utf8_encode
3476 Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8>
3477 flag off so that it looks like octets again.
3483 Perl_sv_utf8_encode(pTHX_ register SV *const sv)
3485 PERL_ARGS_ASSERT_SV_UTF8_ENCODE;
3488 sv_force_normal_flags(sv, 0);
3490 if (SvREADONLY(sv)) {
3491 Perl_croak_no_modify(aTHX);
3493 (void) sv_utf8_upgrade(sv);
3498 =for apidoc sv_utf8_decode
3500 If the PV of the SV is an octet sequence in UTF-8
3501 and contains a multiple-byte character, the C<SvUTF8> flag is turned on
3502 so that it looks like a character. If the PV contains only single-byte
3503 characters, the C<SvUTF8> flag stays being off.
3504 Scans PV for validity and returns false if the PV is invalid UTF-8.
3510 Perl_sv_utf8_decode(pTHX_ register SV *const sv)
3512 PERL_ARGS_ASSERT_SV_UTF8_DECODE;
3518 /* The octets may have got themselves encoded - get them back as
3521 if (!sv_utf8_downgrade(sv, TRUE))
3524 /* it is actually just a matter of turning the utf8 flag on, but
3525 * we want to make sure everything inside is valid utf8 first.
3527 c = (const U8 *) SvPVX_const(sv);
3528 if (!is_utf8_string(c, SvCUR(sv)+1))
3530 e = (const U8 *) SvEND(sv);
3533 if (!UTF8_IS_INVARIANT(ch)) {
3543 =for apidoc sv_setsv
3545 Copies the contents of the source SV C<ssv> into the destination SV
3546 C<dsv>. The source SV may be destroyed if it is mortal, so don't use this
3547 function if the source SV needs to be reused. Does not handle 'set' magic.
3548 Loosely speaking, it performs a copy-by-value, obliterating any previous
3549 content of the destination.
3551 You probably want to use one of the assortment of wrappers, such as
3552 C<SvSetSV>, C<SvSetSV_nosteal>, C<SvSetMagicSV> and
3553 C<SvSetMagicSV_nosteal>.
3555 =for apidoc sv_setsv_flags
3557 Copies the contents of the source SV C<ssv> into the destination SV
3558 C<dsv>. The source SV may be destroyed if it is mortal, so don't use this
3559 function if the source SV needs to be reused. Does not handle 'set' magic.
3560 Loosely speaking, it performs a copy-by-value, obliterating any previous
3561 content of the destination.
3562 If the C<flags> parameter has the C<SV_GMAGIC> bit set, will C<mg_get> on
3563 C<ssv> if appropriate, else not. If the C<flags> parameter has the
3564 C<NOSTEAL> bit set then the buffers of temps will not be stolen. <sv_setsv>
3565 and C<sv_setsv_nomg> are implemented in terms of this function.
3567 You probably want to use one of the assortment of wrappers, such as
3568 C<SvSetSV>, C<SvSetSV_nosteal>, C<SvSetMagicSV> and
3569 C<SvSetMagicSV_nosteal>.
3571 This is the primary function for copying scalars, and most other
3572 copy-ish functions and macros use this underneath.
3578 S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype)
3580 I32 mro_changes = 0; /* 1 = method, 2 = isa */
3582 PERL_ARGS_ASSERT_GLOB_ASSIGN_GLOB;
3584 if (dtype != SVt_PVGV) {
3585 const char * const name = GvNAME(sstr);
3586 const STRLEN len = GvNAMELEN(sstr);
3588 if (dtype >= SVt_PV) {
3594 SvUPGRADE(dstr, SVt_PVGV);
3595 (void)SvOK_off(dstr);
3596 /* FIXME - why are we doing this, then turning it off and on again
3598 isGV_with_GP_on(dstr);
3600 GvSTASH(dstr) = GvSTASH(sstr);
3602 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dstr)), dstr);
3603 gv_name_set(MUTABLE_GV(dstr), name, len, GV_ADD);
3604 SvFAKE_on(dstr); /* can coerce to non-glob */
3607 if(GvGP(MUTABLE_GV(sstr))) {
3608 /* If source has method cache entry, clear it */
3610 SvREFCNT_dec(GvCV(sstr));
3614 /* If source has a real method, then a method is
3616 else if(GvCV((const GV *)sstr)) {
3621 /* If dest already had a real method, that's a change as well */
3622 if(!mro_changes && GvGP(MUTABLE_GV(dstr)) && GvCVu((const GV *)dstr)) {
3626 if(strEQ(GvNAME((const GV *)dstr),"ISA"))
3629 gp_free(MUTABLE_GV(dstr));
3630 isGV_with_GP_off(dstr);
3631 (void)SvOK_off(dstr);
3632 isGV_with_GP_on(dstr);
3633 GvINTRO_off(dstr); /* one-shot flag */
3634 GvGP(dstr) = gp_ref(GvGP(sstr));
3635 if (SvTAINTED(sstr))
3637 if (GvIMPORTED(dstr) != GVf_IMPORTED
3638 && CopSTASH_ne(PL_curcop, GvSTASH(dstr)))
3640 GvIMPORTED_on(dstr);
3643 if(mro_changes == 2) mro_isa_changed_in(GvSTASH(dstr));
3644 else if(mro_changes) mro_method_changed_in(GvSTASH(dstr));
3649 S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr)
3651 SV * const sref = SvREFCNT_inc(SvRV(sstr));
3653 const int intro = GvINTRO(dstr);
3656 const U32 stype = SvTYPE(sref);
3658 PERL_ARGS_ASSERT_GLOB_ASSIGN_REF;
3661 GvINTRO_off(dstr); /* one-shot flag */
3662 GvLINE(dstr) = CopLINE(PL_curcop);
3663 GvEGV(dstr) = MUTABLE_GV(dstr);
3668 location = (SV **) &GvCV(dstr);
3669 import_flag = GVf_IMPORTED_CV;
3672 location = (SV **) &GvHV(dstr);
3673 import_flag = GVf_IMPORTED_HV;
3676 location = (SV **) &GvAV(dstr);
3677 import_flag = GVf_IMPORTED_AV;
3680 location = (SV **) &GvIOp(dstr);
3683 location = (SV **) &GvFORM(dstr);
3686 location = &GvSV(dstr);
3687 import_flag = GVf_IMPORTED_SV;
3690 if (stype == SVt_PVCV) {
3691 /*if (GvCVGEN(dstr) && (GvCV(dstr) != (const CV *)sref || GvCVGEN(dstr))) {*/
3692 if (GvCVGEN(dstr)) {
3693 SvREFCNT_dec(GvCV(dstr));
3695 GvCVGEN(dstr) = 0; /* Switch off cacheness. */
3698 SAVEGENERICSV(*location);
3702 if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dstr))) {
3703 CV* const cv = MUTABLE_CV(*location);
3705 if (!GvCVGEN((const GV *)dstr) &&
3706 (CvROOT(cv) || CvXSUB(cv)))
3708 /* Redefining a sub - warning is mandatory if
3709 it was a const and its value changed. */
3710 if (CvCONST(cv) && CvCONST((const CV *)sref)
3712 == cv_const_sv((const CV *)sref)) {
3714 /* They are 2 constant subroutines generated from
3715 the same constant. This probably means that
3716 they are really the "same" proxy subroutine
3717 instantiated in 2 places. Most likely this is
3718 when a constant is exported twice. Don't warn.
3721 else if (ckWARN(WARN_REDEFINE)
3723 && (!CvCONST((const CV *)sref)
3724 || sv_cmp(cv_const_sv(cv),
3725 cv_const_sv((const CV *)
3727 Perl_warner(aTHX_ packWARN(WARN_REDEFINE),
3730 ? "Constant subroutine %s::%s redefined"
3731 : "Subroutine %s::%s redefined"),
3732 HvNAME_get(GvSTASH((const GV *)dstr)),
3733 GvENAME(MUTABLE_GV(dstr)));
3737 cv_ckproto_len(cv, (const GV *)dstr,
3738 SvPOK(sref) ? SvPVX_const(sref) : NULL,
3739 SvPOK(sref) ? SvCUR(sref) : 0);
3741 GvCVGEN(dstr) = 0; /* Switch off cacheness. */
3742 GvASSUMECV_on(dstr);
3743 if(GvSTASH(dstr)) mro_method_changed_in(GvSTASH(dstr)); /* sub foo { 1 } sub bar { 2 } *bar = \&foo */
3746 if (import_flag && !(GvFLAGS(dstr) & import_flag)
3747 && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) {
3748 GvFLAGS(dstr) |= import_flag;
3750 if (stype == SVt_PVAV && strEQ(GvNAME((GV*)dstr), "ISA")) {
3751 sv_magic(sref, dstr, PERL_MAGIC_isa, NULL, 0);
3752 mro_isa_changed_in(GvSTASH(dstr));
3757 if (SvTAINTED(sstr))
3763 Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
3766 register U32 sflags;
3768 register svtype stype;
3770 PERL_ARGS_ASSERT_SV_SETSV_FLAGS;
3775 if (SvIS_FREED(dstr)) {
3776 Perl_croak(aTHX_ "panic: attempt to copy value %" SVf
3777 " to a freed scalar %p", SVfARG(sstr), (void *)dstr);
3779 SV_CHECK_THINKFIRST_COW_DROP(dstr);
3781 sstr = &PL_sv_undef;
3782 if (SvIS_FREED(sstr)) {
3783 Perl_croak(aTHX_ "panic: attempt to copy freed scalar %p to %p",
3784 (void*)sstr, (void*)dstr);
3786 stype = SvTYPE(sstr);
3787 dtype = SvTYPE(dstr);
3789 (void)SvAMAGIC_off(dstr);
3792 /* need to nuke the magic */
3796 /* There's a lot of redundancy below but we're going for speed here */
3801 if (dtype != SVt_PVGV) {
3802 (void)SvOK_off(dstr);
3810 sv_upgrade(dstr, SVt_IV);
3814 sv_upgrade(dstr, SVt_PVIV);
3817 goto end_of_first_switch;
3819 (void)SvIOK_only(dstr);
3820 SvIV_set(dstr, SvIVX(sstr));
3823 /* SvTAINTED can only be true if the SV has taint magic, which in
3824 turn means that the SV type is PVMG (or greater). This is the
3825 case statement for SVt_IV, so this cannot be true (whatever gcov
3827 assert(!SvTAINTED(sstr));
3832 if (dtype < SVt_PV && dtype != SVt_IV)
3833 sv_upgrade(dstr, SVt_IV);
3841 sv_upgrade(dstr, SVt_NV);
3845 sv_upgrade(dstr, SVt_PVNV);
3848 goto end_of_first_switch;
3850 SvNV_set(dstr, SvNVX(sstr));
3851 (void)SvNOK_only(dstr);
3852 /* SvTAINTED can only be true if the SV has taint magic, which in
3853 turn means that the SV type is PVMG (or greater). This is the
3854 case statement for SVt_NV, so this cannot be true (whatever gcov
3856 assert(!SvTAINTED(sstr));
3862 #ifdef PERL_OLD_COPY_ON_WRITE
3863 if ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS) {
3864 if (dtype < SVt_PVIV)
3865 sv_upgrade(dstr, SVt_PVIV);
3872 sv_upgrade(dstr, SVt_PV);
3875 if (dtype < SVt_PVIV)
3876 sv_upgrade(dstr, SVt_PVIV);
3879 if (dtype < SVt_PVNV)
3880 sv_upgrade(dstr, SVt_PVNV);
3884 const char * const type = sv_reftype(sstr,0);
3886 Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_DESC(PL_op));
3888 Perl_croak(aTHX_ "Bizarre copy of %s", type);
3893 if (dtype < SVt_REGEXP)
3894 sv_upgrade(dstr, SVt_REGEXP);
3897 /* case SVt_BIND: */
3900 if (isGV_with_GP(sstr) && dtype <= SVt_PVGV) {
3901 glob_assign_glob(dstr, sstr, dtype);
3904 /* SvVALID means that this PVGV is playing at being an FBM. */
3908 if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) {
3910 if (SvTYPE(sstr) != stype) {
3911 stype = SvTYPE(sstr);
3912 if (isGV_with_GP(sstr) && stype == SVt_PVGV && dtype <= SVt_PVGV) {
3913 glob_assign_glob(dstr, sstr, dtype);
3918 if (stype == SVt_PVLV)
3919 SvUPGRADE(dstr, SVt_PVNV);
3921 SvUPGRADE(dstr, (svtype)stype);
3923 end_of_first_switch:
3925 /* dstr may have been upgraded. */
3926 dtype = SvTYPE(dstr);
3927 sflags = SvFLAGS(sstr);
3929 if (dtype == SVt_PVCV || dtype == SVt_PVFM) {
3930 /* Assigning to a subroutine sets the prototype. */
3933 const char *const ptr = SvPV_const(sstr, len);
3935 SvGROW(dstr, len + 1);
3936 Copy(ptr, SvPVX(dstr), len + 1, char);
3937 SvCUR_set(dstr, len);
3939 SvFLAGS(dstr) |= sflags & SVf_UTF8;
3943 } else if (dtype == SVt_PVAV || dtype == SVt_PVHV) {
3944 const char * const type = sv_reftype(dstr,0);
3946 Perl_croak(aTHX_ "Cannot copy to %s in %s", type, OP_DESC(PL_op));
3948 Perl_croak(aTHX_ "Cannot copy to %s", type);
3949 } else if (sflags & SVf_ROK) {
3950 if (isGV_with_GP(dstr) && dtype == SVt_PVGV
3951 && SvTYPE(SvRV(sstr)) == SVt_PVGV && isGV_with_GP(SvRV(sstr))) {
3954 if (GvIMPORTED(dstr) != GVf_IMPORTED
3955 && CopSTASH_ne(PL_curcop, GvSTASH(dstr)))
3957 GvIMPORTED_on(dstr);
3962 glob_assign_glob(dstr, sstr, dtype);
3966 if (dtype >= SVt_PV) {
3967 if (dtype == SVt_PVGV && isGV_with_GP(dstr)) {
3968 glob_assign_ref(dstr, sstr);
3971 if (SvPVX_const(dstr)) {
3977 (void)SvOK_off(dstr);
3978 SvRV_set(dstr, SvREFCNT_inc(SvRV(sstr)));
3979 SvFLAGS(dstr) |= sflags & SVf_ROK;
3980 assert(!(sflags & SVp_NOK));
3981 assert(!(sflags & SVp_IOK));
3982 assert(!(sflags & SVf_NOK));
3983 assert(!(sflags & SVf_IOK));
3985 else if (dtype == SVt_PVGV && isGV_with_GP(dstr)) {
3986 if (!(sflags & SVf_OK)) {
3987 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3988 "Undefined value assigned to typeglob");
3991 GV *gv = gv_fetchsv(sstr, GV_ADD, SVt_PVGV);
3992 if (dstr != (const SV *)gv) {
3994 gp_free(MUTABLE_GV(dstr));
3995 GvGP(dstr) = gp_ref(GvGP(gv));
3999 else if (dtype == SVt_REGEXP && stype == SVt_REGEXP) {
4000 reg_temp_copy((REGEXP*)dstr, (REGEXP*)sstr);
4002 else if (sflags & SVp_POK) {
4006 * Check to see if we can just swipe the string. If so, it's a
4007 * possible small lose on short strings, but a big win on long ones.
4008 * It might even be a win on short strings if SvPVX_const(dstr)
4009 * has to be allocated and SvPVX_const(sstr) has to be freed.
4010 * Likewise if we can set up COW rather than doing an actual copy, we
4011 * drop to the else clause, as the swipe code and the COW setup code
4012 * have much in common.
4015 /* Whichever path we take through the next code, we want this true,
4016 and doing it now facilitates the COW check. */
4017 (void)SvPOK_only(dstr);
4020 /* If we're already COW then this clause is not true, and if COW
4021 is allowed then we drop down to the else and make dest COW
4022 with us. If caller hasn't said that we're allowed to COW
4023 shared hash keys then we don't do the COW setup, even if the
4024 source scalar is a shared hash key scalar. */
4025 (((flags & SV_COW_SHARED_HASH_KEYS)
4026 ? (sflags & (SVf_FAKE|SVf_READONLY)) != (SVf_FAKE|SVf_READONLY)
4027 : 1 /* If making a COW copy is forbidden then the behaviour we
4028 desire is as if the source SV isn't actually already
4029 COW, even if it is. So we act as if the source flags
4030 are not COW, rather than actually testing them. */
4032 #ifndef PERL_OLD_COPY_ON_WRITE
4033 /* The change that added SV_COW_SHARED_HASH_KEYS makes the logic
4034 when PERL_OLD_COPY_ON_WRITE is defined a little wrong.
4035 Conceptually PERL_OLD_COPY_ON_WRITE being defined should
4036 override SV_COW_SHARED_HASH_KEYS, because it means "always COW"
4037 but in turn, it's somewhat dead code, never expected to go
4038 live, but more kept as a placeholder on how to do it better
4039 in a newer implementation. */
4040 /* If we are COW and dstr is a suitable target then we drop down
4041 into the else and make dest a COW of us. */
4042 || (SvFLAGS(dstr) & CAN_COW_MASK) != CAN_COW_FLAGS
4047 (sflags & SVs_TEMP) && /* slated for free anyway? */
4048 !(sflags & SVf_OOK) && /* and not involved in OOK hack? */
4049 (!(flags & SV_NOSTEAL)) &&
4050 /* and we're allowed to steal temps */
4051 SvREFCNT(sstr) == 1 && /* and no other references to it? */
4052 SvLEN(sstr)) /* and really is a string */
4053 #ifdef PERL_OLD_COPY_ON_WRITE
4054 && ((flags & SV_COW_SHARED_HASH_KEYS)
4055 ? (!((sflags & CAN_COW_MASK) == CAN_COW_FLAGS
4056 && (SvFLAGS(dstr) & CAN_COW_MASK) == CAN_COW_FLAGS
4057 && SvTYPE(sstr) >= SVt_PVIV && SvTYPE(sstr) != SVt_PVFM))
4061 /* Failed the swipe test, and it's not a shared hash key either.
4062 Have to copy the string. */
4063 STRLEN len = SvCUR(sstr);
4064 SvGROW(dstr, len + 1); /* inlined from sv_setpvn */
4065 Move(SvPVX_const(sstr),SvPVX(dstr),len,char);
4066 SvCUR_set(dstr, len);
4067 *SvEND(dstr) = '\0';
4069 /* If PERL_OLD_COPY_ON_WRITE is not defined, then isSwipe will always
4071 /* Either it's a shared hash key, or it's suitable for
4072 copy-on-write or we can swipe the string. */
4074 PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n");
4078 #ifdef PERL_OLD_COPY_ON_WRITE
4080 if ((sflags & (SVf_FAKE | SVf_READONLY))
4081 != (SVf_FAKE | SVf_READONLY)) {
4082 SvREADONLY_on(sstr);
4084 /* Make the source SV into a loop of 1.
4085 (about to become 2) */
4086 SV_COW_NEXT_SV_SET(sstr, sstr);
4090 /* Initial code is common. */
4091 if (SvPVX_const(dstr)) { /* we know that dtype >= SVt_PV */
4096 /* making another shared SV. */
4097 STRLEN cur = SvCUR(sstr);
4098 STRLEN len = SvLEN(sstr);
4099 #ifdef PERL_OLD_COPY_ON_WRITE
4101 assert (SvTYPE(dstr) >= SVt_PVIV);
4102 /* SvIsCOW_normal */
4103 /* splice us in between source and next-after-source. */
4104 SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr));
4105 SV_COW_NEXT_SV_SET(sstr, dstr);
4106 SvPV_set(dstr, SvPVX_mutable(sstr));
4110 /* SvIsCOW_shared_hash */
4111 DEBUG_C(PerlIO_printf(Perl_debug_log,
4112 "Copy on write: Sharing hash\n"));
4114 assert (SvTYPE(dstr) >= SVt_PV);
4116 HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr)))));
4118 SvLEN_set(dstr, len);
4119 SvCUR_set(dstr, cur);
4120 SvREADONLY_on(dstr);
4124 { /* Passes the swipe test. */
4125 SvPV_set(dstr, SvPVX_mutable(sstr));
4126 SvLEN_set(dstr, SvLEN(sstr));
4127 SvCUR_set(dstr, SvCUR(sstr));
4130 (void)SvOK_off(sstr); /* NOTE: nukes most SvFLAGS on sstr */
4131 SvPV_set(sstr, NULL);
4137 if (sflags & SVp_NOK) {
4138 SvNV_set(dstr, SvNVX(sstr));
4140 if (sflags & SVp_IOK) {
4141 SvIV_set(dstr, SvIVX(sstr));
4142 /* Must do this otherwise some other overloaded use of 0x80000000
4143 gets confused. I guess SVpbm_VALID */
4144 if (sflags & SVf_IVisUV)
4147 SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8);
4149 const MAGIC * const smg = SvVSTRING_mg(sstr);
4151 sv_magic(dstr, NULL, PERL_MAGIC_vstring,
4152 smg->mg_ptr, smg->mg_len);
4153 SvRMAGICAL_on(dstr);
4157 else if (sflags & (SVp_IOK|SVp_NOK)) {
4158 (void)SvOK_off(dstr);
4159 SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK);
4160 if (sflags & SVp_IOK) {
4161 /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */
4162 SvIV_set(dstr, SvIVX(sstr));
4164 if (sflags & SVp_NOK) {
4165 SvNV_set(dstr, SvNVX(sstr));
4169 if (isGV_with_GP(sstr)) {
4170 /* This stringification rule for globs is spread in 3 places.
4171 This feels bad. FIXME. */
4172 const U32 wasfake = sflags & SVf_FAKE;
4174 /* FAKE globs can get coerced, so need to turn this off
4175 temporarily if it is on. */
4177 gv_efullname3(dstr, MUTABLE_GV(sstr), "*");
4178 SvFLAGS(sstr) |= wasfake;
4181 (void)SvOK_off(dstr);
4183 if (SvTAINTED(sstr))
4188 =for apidoc sv_setsv_mg
4190 Like C<sv_setsv>, but also handles 'set' magic.
4196 Perl_sv_setsv_mg(pTHX_ SV *const dstr, register SV *const sstr)
4198 PERL_ARGS_ASSERT_SV_SETSV_MG;
4200 sv_setsv(dstr,sstr);
4204 #ifdef PERL_OLD_COPY_ON_WRITE
4206 Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr)
4208 STRLEN cur = SvCUR(sstr);
4209 STRLEN len = SvLEN(sstr);
4210 register char *new_pv;
4212 PERL_ARGS_ASSERT_SV_SETSV_COW;
4215 PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n",
4216 (void*)sstr, (void*)dstr);
4223 if (SvTHINKFIRST(dstr))
4224 sv_force_normal_flags(dstr, SV_COW_DROP_PV);
4225 else if (SvPVX_const(dstr))
4226 Safefree(SvPVX_const(dstr));
4230 SvUPGRADE(dstr, SVt_PVIV);
4232 assert (SvPOK(sstr));
4233 assert (SvPOKp(sstr));
4234 assert (!SvIOK(sstr));
4235 assert (!SvIOKp(sstr));
4236 assert (!SvNOK(sstr));
4237 assert (!SvNOKp(sstr));
4239 if (SvIsCOW(sstr)) {
4241 if (SvLEN(sstr) == 0) {
4242 /* source is a COW shared hash key. */
4243 DEBUG_C(PerlIO_printf(Perl_debug_log,
4244 "Fast copy on write: Sharing hash\n"));
4245 new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr))));
4248 SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr));
4250 assert ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS);
4251 SvUPGRADE(sstr, SVt_PVIV);
4252 SvREADONLY_on(sstr);
4254 DEBUG_C(PerlIO_printf(Perl_debug_log,
4255 "Fast copy on write: Converting sstr to COW\n"));
4256 SV_COW_NEXT_SV_SET(dstr, sstr);
4258 SV_COW_NEXT_SV_SET(sstr, dstr);
4259 new_pv = SvPVX_mutable(sstr);
4262 SvPV_set(dstr, new_pv);
4263 SvFLAGS(dstr) = (SVt_PVIV|SVf_POK|SVp_POK|SVf_FAKE|SVf_READONLY);
4266 SvLEN_set(dstr, len);
4267 SvCUR_set(dstr, cur);
4276 =for apidoc sv_setpvn
4278 Copies a string into an SV. The C<len> parameter indicates the number of
4279 bytes to be copied. If the C<ptr> argument is NULL the SV will become
4280 undefined. Does not handle 'set' magic. See C<sv_setpvn_mg>.
4286 Perl_sv_setpvn(pTHX_ register SV *const sv, register const char *const ptr, register const STRLEN len)
4289 register char *dptr;
4291 PERL_ARGS_ASSERT_SV_SETPVN;
4293 SV_CHECK_THINKFIRST_COW_DROP(sv);
4299 /* len is STRLEN which is unsigned, need to copy to signed */
4302 Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen");
4304 SvUPGRADE(sv, SVt_PV);
4306 dptr = SvGROW(sv, len + 1);
4307 Move(ptr,dptr,len,char);
4310 (void)SvPOK_only_UTF8(sv); /* validate pointer */
4315 =for apidoc sv_setpvn_mg
4317 Like C<sv_setpvn>, but also handles 'set' magic.
4323 Perl_sv_setpvn_mg(pTHX_ register SV *const sv, register const char *const ptr, register const STRLEN len)
4325 PERL_ARGS_ASSERT_SV_SETPVN_MG;
4327 sv_setpvn(sv,ptr,len);
4332 =for apidoc sv_setpv
4334 Copies a string into an SV. The string must be null-terminated. Does not
4335 handle 'set' magic. See C<sv_setpv_mg>.
4341 Perl_sv_setpv(pTHX_ register SV *const sv, register const char *const ptr)
4344 register STRLEN len;
4346 PERL_ARGS_ASSERT_SV_SETPV;
4348 SV_CHECK_THINKFIRST_COW_DROP(sv);
4354 SvUPGRADE(sv, SVt_PV);
4356 SvGROW(sv, len + 1);
4357 Move(ptr,SvPVX(sv),len+1,char);
4359 (void)SvPOK_only_UTF8(sv); /* validate pointer */
4364 =for apidoc sv_setpv_mg
4366 Like C<sv_setpv>, but also handles 'set' magic.
4372 Perl_sv_setpv_mg(pTHX_ register SV *const sv, register const char *const ptr)
4374 PERL_ARGS_ASSERT_SV_SETPV_MG;
4381 =for apidoc sv_usepvn_flags
4383 Tells an SV to use C<ptr> to find its string value. Normally the
4384 string is stored inside the SV but sv_usepvn allows the SV to use an
4385 outside string. The C<ptr> should point to memory that was allocated
4386 by C<malloc>. The string length, C<len>, must be supplied. By default
4387 this function will realloc (i.e. move) the memory pointed to by C<ptr>,
4388 so that pointer should not be freed or used by the programmer after
4389 giving it to sv_usepvn, and neither should any pointers from "behind"
4390 that pointer (e.g. ptr + 1) be used.
4392 If C<flags> & SV_SMAGIC is true, will call SvSETMAGIC. If C<flags> &
4393 SV_HAS_TRAILING_NUL is true, then C<ptr[len]> must be NUL, and the realloc
4394 will be skipped. (i.e. the buffer is actually at least 1 byte longer than
4395 C<len>, and already meets the requirements for storing in C<SvPVX>)
4401 Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 flags)
4406 PERL_ARGS_ASSERT_SV_USEPVN_FLAGS;
4408 SV_CHECK_THINKFIRST_COW_DROP(sv);
4409 SvUPGRADE(sv, SVt_PV);
4412 if (flags & SV_SMAGIC)
4416 if (SvPVX_const(sv))
4420 if (flags & SV_HAS_TRAILING_NUL)
4421 assert(ptr[len] == '\0');
4424 allocate = (flags & SV_HAS_TRAILING_NUL)
4426 #ifdef Perl_safesysmalloc_size
4429 PERL_STRLEN_ROUNDUP(len + 1);
4431 if (flags & SV_HAS_TRAILING_NUL) {
4432 /* It's long enough - do nothing.
4433 Specfically Perl_newCONSTSUB is relying on this. */
4436 /* Force a move to shake out bugs in callers. */
4437 char *new_ptr = (char*)safemalloc(allocate);
4438 Copy(ptr, new_ptr, len, char);
4439 PoisonFree(ptr,len,char);
4443 ptr = (char*) saferealloc (ptr, allocate);
4446 #ifdef Perl_safesysmalloc_size
4447 SvLEN_set(sv, Perl_safesysmalloc_size(ptr));
4449 SvLEN_set(sv, allocate);
4453 if (!(flags & SV_HAS_TRAILING_NUL)) {
4456 (void)SvPOK_only_UTF8(sv); /* validate pointer */
4458 if (flags & SV_SMAGIC)
4462 #ifdef PERL_OLD_COPY_ON_WRITE
4463 /* Need to do this *after* making the SV normal, as we need the buffer
4464 pointer to remain valid until after we've copied it. If we let go too early,
4465 another thread could invalidate it by unsharing last of the same hash key
4466 (which it can do by means other than releasing copy-on-write Svs)
4467 or by changing the other copy-on-write SVs in the loop. */
4469 S_sv_release_COW(pTHX_ register SV *sv, const char *pvx, SV *after)
4471 PERL_ARGS_ASSERT_SV_RELEASE_COW;
4473 { /* this SV was SvIsCOW_normal(sv) */
4474 /* we need to find the SV pointing to us. */
4475 SV *current = SV_COW_NEXT_SV(after);
4477 if (current == sv) {
4478 /* The SV we point to points back to us (there were only two of us
4480 Hence other SV is no longer copy on write either. */
4482 SvREADONLY_off(after);
4484 /* We need to follow the pointers around the loop. */
4486 while ((next = SV_COW_NEXT_SV(current)) != sv) {
4489 /* don't loop forever if the structure is bust, and we have
4490 a pointer into a closed loop. */
4491 assert (current != after);
4492 assert (SvPVX_const(current) == pvx);
4494 /* Make the SV before us point to the SV after us. */
4495 SV_COW_NEXT_SV_SET(current, after);
4501 =for apidoc sv_force_normal_flags
4503 Undo various types of fakery on an SV: if the PV is a shared string, make
4504 a private copy; if we're a ref, stop refing; if we're a glob, downgrade to
4505 an xpvmg; if we're a copy-on-write scalar, this is the on-write time when
4506 we do the copy, and is also used locally. If C<SV_COW_DROP_PV> is set
4507 then a copy-on-write scalar drops its PV buffer (if any) and becomes
4508 SvPOK_off rather than making a copy. (Used where this scalar is about to be
4509 set to some other value.) In addition, the C<flags> parameter gets passed to
4510 C<sv_unref_flags()> when unrefing. C<sv_force_normal> calls this function
4511 with flags set to 0.
4517 Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags)
4521 PERL_ARGS_ASSERT_SV_FORCE_NORMAL_FLAGS;
4523 #ifdef PERL_OLD_COPY_ON_WRITE
4524 if (SvREADONLY(sv)) {
4526 const char * const pvx = SvPVX_const(sv);
4527 const STRLEN len = SvLEN(sv);
4528 const STRLEN cur = SvCUR(sv);
4529 /* next COW sv in the loop. If len is 0 then this is a shared-hash
4530 key scalar, so we mustn't attempt to call SV_COW_NEXT_SV(), as
4531 we'll fail an assertion. */
4532 SV * const next = len ? SV_COW_NEXT_SV(sv) : 0;
4535 PerlIO_printf(Perl_debug_log,
4536 "Copy on write: Force normal %ld\n",
4542 /* This SV doesn't own the buffer, so need to Newx() a new one: */
4545 if (flags & SV_COW_DROP_PV) {
4546 /* OK, so we don't need to copy our buffer. */
4549 SvGROW(sv, cur + 1);
4550 Move(pvx,SvPVX(sv),cur,char);
4555 sv_release_COW(sv, pvx, next);
4557 unshare_hek(SvSHARED_HEK_FROM_PV(pvx));
4563 else if (IN_PERL_RUNTIME)
4564 Perl_croak_no_modify(aTHX);
4567 if (SvREADONLY(sv)) {
4569 const char * const pvx = SvPVX_const(sv);
4570 const STRLEN len = SvCUR(sv);
4575 SvGROW(sv, len + 1);
4576 Move(pvx,SvPVX(sv),len,char);
4578 unshare_hek(SvSHARED_HEK_FROM_PV(pvx));
4580 else if (IN_PERL_RUNTIME)
4581 Perl_croak_no_modify(aTHX);
4585 sv_unref_flags(sv, flags);
4586 else if (SvFAKE(sv) && SvTYPE(sv) == SVt_PVGV)
4588 else if (SvFAKE(sv) && SvTYPE(sv) == SVt_REGEXP) {
4589 /* Need to downgrade the REGEXP to a simple(r) scalar. This is analagous
4590 to sv_unglob. We only need it here, so inline it. */
4591 const svtype new_type = SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV;
4592 SV *const temp = newSV_type(new_type);
4593 void *const temp_p = SvANY(sv);
4595 if (new_type == SVt_PVMG) {
4596 SvMAGIC_set(temp, SvMAGIC(sv));
4597 SvMAGIC_set(sv, NULL);
4598 SvSTASH_set(temp, SvSTASH(sv));
4599 SvSTASH_set(sv, NULL);
4601 SvCUR_set(temp, SvCUR(sv));
4602 /* Remember that SvPVX is in the head, not the body. */
4604 SvLEN_set(temp, SvLEN(sv));
4605 /* This signals "buffer is owned by someone else" in sv_clear,
4606 which is the least effort way to stop it freeing the buffer.
4608 SvLEN_set(sv, SvLEN(sv)+1);
4610 /* Their buffer is already owned by someone else. */
4611 SvPVX(sv) = savepvn(SvPVX(sv), SvCUR(sv));
4612 SvLEN_set(temp, SvCUR(sv)+1);
4615 /* Now swap the rest of the bodies. */
4617 SvFLAGS(sv) &= ~(SVf_FAKE|SVTYPEMASK);
4618 SvFLAGS(sv) |= new_type;
4619 SvANY(sv) = SvANY(temp);
4621 SvFLAGS(temp) &= ~(SVTYPEMASK);
4622 SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE;
4623 SvANY(temp) = temp_p;
4632 Efficient removal of characters from the beginning of the string buffer.
4633 SvPOK(sv) must be true and the C<ptr> must be a pointer to somewhere inside
4634 the string buffer. The C<ptr> becomes the first character of the adjusted
4635 string. Uses the "OOK hack".
4636 Beware: after this function returns, C<ptr> and SvPVX_const(sv) may no longer
4637 refer to the same chunk of data.
4643 Perl_sv_chop(pTHX_ register SV *const sv, register const char *const ptr)
4649 const U8 *real_start;
4653 PERL_ARGS_ASSERT_SV_CHOP;
4655 if (!ptr || !SvPOKp(sv))
4657 delta = ptr - SvPVX_const(sv);
4659 /* Nothing to do. */
4662 /* SvPVX(sv) may move in SV_CHECK_THINKFIRST(sv), but after this line,
4663 nothing uses the value of ptr any more. */
4664 max_delta = SvLEN(sv) ? SvLEN(sv) : SvCUR(sv);
4665 if (ptr <= SvPVX_const(sv))
4666 Perl_croak(aTHX_ "panic: sv_chop ptr=%p, start=%p, end=%p",
4667 ptr, SvPVX_const(sv), SvPVX_const(sv) + max_delta);
4668 SV_CHECK_THINKFIRST(sv);
4669 if (delta > max_delta)
4670 Perl_croak(aTHX_ "panic: sv_chop ptr=%p (was %p), start=%p, end=%p",
4671 SvPVX_const(sv) + delta, ptr, SvPVX_const(sv),
4672 SvPVX_const(sv) + max_delta);
4675 if (!SvLEN(sv)) { /* make copy of shared string */
4676 const char *pvx = SvPVX_const(sv);
4677 const STRLEN len = SvCUR(sv);
4678 SvGROW(sv, len + 1);
4679 Move(pvx,SvPVX(sv),len,char);
4682 SvFLAGS(sv) |= SVf_OOK;
4685 SvOOK_offset(sv, old_delta);
4687 SvLEN_set(sv, SvLEN(sv) - delta);
4688 SvCUR_set(sv, SvCUR(sv) - delta);
4689 SvPV_set(sv, SvPVX(sv) + delta);
4691 p = (U8 *)SvPVX_const(sv);