X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/c2b527b3ea8007dff526018f2f60dba8ff99b294..HEAD:/sv.h diff --git a/sv.h b/sv.h index d8ace32..240986c 100644 --- a/sv.h +++ b/sv.h @@ -320,12 +320,11 @@ C can only be used with expressions without side effects, you don't need the return value, and you know C is not C. =for apidoc SvREFCNT_dec -Decrements the reference count of the given SV. C may be C. +=for apidoc_item SvREFCNT_dec_NN -=for apidoc SvREFCNT_dec_NN -Same as C, but can only be used if you know C -is not C. Since we don't have to check the NULLness, it's faster -and smaller. +These decrement the reference count of the given SV. + +C may only be used when C is known to not be C. =for apidoc Am|svtype|SvTYPE|SV* sv Returns the type of the SV. See C>. @@ -748,7 +747,7 @@ Returns a boolean indicating whether the SV contains a v-string. =for apidoc Am|U32|SvOOK|SV* sv Returns a U32 indicating whether the pointer to the string buffer is offset. This hack is used internally to speed up removal of characters from the -beginning of a C. When C is true, then the start of the +beginning of a C>. When C is true, then the start of the allocated string buffer is actually C bytes before C. This offset used to be stored in C, but is now stored within the spare part of the buffer. @@ -802,7 +801,9 @@ compiler will complain if you were to try to modify the contents of the string, (unless you cast away const yourself). =for apidoc Am|STRLEN|SvCUR|SV* sv -Returns the length of the string which is in the SV. See C>. +Returns the length, in bytes, of the PV inside the SV. +Note that this may not match Perl's C; for that, use +C. See C> also. =for apidoc Am|STRLEN|SvLEN|SV* sv Returns the size of the string buffer in the SV, not including any part @@ -856,8 +857,8 @@ Set the value of the MAGIC pointer in C to val. See C>. Set the value of the STASH pointer in C to val. See C>. 
=for apidoc Am|void|SvCUR_set|SV* sv|STRLEN len -Set the current length of the string which is in the SV. See C> -and C>. +Sets the current length, in bytes, of the C string which is in the SV. +See C> and C>. =for apidoc Am|void|SvLEN_set|SV* sv|STRLEN len Set the size of the string buffer for the SV. See C>. @@ -923,7 +924,7 @@ Set the size of the string buffer for the SV. See C>. =for apidoc Am|U32|SvUTF8|SV* sv Returns a U32 value indicating the UTF-8 status of an SV. If things are set-up properly, this indicates whether or not the SV contains UTF-8 encoded data. -You should use this I a call to C or one of its variants, in +You should use this I a call to C> or one of its variants, in case any call to string overloading updates the internal flag. If you want to take into account the L pragma, use C> @@ -1384,6 +1385,17 @@ object type. Exposed to perl code via Internals::SvREADONLY(). STMT_START { assert(SvTYPE(sv) >= SVt_PV); \ SvCUR_set(sv, (val) - SvPVX(sv)); } STMT_END +/* +=for apidoc Am|void|SvPV_renew|SV* sv|STRLEN len +Low level micro optimization of C>. It is generally better to use +C instead. This is because C ignores potential issues that +C handles. C needs to have a real C that is unencumbered by +things like COW. Using C or +C before calling this should clean it up, but +why not just use C if you're not sure about the provenance? + +=cut +*/ #define SvPV_renew(sv,n) \ STMT_START { SvLEN_set(sv, n); \ SvPV_set((sv), (MEM_WRAP_CHECK_(n,char) \ @@ -1543,9 +1555,23 @@ attention to precisely which outputs are influenced by which inputs. /* =for apidoc Am|char*|SvPV_force|SV* sv|STRLEN len -Like C but will force the SV into containing a string (C), and -only a string (C), by hook or by crook. You need force if you are -going to update the C directly. Processes get magic. 
+=for apidoc_item ||SvPV_force_nolen|SV* sv +=for apidoc_item ||SvPVx_force|SV* sv|STRLEN len +=for apidoc_item ||SvPV_force_nomg|SV* sv|STRLEN len +=for apidoc_item ||SvPV_force_nomg_nolen|SV * sv +=for apidoc_item ||SvPV_force_mutable|SV * sv|STRLEN len +=for apidoc_item ||SvPV_force_flags|SV * sv|STRLEN len|U32 flags +=for apidoc_item ||SvPV_force_flags_nolen|SV * sv|U32 flags +=for apidoc_item ||SvPV_force_flags_mutable|SV * sv|STRLEN len|U32 flags +=for apidoc_item ||SvPVbyte_force +=for apidoc_item ||SvPVbytex_force +=for apidoc_item ||SvPVutf8_force +=for apidoc_item ||SvPVutf8x_force + +These are like C>, returning the string in the SV, but will force the +SV into containing a string (C>), and only a string +(C>), by hook or by crook. You need to use one of these +C routines if you are going to update the C> directly. Note that coercing an arbitrary scalar into a plain PV will potentially strip useful data from it. For example if the SV was C, then the @@ -1553,39 +1579,134 @@ referent will have its reference count decremented, and the SV itself may be converted to an C scalar with a string buffer containing a value such as C<"ARRAY(0x1234)">. -=for apidoc Am|char*|SvPV_force_nomg|SV* sv|STRLEN len -Like C, but doesn't process get magic. +The differences between the forms are: + +The forms with C in their names allow you to use the C parameter +to specify to perform 'get' magic (by setting the C flag) or to skip +'get' magic (by clearing it). The other forms do perform 'get' magic, except +for the ones with C in their names, which skip 'get' magic. + +The forms that take a C parameter will set that variable to the byte +length of the resultant string (these are macros, so don't use C<&len>). + +The forms with C in their names indicate they don't have a C +parameter. They should be used only when it is known that the PV is a C +string, terminated by a NUL byte, and without intermediate NUL characters; or +when you don't care about its length. 
+ +The forms with C in their names are effectively the same as those without, +but the name emphasizes that the string is modifiable by the caller, which it is +in all the forms. + +C is like C, but converts C to UTF-8 first if +not already UTF-8. + +C is like C, but guarantees to evaluate C +only once; use the more efficient C otherwise. + +C is like C, but converts C to byte +representation first if currently encoded as UTF-8. If the SV cannot be +downgraded from UTF-8, this croaks. + +C is like C, but guarantees to evaluate C +only once; use the more efficient C otherwise. =for apidoc Am|char*|SvPV|SV* sv|STRLEN len -Returns a pointer to the string in the SV, or a stringified form of -the SV if the SV does not contain a string. The SV may cache the -stringified version becoming C. Handles 'get' magic. The -C variable will be set to the length of the string (this is a macro, so -don't use C<&len>). See also C> for a version which guarantees to -evaluate C only once. - -Note that there is no guarantee that the return value of C is -equal to C, or that C contains valid data, or that -successive calls to C will return the same pointer value each -time. This is due to the way that things like overloading and -Copy-On-Write are handled. In these cases, the return value may point to -a temporary buffer or similar. If you absolutely need the C field to -be valid (for example, if you intend to write to it), then see -C>. - -=for apidoc Am|char*|SvPVx|SV* sv|STRLEN len -A version of C which guarantees to evaluate C only once. -Only use this if C is an expression with side effects, otherwise use the -more efficient C. - -=for apidoc Am|char*|SvPV_nomg|SV* sv|STRLEN len -Like C but doesn't process magic. - -=for apidoc Am|char*|SvPV_nolen|SV* sv -Like C but doesn't set a length variable. - -=for apidoc Am|char*|SvPV_nomg_nolen|SV* sv -Like C but doesn't process magic. 
+=for apidoc_item |char*|SvPVx|SV* sv|STRLEN len +=for apidoc_item |char*|SvPV_nomg|SV* sv|STRLEN len +=for apidoc_item |char*|SvPV_nolen|SV* sv +=for apidoc_item |char*|SvPVx_nolen|SV* sv +=for apidoc_item |char*|SvPV_nomg_nolen|SV* sv +=for apidoc_item |char*|SvPV_mutable|SV* sv|STRLEN len +=for apidoc_item |const char*|SvPV_const|SV* sv|STRLEN len +=for apidoc_item |const char*|SvPVx_const|SV* sv|STRLEN len +=for apidoc_item |const char*|SvPV_nolen_const|SV* sv +=for apidoc_item |const char*|SvPVx_nolen_const|SV* sv +=for apidoc_item |const char*|SvPV_nomg_const|SV* sv|STRLEN len +=for apidoc_item |const char*|SvPV_nomg_const_nolen|SV* sv +=for apidoc_item |char *|SvPV_flags|SV * sv|STRLEN len|U32 flags +=for apidoc_item |const char *|SvPV_flags_const|SV * sv|STRLEN len|U32 flags +=for apidoc_item |char *|SvPV_flags_mutable|SV * sv|STRLEN len|U32 flags +=for apidoc_item |char*|SvPVbyte|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVbyte_nomg|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVbyte_nolen|SV* sv +=for apidoc_item |char*|SvPVbytex_nolen|SV* sv +=for apidoc_item |char*|SvPVbytex|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVbyte_or_null|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVbyte_or_null_nomg|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVutf8|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVutf8x|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVutf8_nomg|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVutf8_nolen|SV* sv +=for apidoc_item |char*|SvPVutf8_or_null|SV* sv|STRLEN len +=for apidoc_item |char*|SvPVutf8_or_null_nomg|SV* sv|STRLEN len + +All these return a pointer to the string in C, or a stringified form of +C if it does not contain a string. The SV may cache the stringified +version becoming C. + +This is a very basic and common operation, so there are lots of slightly +different versions of it. 
+ +Note that there is no guarantee that the return value of C, for +example, is equal to C, or that C contains valid data, or +that successive calls to C (or another of these forms) will return +the same pointer value each time. This is due to the way that things like +overloading and Copy-On-Write are handled. In these cases, the return value +may point to a temporary buffer or similar. If you absolutely need the +C field to be valid (for example, if you intend to write to it), then +see C>. + +The differences between the forms are: + +The forms with neither C nor C in their names (e.g., C or +C) can expose the SV's internal string buffer. If +that buffer consists entirely of bytes 0-255 and includes any bytes above +127, then you B consult C to determine the actual code points +the string is meant to contain. Generally speaking, it is probably safer to +prefer C, C, and the like. See +L for more details. + +The forms with C in their names allow you to use the C parameter +to specify to process 'get' magic (by setting the C flag) or to skip +'get' magic (by clearing it). The other forms process 'get' magic, except for +the ones with C in their names, which skip 'get' magic. + +The forms that take a C parameter will set that variable to the byte +length of the resultant string (these are macros, so don't use C<&len>). + +The forms with C in their names indicate they don't have a C +parameter. They should be used only when it is known that the PV is a C +string, terminated by a NUL byte, and without intermediate NUL characters; or +when you don't care about its length. + +The forms with C in their names return S> so that the +compiler will hopefully complain if you were to try to modify the contents of +the string (unless you cast away const yourself). + +The other forms return a mutable pointer so that the string is modifiable by +the caller; this is emphasized for the ones with C in their names. 
+ +The forms whose name ends in C are the same as the corresponding form +without the C, but the C form is guaranteed to evaluate C exactly +once, with a slight loss of efficiency. Use this if C is an expression +with side effects. + +C is like C, but converts C to UTF-8 first if not already +UTF-8. Similarly, the other forms with C in their names correspond to +their respective forms without. + +C and C don't have corresponding +non-C forms. Instead they are like C, but when C is +undef, they return C. + +C is like C, but converts C to byte representation first if +currently encoded as UTF-8. If C cannot be downgraded from UTF-8, it +croaks. Similarly, the other forms with C in their names correspond to +their respective forms without. + +C doesn't have a corresponding non-C form. Instead it +is like C, but when C is undef, it returns C. =for apidoc Am|IV|SvIV|SV* sv =for apidoc_item SvIVx @@ -1632,24 +1753,30 @@ efficient C. C is the same as C, but does not perform 'get' magic. -=for apidoc Am|bool|SvTRUE|SV* sv -Returns a boolean indicating whether Perl would evaluate the SV as true or -false. See C> for a defined/undefined test. Handles 'get' magic -unless the scalar is already C, C or C (the public, not the -private flags). +=for apidoc SvTRUE +=for apidoc_item SvTRUEx +=for apidoc_item SvTRUE_nomg +=for apidoc_item SvTRUE_NN +=for apidoc_item SvTRUE_nomg_NN + +These return a boolean indicating whether Perl would evaluate the SV as true or +false. See C> for a defined/undefined test. -As of Perl 5.32, this is guaranteed to evaluate C only once. Prior to that -release, use C> for single evaluation. +As of Perl 5.32, all are guaranteed to evaluate C only once. Prior to that +release, only C guaranteed single evaluation; now C is +identical to C. -=for apidoc Am|bool|SvTRUE_nomg|SV* sv -Returns a boolean indicating whether Perl would evaluate the SV as true or -false. See C> for a defined/undefined test. Does not handle 'get' magic. 
+C and C do not perform 'get' magic; the others do +unless the scalar is already C, C, or C (the public, not +the private flags). -=for apidoc Am|bool|SvTRUEx|SV* sv -Identical to C>. Prior to 5.32, they differed in that only this one -was guaranteed to evaluate C only once; in 5.32 they both evaluated it -once, but C was slightly slower on some platforms; now they are -identical. +C is like C>, but C is assumed to be +non-null (NN). If there is a possibility that it is NULL, use plain +C. + +C is like C>, but C is assumed to be +non-null (NN). If there is a possibility that it is NULL, use plain +C. =for apidoc Am|char*|SvPVutf8_force|SV* sv|STRLEN len Like C, but converts C to UTF-8 first if necessary. @@ -1690,26 +1817,6 @@ Like C, but does not process get magic. Like C, but converts C to byte representation first if necessary. If the SV cannot be downgraded from UTF-8, this croaks. -=for apidoc Am|char*|SvPVutf8x_force|SV* sv|STRLEN len -Like C, but converts C to UTF-8 first if necessary. -Guarantees to evaluate C only once; use the more efficient C -otherwise. - -=for apidoc Am|char*|SvPVutf8x|SV* sv|STRLEN len -Like C, but converts C to UTF-8 first if necessary. -Guarantees to evaluate C only once; use the more efficient C -otherwise. - -=for apidoc Am|char*|SvPVbytex_force|SV* sv|STRLEN len -Like C, but converts C to byte representation first if necessary. -Guarantees to evaluate C only once; use the more efficient C -otherwise. If the SV cannot be downgraded from UTF-8, this croaks. - -=for apidoc Am|char*|SvPVbytex|SV* sv|STRLEN len -Like C, but converts C to byte representation first if necessary. -Guarantees to evaluate C only once; use the more efficient C -otherwise. If the SV cannot be downgraded from UTF-8, this croaks. - =for apidoc Am|U32|SvIsCOW|SV* sv Returns a U32 value indicating whether the SV is Copy-On-Write (either shared hash key scalars, or full Copy On Write scalars if 5.9.0 is configured for @@ -1856,25 +1963,9 @@ scalar. 
#define SvPVutf8x_force(sv, len) sv_pvutf8n_force(sv, &len) #define SvPVbytex_force(sv, len) sv_pvbyten_force(sv, &len) -#define SvTRUE(sv) Perl_SvTRUE(aTHX_ sv) #define SvTRUEx(sv) SvTRUE(sv) -#define SvTRUE_nomg(sv) (LIKELY(sv) && SvTRUE_nomg_NN(sv)) -#define SvTRUE_NN(sv) (SvGETMAGIC(sv), SvTRUE_nomg_NN(sv)) -#define SvTRUE_nomg_NN(sv) (SvTRUE_common(sv, sv_2bool_nomg(sv))) - -#define SvTRUE_common(sv,fallback) ( \ - SvIMMORTAL_INTERP(sv) \ - ? SvIMMORTAL_TRUE(sv) \ - : !SvOK(sv) \ - ? 0 \ - : SvPOK(sv) \ - ? SvPVXtrue(sv) \ - : SvIOK(sv) \ - ? (SvIVX(sv) != 0 /* cast to bool */) \ - : (SvROK(sv) && !( SvOBJECT(SvRV(sv)) \ - && HvAMAGIC(SvSTASH(SvRV(sv))))) \ - ? TRUE \ - : (fallback)) +#define SvTRUEx_nomg(sv) SvTRUE_nomg(sv) +#define SvTRUE_nomg_NN(sv) SvTRUE_common(sv, TRUE) #if defined(PERL_USE_GCC_BRACE_GROUPS) @@ -1888,7 +1979,6 @@ scalar. # define SvPVutf8x(sv, len) ({SV *_sv = (sv); SvPVutf8(_sv, len); }) # define SvPVbytex(sv, len) ({SV *_sv = (sv); SvPVbyte(_sv, len); }) # define SvPVbytex_nolen(sv) ({SV *_sv = (sv); SvPVbyte_nolen(_sv); }) -# define SvTRUEx_nomg(sv) ({SV *_sv = (sv); SvTRUE_nomg(_sv); }) #else /* __GNUC__ */ @@ -1905,7 +1995,6 @@ scalar. # define SvPVutf8x(sv, len) ((PL_Sv = (sv)), SvPVutf8(PL_Sv, len)) # define SvPVbytex(sv, len) ((PL_Sv = (sv)), SvPVbyte(PL_Sv, len)) # define SvPVbytex_nolen(sv) ((PL_Sv = (sv)), SvPVbyte_nolen(PL_Sv)) -# define SvTRUEx_nomg(sv) ((PL_Sv = (sv)), SvTRUE_nomg(PL_Sv)) #endif /* __GNU__ */ #define SvPVXtrue(sv) ( \ @@ -1926,6 +2015,12 @@ scalar. #define SvSHARED_HEK_FROM_PV(pvx) \ ((struct hek*)(pvx - STRUCT_OFFSET(struct hek, hek_key))) +/* +=for apidoc Am|struct hek*|SvSHARED_HASH|SV * sv +Returns the hash for C created by C>. + +=cut +*/ #define SvSHARED_HASH(sv) (0 + SvSHARED_HEK_FROM_PV(SvPVX_const(sv))->hek_hash) /* flag values for sv_*_flags functions */ @@ -2085,10 +2180,11 @@ scalar. 
#endif /* -=for apidoc Am|SV*|newRV_inc|SV* sv +=for apidoc newRV +=for apidoc_item ||newRV_inc| -Creates an RV wrapper for an SV. The reference count for the original SV is -incremented. +These are identical. They create an RV wrapper for an SV. The reference count +for the original SV is incremented. =cut */ @@ -2098,7 +2194,7 @@ incremented. /* the following macros update any magic values this C is associated with */ /* -=for apidoc_section $magic +=for apidoc_section $SV =for apidoc Am|void|SvGETMAGIC|SV* sv Invokes C> on an SV if it has 'get' magic. For example, this @@ -2112,18 +2208,21 @@ or a tied variable (it calls C). This macro evaluates its argument more than once. =for apidoc Am|void|SvSetSV|SV* dsv|SV* ssv -Calls C if C is not the same as C. May evaluate arguments -more than once. Does not handle 'set' magic on the destination SV. +=for apidoc_item SvSetMagicSV +=for apidoc_item SvSetSV_nosteal +=for apidoc_item SvSetMagicSV_nosteal + +If C is the same as C, these do nothing. Otherwise they all call +some form of C>. They may evaluate their arguments more than +once. -=for apidoc Am|void|SvSetSV_nosteal|SV* dsv|SV* ssv -Calls a non-destructive version of C if C is not the same as -C. May evaluate arguments more than once. +The only differences are: -=for apidoc Am|void|SvSetMagicSV|SV* dsv|SV* ssv -Like C, but does any set magic required afterwards. +C and C perform any required 'set' magic +afterwards on the destination SV; C and C do not. -=for apidoc Am|void|SvSetMagicSV_nosteal|SV* dsv|SV* ssv -Like C, but does any set magic required afterwards. +C and C call a non-destructive version of +C. =for apidoc Am|void|SvSHARE|SV* sv Arranges for C to be shared between threads if a suitable module