=for hackers
Found in file av.c
+=item ax
+
+Variable which is setup by C<xsubpp> to indicate the stack base offset,
+used by the C<ST>, C<XSprePUSH> and C<XSRETURN> macros. The C<dMARK> macro
+must be called prior to setup the C<MARK> variable.
+
+ I32 ax
+
+=for hackers
+Found in file XSUB.h
+
+=item bytes_from_utf8
+
+Converts a string C<s> of length C<len> from UTF8 into byte encoding.
+Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
+the newly-created string, and updates C<len> to contain the new
+length. Returns the original string if no conversion occurs, C<len>
+is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to
+0 if C<s> is converted or contains all 7bit characters.
+
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ U8* bytes_from_utf8(U8 *s, STRLEN *len, bool *is_utf8)
+
+=for hackers
+Found in file utf8.c
+
=item bytes_to_utf8
Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
=for hackers
Found in file op.c
+=item dAX
+
+Sets up the C<ax> variable.
+This is usually handled automatically by C<xsubpp> by calling C<dXSARGS>.
+
+ dAX;
+
+=for hackers
+Found in file XSUB.h
+
+=item dITEMS
+
+Sets up the C<items> variable.
+This is usually handled automatically by C<xsubpp> by calling C<dXSARGS>.
+
+ dITEMS;
+
+=for hackers
+Found in file XSUB.h
+
=item dMARK
Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and
=item dXSARGS
-Sets up stack and mark pointers for an XSUB, calling dSP and dMARK. This
-is usually handled automatically by C<xsubpp>. Declares the C<items>
-variable to indicate the number of items on the stack.
+Sets up stack and mark pointers for an XSUB, calling dSP and dMARK.
+Sets up the C<ax> and C<items> variables by calling C<dAX> and C<dITEMS>.
+This is usually handled automatically by C<xsubpp>.
dXSARGS;
=for hackers
Found in file op.h
+=item grok_number
+
+Recognise (or not) a number. The type of the number is returned
+(0 if unrecognised), otherwise it is a bit-ORed combination of
+IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
+IS_NUMBER_NEG, IS_NUMBER_INFINITY (defined in perl.h). If the value
+of the number can fit an in UV, it is returned in the *valuep.
+
+ int grok_number(const char *pv, STRLEN len, UV *valuep)
+
+=for hackers
+Found in file util.c
+
+=item grok_numeric_radix
+
+Scan and skip for a numeric decimal separator (radix).
+
+ bool grok_numeric_radix(const char **sp, const char *send)
+
+=for hackers
+Found in file util.c
+
=item GvSV
Return the SV from the GV.
Returns the glob with the given C<name> and a defined subroutine or
C<NULL>. The glob lives in the given C<stash>, or in the stashes
-accessible via @ISA and @UNIVERSAL.
+accessible via @ISA and UNIVERSAL::.
The argument C<level> should be either 0 or -1. If C<level==0>, as a
side-effect creates a glob with the given C<name> in the given C<stash>
compute it. The return value is the new hash entry so created. It will be
NULL if the operation failed or if the value did not need to be actually
stored within the hash (as in the case of tied hashes). Otherwise the
-contents of the return value can be accessed using the C<He???> macros
+contents of the return value can be accessed using the C<He?> macros
described here. Note that the caller is responsible for suitably
incrementing the reference count of C<val> before the call, and
decrementing it if the function returned NULL.
=item is_utf8_char
-Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
-The actual number of bytes in the UTF-8 character will be returned if it
-is valid, otherwise 0.
-
+Tests if some arbitrary number of bytes begins in a valid UTF-8
+character. Note that an INVARIANT (i.e. ASCII) character is a valid UTF-8 character.
+The actual number of bytes in the UTF-8 character will be returned if
+it is valid, otherwise 0.
+
STRLEN is_utf8_char(U8 *p)
=for hackers
=item is_utf8_string
-Returns true if first C<len> bytes of the given string form valid a UTF8
-string, false otherwise.
+Returns true if first C<len> bytes of the given string form a valid UTF8
+string, false otherwise. Note that 'a valid UTF8 string' does not mean
+'a string that contains UTF8' because a valid ASCII string is a valid
+UTF8 string.
bool is_utf8_string(U8 *s, STRLEN len)
=for hackers
Found in file scope.h
+=item load_module
+
+Loads the module whose name is pointed to by the string part of name.
+Note that the actual module name, not its filename, should be given.
+Eg, "Foo::Bar" instead of "Foo/Bar.pm". flags can be any of
+PERL_LOADMOD_DENY, PERL_LOADMOD_NOIMPORT, or PERL_LOADMOD_IMPORT_OPS
+(or 0 for no flags). ver, if specified, provides version semantics
+similar to C<use Foo::Bar VERSION>. The optional trailing SV*
+arguments can be used to specify arguments to the module's import()
+method, similar to C<use Foo::Bar VERSION LIST>.
+
+ void load_module(U32 flags, SV* name, SV* ver, ...)
+
+=for hackers
+Found in file op.c
+
=item looks_like_number
Test if an the content of an SV looks like a number (or is a
=for hackers
Found in file perl.c
-=item PL_DBsingle
-
-When Perl is run in debugging mode, with the B<-d> switch, this SV is a
-boolean which indicates whether subs are being single-stepped.
-Single-stepping is automatically turned on after every step. This is the C
-variable which corresponds to Perl's $DB::single variable. See
-C<PL_DBsub>.
-
- SV * PL_DBsingle
-
-=for hackers
-Found in file intrpvar.h
-
-=item PL_DBsub
-
-When Perl is run in debugging mode, with the B<-d> switch, this GV contains
-the SV which holds the name of the sub being debugged. This is the C
-variable which corresponds to Perl's $DB::sub variable. See
-C<PL_DBsingle>.
-
- GV * PL_DBsub
-
-=for hackers
-Found in file intrpvar.h
-
-=item PL_DBtrace
-
-Trace variable used when Perl is run in debugging mode, with the B<-d>
-switch. This is the C variable which corresponds to Perl's $DB::trace
-variable. See C<PL_DBsingle>.
-
- SV * PL_DBtrace
-
-=for hackers
-Found in file intrpvar.h
-
-=item PL_dowarn
-
-The C variable which corresponds to Perl's $^W warning variable.
-
- bool PL_dowarn
-
-=for hackers
-Found in file intrpvar.h
-
-=item PL_last_in_gv
-
-The GV which was last used for a filehandle input operation. (C<< <FH> >>)
-
- GV* PL_last_in_gv
-
-=for hackers
-Found in file thrdvar.h
-
=item PL_modglobal
C<PL_modglobal> is a general purpose, interpreter global HV for use by
=for hackers
Found in file thrdvar.h
-=item PL_ofs_sv
-
-The output field separator - C<$,> in Perl space.
-
- SV* PL_ofs_sv
-
-=for hackers
-Found in file thrdvar.h
-
-=item PL_rs
-
-The input record separator - C<$/> in Perl space.
-
- SV* PL_rs
-
-=for hackers
-Found in file thrdvar.h
-
=item PL_sv_no
This is the C<false> SV. See C<PL_sv_yes>. Always refer to this as
=item POPp
-Pops a string off the stack.
+Pops a string off the stack. Deprecated. New code should provide
+a STRLEN n_a and use POPpx.
char* POPp
=for hackers
Found in file pp.h
+=item POPpbytex
+
+Pops a string off the stack which must consist of bytes i.e. characters < 256.
+Requires a variable STRLEN n_a in scope.
+
+ char* POPpbytex
+
+=for hackers
+Found in file pp.h
+
+=item POPpx
+
+Pops a string off the stack.
+Requires a variable STRLEN n_a in scope.
+
+ char* POPpx
+
+=for hackers
+Found in file pp.h
+
=item POPs
Pops an SV off the stack.
=item require_pv
-Tells Perl to C<require> a module.
+Tells Perl to C<require> the file named by the string argument. It is
+analogous to the Perl code C<eval "require '$file'">. It's even
+implemented that way; consider using Perl_load_module instead.
NOTE: the perl_ form of this function is deprecated.
NUL character). Calls C<sv_grow> to perform the expansion if necessary.
Returns a pointer to the character buffer.
- void SvGROW(SV* sv, STRLEN len)
+ char * SvGROW(SV* sv, STRLEN len)
=for hackers
Found in file sv.h
=item SvPOK_only
Tells an SV that it is a string and disables all other OK bits.
+Will also turn off the UTF8 status.
void SvPOK_only(SV* sv)
=item SvPOK_only_UTF8
-Tells an SV that it is a UTF8 string (do not use frivolously)
-and disables all other OK bits.
-
+Tells an SV that it is a string and disables all other OK bits,
+and leaves the UTF8 status as it was.
+
void SvPOK_only_UTF8(SV* sv)
=for hackers
=item SvUTF8_on
-Tells an SV that it is a string and encoded in UTF8. Do not use frivolously.
+Turn on the UTF8 status of an SV (the data is not changed, just the flag).
+Do not use frivolously.
void SvUTF8_on(SV *sv)
=item sv_catpv
Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
void sv_catpv(SV* sv, const char* ptr)
=item sv_catpvf
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
void sv_catpvf(SV* sv, const char* pat, ...)
=item sv_catpvn
Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
void sv_catpvn(SV* sv, const char* ptr, STRLEN len)
=for hackers
Found in file sv.c
+=item sv_catpvn_flags
+
+Concatenates the string onto the end of the string which is in the SV. The
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<dsv> if
+appropriate, else not. C<sv_catpvn> and C<sv_catpvn_nomg> are implemented
+in terms of this function.
+
+ void sv_catpvn_flags(SV* sv, const char* ptr, STRLEN len, I32 flags)
+
+=for hackers
+Found in file sv.c
+
=item sv_catpvn_mg
Like C<sv_catpvn>, but also handles 'set' magic.
=for hackers
Found in file sv.c
+=item sv_catsv_flags
+
+Concatenates the string from SV C<ssv> onto the end of the string in
+SV C<dsv>. Modifies C<dsv> but not C<ssv>. If C<flags> has C<SV_GMAGIC>
+bit set, will C<mg_get> on the SVs if appropriate, else not. C<sv_catsv>
+and C<sv_catsv_nomg> are implemented in terms of this function.
+
+ void sv_catsv_flags(SV* dsv, SV* ssv, I32 flags)
+
+=for hackers
+Found in file sv.c
+
=item sv_catsv_mg
Like C<sv_catsv>, but also handles 'set' magic.
=for hackers
Found in file sv.c
+=item sv_getcwd
+
+Fill the sv with current working directory
+
+ int sv_getcwd(SV* sv)
+
+=for hackers
+Found in file util.c
+
=item sv_gets
Get a line from the filehandle and store it into the SV, optionally
=for hackers
Found in file sv.c
+=item sv_pvn_force_flags
+
+Get a sensible string out of the SV somehow.
+If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<sv> if
+appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are
+implemented in terms of this function.
+
+ char* sv_pvn_force_flags(SV* sv, STRLEN* lp, I32 flags)
+
+=for hackers
+Found in file sv.c
+
=item sv_pvutf8n_force
Get a sensible UTF8-encoded string out of the SV somehow. See
=for hackers
Found in file sv.c
+=item sv_realpath
+
+Wrap or emulate realpath(3).
+
+ int sv_realpath(SV* sv, char *path, STRLEN len)
+
+=for hackers
+Found in file util.c
+
=item sv_reftype
Returns a string describing what the SV is a reference to.
=for hackers
Found in file sv.c
+=item sv_setref_uv
+
+Copies an unsigned integer into a new SV, optionally blessing the SV. The C<rv>
+argument will be upgraded to an RV. That RV will be modified to point to
+the new SV. The C<classname> argument indicates the package for the
+blessing. Set C<classname> to C<Nullch> to avoid the blessing. The new SV
+will be returned and will have a reference count of 1.
+
+ SV* sv_setref_uv(SV* rv, const char* classname, UV uv)
+
+=for hackers
+Found in file sv.c
+
=item sv_setsv
Copies the contents of the source SV C<ssv> into the destination SV C<dsv>.
=for hackers
Found in file sv.c
+=item sv_setsv_flags
+
+Copies the contents of the source SV C<ssv> into the destination SV C<dsv>.
+The source SV may be destroyed if it is mortal. Does not handle 'set'
+magic. If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<ssv> if
+appropriate, else not. C<sv_setsv> and C<sv_setsv_nomg> are implemented
+in terms of this function.
+
+ void sv_setsv_flags(SV* dsv, SV* ssv, I32 flags)
+
+=for hackers
+Found in file sv.c
+
=item sv_setsv_mg
Like C<sv_setsv>, but also handles 'set' magic.
=for hackers
Found in file sv.c
+=item sv_utf8_decode
+
+Convert the octets in the PV from UTF-8 to chars. Scan for validity and then
+turn of SvUTF8 if needed so that we see characters. Used as a building block
+for decode_utf8 in Encode.xs
+
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ bool sv_utf8_decode(SV *sv)
+
+=for hackers
+Found in file sv.c
+
=item sv_utf8_downgrade
Attempt to convert the PV of an SV from UTF8-encoded to byte encoding.
=item sv_utf8_encode
Convert the PV of an SV to UTF8-encoded, but then turn off the C<SvUTF8>
-flag so that it looks like bytes again. Nothing calls this.
-
-NOTE: this function is experimental and may change or be
-removed without notice.
+flag so that it looks like octets again. Used as a building block
+for encode_utf8 in Encode.xs
void sv_utf8_encode(SV *sv)
=item sv_utf8_upgrade
Convert the PV of an SV to its UTF8-encoded form.
+Forces the SV to string form it it is not already.
+Always sets the SvUTF8 flag to avoid future validity checks even
+if all the bytes have hibit clear.
+
+ STRLEN sv_utf8_upgrade(SV *sv)
+
+=for hackers
+Found in file sv.c
+
+=item sv_utf8_upgrade_flags
+
+Convert the PV of an SV to its UTF8-encoded form.
+Forces the SV to string form it it is not already.
+Always sets the SvUTF8 flag to avoid future validity checks even
+if all the bytes have hibit clear. If C<flags> has C<SV_GMAGIC> bit set,
+will C<mg_get> on C<sv> if appropriate, else not. C<sv_utf8_upgrade> and
+C<sv_utf8_upgrade_nomg> are implemented in terms of this function.
- void sv_utf8_upgrade(SV *sv)
+ STRLEN sv_utf8_upgrade_flags(SV *sv, I32 flags)
=for hackers
Found in file sv.c
=for hackers
Found in file handy.h
+=item utf8n_to_uvchr
+
+Returns the native character value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
+
+Allows length and flags to be passed to low level routine.
+
+ UV utf8n_to_uvchr(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
+
+=for hackers
+Found in file utf8.c
+
+=item utf8n_to_uvuni
+
+Bottom level UTF-8 decode routine.
+Returns the unicode code point value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding and no longer than C<curlen>;
+C<retlen> will be set to the length, in bytes, of that character.
+
+If C<s> does not point to a well-formed UTF8 character, the behaviour
+is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
+it is assumed that the caller will raise a warning, and this function
+will silently just set C<retlen> to C<-1> and return zero. If the
+C<flags> does not contain UTF8_CHECK_ONLY, warnings about
+malformations will be given, C<retlen> will be set to the expected
+length of the UTF-8 character in bytes, and zero will be returned.
+
+The C<flags> can also contain various flags to allow deviations from
+the strict UTF-8 encoding (see F<utf8.h>).
+
+Most code should use utf8_to_uvchr() rather than call this directly.
+
+ UV utf8n_to_uvuni(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
+
+=for hackers
+Found in file utf8.c
+
=item utf8_distance
Returns the number of UTF8 characters between the UTF-8 pointers C<a>
=for hackers
Found in file utf8.c
-=item utf8_to_uv
+=item utf8_to_uvchr
-Returns the character value of the first character in the string C<s>
-which is assumed to be in UTF8 encoding and no longer than C<curlen>;
-C<retlen> will be set to the length, in bytes, of that character.
-
-If C<s> does not point to a well-formed UTF8 character, the behaviour
-is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
-it is assumed that the caller will raise a warning, and this function
-will silently just set C<retlen> to C<-1> and return zero. If the
-C<flags> does not contain UTF8_CHECK_ONLY, warnings about
-malformations will be given, C<retlen> will be set to the expected
-length of the UTF-8 character in bytes, and zero will be returned.
+Returns the native character value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
-The C<flags> can also contain various flags to allow deviations from
-the strict UTF-8 encoding (see F<utf8.h>).
+If C<s> does not point to a well-formed UTF8 character, zero is
+returned and retlen is set, if possible, to -1.
- UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
+ UV utf8_to_uvchr(U8 *s, STRLEN* retlen)
=for hackers
Found in file utf8.c
-=item utf8_to_uv_simple
+=item utf8_to_uvuni
-Returns the character value of the first character in the string C<s>
+Returns the Unicode code point of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
length, in bytes, of that character.
+This function should only be used when returned UV is considered
+an index into the Unicode semantic tables (e.g. swashes).
+
If C<s> does not point to a well-formed UTF8 character, zero is
returned and retlen is set, if possible, to -1.
- UV utf8_to_uv_simple(U8 *s, STRLEN* retlen)
+ UV utf8_to_uvuni(U8 *s, STRLEN* retlen)
+
+=for hackers
+Found in file utf8.c
+
+=item uvchr_to_utf8
+
+Adds the UTF8 representation of the Native codepoint C<uv> to the end
+of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uvchr_to_utf8(d, uv);
+
+is the recommended wide native character-aware way of saying
+
+ *(d++) = uv;
+
+ U8* uvchr_to_utf8(U8 *d, UV uv)
=for hackers
Found in file utf8.c
-=item uv_to_utf8
+=item uvuni_to_utf8
Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free
bytes available. The return value is the pointer to the byte after the
-end of the new character. In other words,
+end of the new character. In other words,
- d = uv_to_utf8(d, uv);
+ d = uvuni_to_utf8(d, uv);
is the recommended Unicode-aware way of saying
*(d++) = uv;
- U8* uv_to_utf8(U8 *d, UV uv)
+ U8* uvuni_to_utf8(U8 *d, UV uv)
=for hackers
Found in file utf8.c