This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Change 3 functions to be #defines of others
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 247#ifdef GCC_DIAG_PRAGMA
6ab56f1e 248GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 249#endif
d1decf2b
TC
250static void
251S_croak_memory_wrap(void)
252{
253 Perl_croak_nocontext("%s",PL_memory_wrap);
254}
c1d6452f 255#ifdef GCC_DIAG_PRAGMA
6ab56f1e 256GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 257#endif
d1decf2b 258
a8a2ceaa
KW
259/* ------------------------------- utf8.h ------------------------------- */
260
2fe720e2
KW
261/*
262=head1 Unicode Support
263*/
264
55d09dc8
KW
265PERL_STATIC_INLINE void
266S_append_utf8_from_native_byte(const U8 byte, U8** dest)
267{
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
270
55d09dc8
KW
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
272
6f2d5cbc 273 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 274 *((*dest)++) = byte;
55d09dc8 275 else {
a09ec51a
KW
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
278 }
279}
280
e123187a 281/*
2fe720e2 282=for apidoc valid_utf8_to_uvchr
2717076a 283Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
284the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286non-Unicode code points are allowed.
287
288=cut
289
290 */
291
292PERL_STATIC_INLINE UV
293Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
294{
c41b2540 295 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
296 const U8* send = s + expectlen;
297 UV uv = *s;
298
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
300
301 if (retlen) {
302 *retlen = expectlen;
303 }
304
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
307 return uv;
308 }
309
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
313
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
319 }
320
321 return UNI_TO_NATIVE(uv);
322
323}
324
1e599354
KW
325/*
326=for apidoc is_utf8_invariant_string
327
82c5d941 328Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 329regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
330EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333characters are invariant, but so also are the C1 controls.
1e599354
KW
334
335If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336use this option, that C<s> can't have embedded C<NUL> characters and has to
337have a terminating C<NUL> byte).
338
9f2abfde
KW
339See also
340C<L</is_utf8_string>>,
341C<L</is_utf8_string_flags>>,
342C<L</is_utf8_string_loc>>,
343C<L</is_utf8_string_loc_flags>>,
344C<L</is_utf8_string_loclen>>,
345C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
346C<L</is_utf8_fixed_width_buf_flags>>,
347C<L</is_utf8_fixed_width_buf_loc_flags>>,
348C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
349C<L</is_strict_utf8_string>>,
350C<L</is_strict_utf8_string_loc>>,
351C<L</is_strict_utf8_string_loclen>>,
352C<L</is_c9strict_utf8_string>>,
353C<L</is_c9strict_utf8_string_loc>>,
354and
355C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
356
357=cut
0cbf5865
KW
358
359*/
360
361#define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
363
364/*
365=for apidoc is_utf8_invariant_string_loc
366
367Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368the first UTF-8 variant character in the C<ep> pointer; if all characters are
369UTF-8 invariant, this function does not change the contents of C<*ep>.
370
371=cut
372
1e599354
KW
373*/
374
375PERL_STATIC_INLINE bool
e17544a6 376S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 377{
e17544a6 378 const U8* send;
1e599354
KW
379 const U8* x = s;
380
0cbf5865
KW
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
382
e17544a6
KW
383 if (len == 0) {
384 len = strlen((const char *)s);
385 }
386
387 send = s + len;
388
389#ifndef EBCDIC
390 /* Try to get the widest word on this platform */
391# ifdef HAS_LONG_LONG
392# define PERL_WORDCAST unsigned long long
393# define PERL_WORDSIZE LONGLONGSIZE
394# else
395# define PERL_WORDCAST UV
396# define PERL_WORDSIZE UVSIZE
397# endif
398
399# if PERL_WORDSIZE == 4
400# define PERL_VARIANTS_WORD_MASK 0x80808080
401# define PERL_WORD_BOUNDARY_MASK 0x3
402# elif PERL_WORDSIZE == 8
403# define PERL_VARIANTS_WORD_MASK 0x8080808080808080
404# define PERL_WORD_BOUNDARY_MASK 0x7
405# else
406# error Unexpected word size
407# endif
408
409 /* Process per-byte until reach word boundary. XXX This loop could be
410 * eliminated if we knew that this platform had fast unaligned reads */
411 while (x < send && (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) {
412 if (! UTF8_IS_INVARIANT(*x)) {
413 if (ep) {
414 *ep = x;
415 }
416
417 return FALSE;
418 }
419 x++;
420 }
421
422 /* Process per-word as long as we have at least a full word left */
423 while (x + PERL_WORDSIZE <= send) {
424 if ((* (PERL_WORDCAST *) x) & PERL_VARIANTS_WORD_MASK) {
425
426 /* Found a variant. Just return if caller doesn't want its exact
427 * position */
428 if (! ep) {
429 return FALSE;
430 }
431
432 /* Otherwise fall into final loop to find which byte it is */
433 break;
0cbf5865 434 }
e17544a6
KW
435 x += PERL_WORDSIZE;
436 }
437
438# undef PERL_WORDCAST
439# undef PERL_WORDSIZE
440# undef PERL_WORD_BOUNDARY_MASK
441# undef PERL_VARIANTS_WORD_MASK
442#endif
443
444 /* Process per-byte */
445 while (x < send) {
446 if (! UTF8_IS_INVARIANT(*x)) {
447 if (ep) {
448 *ep = x;
449 }
0cbf5865 450
e17544a6 451 return FALSE;
0cbf5865 452 }
1e599354 453
e17544a6 454 x++;
1e599354
KW
455 }
456
457 return TRUE;
458}
459
7c93d8f0 460/*
5ff889fb
KW
461=for apidoc is_utf8_string
462
82c5d941
KW
463Returns TRUE if the first C<len> bytes of string C<s> form a valid
464Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
465be calculated using C<strlen(s)> (which means if you use this option, that C<s>
466can't have embedded C<NUL> characters and has to have a terminating C<NUL>
467byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
468
2717076a
KW
469This function considers Perl's extended UTF-8 to be valid. That means that
470code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
471considered valid by this function. Use C<L</is_strict_utf8_string>>,
472C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
473code points are considered valid.
5ff889fb 474
9f2abfde
KW
475See also
476C<L</is_utf8_invariant_string>>,
0cbf5865 477C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
478C<L</is_utf8_string_loc>>,
479C<L</is_utf8_string_loclen>>,
8bc127bf
KW
480C<L</is_utf8_fixed_width_buf_flags>>,
481C<L</is_utf8_fixed_width_buf_loc_flags>>,
482C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
483
484=cut
485*/
486
dd237e82 487#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb
KW
488
489/*
9f2abfde
KW
490=for apidoc is_strict_utf8_string
491
492Returns TRUE if the first C<len> bytes of string C<s> form a valid
493UTF-8-encoded string that is fully interchangeable by any application using
494Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
495calculated using C<strlen(s)> (which means if you use this option, that C<s>
496can't have embedded C<NUL> characters and has to have a terminating C<NUL>
497byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
498
499This function returns FALSE for strings containing any
500code points above the Unicode max of 0x10FFFF, surrogate code points, or
501non-character code points.
502
503See also
504C<L</is_utf8_invariant_string>>,
0cbf5865 505C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
506C<L</is_utf8_string>>,
507C<L</is_utf8_string_flags>>,
508C<L</is_utf8_string_loc>>,
509C<L</is_utf8_string_loc_flags>>,
510C<L</is_utf8_string_loclen>>,
511C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
512C<L</is_utf8_fixed_width_buf_flags>>,
513C<L</is_utf8_fixed_width_buf_loc_flags>>,
514C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
515C<L</is_strict_utf8_string_loc>>,
516C<L</is_strict_utf8_string_loclen>>,
517C<L</is_c9strict_utf8_string>>,
518C<L</is_c9strict_utf8_string_loc>>,
519and
520C<L</is_c9strict_utf8_string_loclen>>.
521
522=cut
523*/
524
dd237e82 525#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
526
527/*
528=for apidoc is_c9strict_utf8_string
529
530Returns TRUE if the first C<len> bytes of string C<s> form a valid
531UTF-8-encoded string that conforms to
532L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
533otherwise it returns FALSE. If C<len> is 0, it will be calculated using
534C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
535C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
536characters being ASCII constitute 'a valid UTF-8 string'.
537
538This function returns FALSE for strings containing any code points above the
539Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
540code points per
541L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
542
543See also
544C<L</is_utf8_invariant_string>>,
0cbf5865 545C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
546C<L</is_utf8_string>>,
547C<L</is_utf8_string_flags>>,
548C<L</is_utf8_string_loc>>,
549C<L</is_utf8_string_loc_flags>>,
550C<L</is_utf8_string_loclen>>,
551C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
552C<L</is_utf8_fixed_width_buf_flags>>,
553C<L</is_utf8_fixed_width_buf_loc_flags>>,
554C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
555C<L</is_strict_utf8_string>>,
556C<L</is_strict_utf8_string_loc>>,
557C<L</is_strict_utf8_string_loclen>>,
558C<L</is_c9strict_utf8_string_loc>>,
559and
560C<L</is_c9strict_utf8_string_loclen>>.
561
562=cut
563*/
564
dd237e82 565#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
566
567/*
568=for apidoc is_utf8_string_flags
569
570Returns TRUE if the first C<len> bytes of string C<s> form a valid
571UTF-8 string, subject to the restrictions imposed by C<flags>;
572returns FALSE otherwise. If C<len> is 0, it will be calculated
573using C<strlen(s)> (which means if you use this option, that C<s> can't have
574embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
575that all characters being ASCII constitute 'a valid UTF-8 string'.
576
577If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
578C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
579as C<L</is_strict_utf8_string>>; and if C<flags> is
580C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
581C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
582combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
583C<L</utf8n_to_uvchr>>, with the same meanings.
584
585See also
586C<L</is_utf8_invariant_string>>,
0cbf5865 587C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
588C<L</is_utf8_string>>,
589C<L</is_utf8_string_loc>>,
590C<L</is_utf8_string_loc_flags>>,
591C<L</is_utf8_string_loclen>>,
592C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
593C<L</is_utf8_fixed_width_buf_flags>>,
594C<L</is_utf8_fixed_width_buf_loc_flags>>,
595C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
596C<L</is_strict_utf8_string>>,
597C<L</is_strict_utf8_string_loc>>,
598C<L</is_strict_utf8_string_loclen>>,
599C<L</is_c9strict_utf8_string>>,
600C<L</is_c9strict_utf8_string_loc>>,
601and
602C<L</is_c9strict_utf8_string_loclen>>.
603
604=cut
605*/
606
607PERL_STATIC_INLINE bool
f60f61fd 608S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 609{
f60f61fd 610 const U8* send;
9f2abfde
KW
611 const U8* x = s;
612
613 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
614 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 615 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 616
f60f61fd
KW
617 if (len == 0) {
618 len = strlen((const char *)s);
619 }
620
9f2abfde
KW
621 if (flags == 0) {
622 return is_utf8_string(s, len);
623 }
624
d044b7a7 625 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
626 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
627 {
628 return is_strict_utf8_string(s, len);
629 }
630
d044b7a7 631 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
632 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
633 {
634 return is_c9strict_utf8_string(s, len);
635 }
636
f60f61fd 637 send = s + len;
9f2abfde
KW
638 while (x < send) {
639 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
640 if (UNLIKELY(! cur_len)) {
641 return FALSE;
642 }
643 x += cur_len;
644 }
645
646 return TRUE;
647}
648
649/*
5ff889fb
KW
650
651=for apidoc is_utf8_string_loc
652
2717076a 653Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 654case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 655"utf8ness success") in the C<ep> pointer.
5ff889fb 656
2717076a 657See also C<L</is_utf8_string_loclen>>.
5ff889fb 658
3964c812
KW
659=cut
660*/
661
662#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
663
664/*
665
5ff889fb
KW
666=for apidoc is_utf8_string_loclen
667
2717076a 668Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 669case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 670"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 671encoded characters in the C<el> pointer.
5ff889fb 672
2717076a 673See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
674
675=cut
676*/
677
56e4cf64 678PERL_STATIC_INLINE bool
c41b2540 679Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb
KW
680{
681 const U8* const send = s + (len ? len : strlen((const char *)s));
682 const U8* x = s;
683 STRLEN outlen = 0;
684
685 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
686
687 while (x < send) {
c41b2540
KW
688 const STRLEN cur_len = isUTF8_CHAR(x, send);
689 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
690 break;
691 }
c41b2540 692 x += cur_len;
5ff889fb
KW
693 outlen++;
694 }
695
696 if (el)
697 *el = outlen;
698
699 if (ep) {
700 *ep = x;
701 }
702
703 return (x == send);
704}
705
706/*
9f2abfde
KW
707
708=for apidoc is_strict_utf8_string_loc
709
710Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
711case of "utf8ness failure") or the location C<s>+C<len> (in the case of
712"utf8ness success") in the C<ep> pointer.
713
714See also C<L</is_strict_utf8_string_loclen>>.
715
716=cut
717*/
718
719#define is_strict_utf8_string_loc(s, len, ep) \
720 is_strict_utf8_string_loclen(s, len, ep, 0)
721
722/*
723
724=for apidoc is_strict_utf8_string_loclen
725
726Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
727case of "utf8ness failure") or the location C<s>+C<len> (in the case of
728"utf8ness success") in the C<ep> pointer, and the number of UTF-8
729encoded characters in the C<el> pointer.
730
731See also C<L</is_strict_utf8_string_loc>>.
732
733=cut
734*/
735
736PERL_STATIC_INLINE bool
737S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
738{
739 const U8* const send = s + (len ? len : strlen((const char *)s));
740 const U8* x = s;
741 STRLEN outlen = 0;
742
743 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
744
745 while (x < send) {
746 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
747 if (UNLIKELY(! cur_len)) {
748 break;
749 }
750 x += cur_len;
751 outlen++;
752 }
753
754 if (el)
755 *el = outlen;
756
757 if (ep) {
758 *ep = x;
759 }
760
761 return (x == send);
762}
763
764/*
765
766=for apidoc is_c9strict_utf8_string_loc
767
768Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
769the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
770"utf8ness success") in the C<ep> pointer.
771
772See also C<L</is_c9strict_utf8_string_loclen>>.
773
774=cut
775*/
776
777#define is_c9strict_utf8_string_loc(s, len, ep) \
778 is_c9strict_utf8_string_loclen(s, len, ep, 0)
779
780/*
781
782=for apidoc is_c9strict_utf8_string_loclen
783
784Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
785the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
786"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
787characters in the C<el> pointer.
788
789See also C<L</is_c9strict_utf8_string_loc>>.
790
791=cut
792*/
793
794PERL_STATIC_INLINE bool
795S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
796{
797 const U8* const send = s + (len ? len : strlen((const char *)s));
798 const U8* x = s;
799 STRLEN outlen = 0;
800
801 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
802
803 while (x < send) {
804 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
805 if (UNLIKELY(! cur_len)) {
806 break;
807 }
808 x += cur_len;
809 outlen++;
810 }
811
812 if (el)
813 *el = outlen;
814
815 if (ep) {
816 *ep = x;
817 }
818
819 return (x == send);
820}
821
822/*
823
824=for apidoc is_utf8_string_loc_flags
825
826Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
827case of "utf8ness failure") or the location C<s>+C<len> (in the case of
828"utf8ness success") in the C<ep> pointer.
829
830See also C<L</is_utf8_string_loclen_flags>>.
831
832=cut
833*/
834
835#define is_utf8_string_loc_flags(s, len, ep, flags) \
836 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
837
838
839/* The above 3 actual functions could have been moved into the more general one
840 * just below, and made #defines that call it with the right 'flags'. They are
841 * currently kept separate to increase their chances of getting inlined */
842
843/*
844
845=for apidoc is_utf8_string_loclen_flags
846
847Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
848case of "utf8ness failure") or the location C<s>+C<len> (in the case of
849"utf8ness success") in the C<ep> pointer, and the number of UTF-8
850encoded characters in the C<el> pointer.
851
852See also C<L</is_utf8_string_loc_flags>>.
853
854=cut
855*/
856
857PERL_STATIC_INLINE bool
f60f61fd 858S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 859{
f60f61fd 860 const U8* send;
9f2abfde
KW
861 const U8* x = s;
862 STRLEN outlen = 0;
863
864 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
865 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 866 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 867
f60f61fd
KW
868 if (len == 0) {
869 len = strlen((const char *)s);
870 }
871
9f2abfde
KW
872 if (flags == 0) {
873 return is_utf8_string_loclen(s, len, ep, el);
874 }
875
d044b7a7 876 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
877 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
878 {
879 return is_strict_utf8_string_loclen(s, len, ep, el);
880 }
881
d044b7a7 882 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
883 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
884 {
885 return is_c9strict_utf8_string_loclen(s, len, ep, el);
886 }
887
f60f61fd 888 send = s + len;
9f2abfde
KW
889 while (x < send) {
890 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
891 if (UNLIKELY(! cur_len)) {
892 break;
893 }
894 x += cur_len;
895 outlen++;
896 }
897
898 if (el)
899 *el = outlen;
900
901 if (ep) {
902 *ep = x;
903 }
904
905 return (x == send);
906}
907
908/*
7c93d8f0
KW
909=for apidoc utf8_distance
910
911Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
912and C<b>.
913
914WARNING: use only if you *know* that the pointers point inside the
915same UTF-8 buffer.
916
917=cut
918*/
919
920PERL_STATIC_INLINE IV
921Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
922{
923 PERL_ARGS_ASSERT_UTF8_DISTANCE;
924
925 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
926}
927
928/*
929=for apidoc utf8_hop
930
931Return the UTF-8 pointer C<s> displaced by C<off> characters, either
932forward or backward.
933
934WARNING: do not use the following unless you *know* C<off> is within
935the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
936on the first byte of character or just after the last byte of a character.
937
938=cut
939*/
940
941PERL_STATIC_INLINE U8 *
942Perl_utf8_hop(const U8 *s, SSize_t off)
943{
944 PERL_ARGS_ASSERT_UTF8_HOP;
945
946 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
947 * the bitops (especially ~) can create illegal UTF-8.
948 * In other words: in Perl UTF-8 is not just for Unicode. */
949
950 if (off >= 0) {
951 while (off--)
952 s += UTF8SKIP(s);
953 }
954 else {
955 while (off++) {
956 s--;
957 while (UTF8_IS_CONTINUATION(*s))
958 s--;
959 }
960 }
de979548 961 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 962 return (U8 *)s;
de979548 963 GCC_DIAG_RESTORE;
7c93d8f0
KW
964}
965
4dab108f 966/*
65df57a8
TC
967=for apidoc utf8_hop_forward
968
969Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
970forward.
971
972C<off> must be non-negative.
973
974C<s> must be before or equal to C<end>.
975
976When moving forward it will not move beyond C<end>.
977
978Will not exceed this limit even if the string is not valid "UTF-8".
979
980=cut
981*/
982
983PERL_STATIC_INLINE U8 *
984Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
985{
986 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
987
988 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
989 * the bitops (especially ~) can create illegal UTF-8.
990 * In other words: in Perl UTF-8 is not just for Unicode. */
991
992 assert(s <= end);
993 assert(off >= 0);
994
995 while (off--) {
996 STRLEN skip = UTF8SKIP(s);
de979548
P
997 if ((STRLEN)(end - s) <= skip) {
998 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 999 return (U8 *)end;
de979548
P
1000 GCC_DIAG_RESTORE;
1001 }
65df57a8
TC
1002 s += skip;
1003 }
1004
de979548 1005 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1006 return (U8 *)s;
de979548 1007 GCC_DIAG_RESTORE;
65df57a8
TC
1008}
1009
1010/*
1011=for apidoc utf8_hop_back
1012
1013Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1014backward.
1015
1016C<off> must be non-positive.
1017
1018C<s> must be after or equal to C<start>.
1019
1020When moving backward it will not move before C<start>.
1021
1022Will not exceed this limit even if the string is not valid "UTF-8".
1023
1024=cut
1025*/
1026
1027PERL_STATIC_INLINE U8 *
1028Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1029{
1030 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1031
1032 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1033 * the bitops (especially ~) can create illegal UTF-8.
1034 * In other words: in Perl UTF-8 is not just for Unicode. */
1035
1036 assert(start <= s);
1037 assert(off <= 0);
1038
1039 while (off++ && s > start) {
1040 s--;
1041 while (UTF8_IS_CONTINUATION(*s) && s > start)
1042 s--;
1043 }
1044
de979548 1045 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1046 return (U8 *)s;
de979548 1047 GCC_DIAG_RESTORE;
65df57a8
TC
1048}
1049
1050/*
1051=for apidoc utf8_hop_safe
1052
1053Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1054either forward or backward.
1055
1056When moving backward it will not move before C<start>.
1057
1058When moving forward it will not move beyond C<end>.
1059
1060Will not exceed those limits even if the string is not valid "UTF-8".
1061
1062=cut
1063*/
1064
1065PERL_STATIC_INLINE U8 *
1066Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1067{
1068 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1069
1070 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1071 * the bitops (especially ~) can create illegal UTF-8.
1072 * In other words: in Perl UTF-8 is not just for Unicode. */
1073
1074 assert(start <= s && s <= end);
1075
1076 if (off >= 0) {
1077 return utf8_hop_forward(s, off, end);
1078 }
1079 else {
1080 return utf8_hop_back(s, off, start);
1081 }
1082}
1083
1084/*
4dab108f
KW
1085
1086=for apidoc is_utf8_valid_partial_char
1087
6cbb9248
KW
1088Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1089S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1090points. Otherwise, it returns 1 if there exists at least one non-empty
1091sequence of bytes that when appended to sequence C<s>, starting at position
1092C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1093otherwise returns 0.
1094
1095In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1096point.
1097
1098This is useful when a fixed-length buffer is being tested for being well-formed
1099UTF-8, but the final few bytes in it don't comprise a full character; that is,
1100it is split somewhere in the middle of the final code point's UTF-8
1101representation. (Presumably when the buffer is refreshed with the next chunk
1102of data, the new first bytes will complete the partial code point.) This
1103function is used to verify that the final bytes in the current buffer are in
1104fact the legal beginning of some code point, so that if they aren't, the
1105failure can be signalled without having to wait for the next read.
4dab108f
KW
1106
1107=cut
1108*/
2717076a
KW
1109#define is_utf8_valid_partial_char(s, e) \
1110 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1111
1112/*
1113
1114=for apidoc is_utf8_valid_partial_char_flags
1115
1116Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1117or not the input is a valid UTF-8 encoded partial character, but it takes an
1118extra parameter, C<flags>, which can further restrict which code points are
1119considered valid.
1120
1121If C<flags> is 0, this behaves identically to
1122C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1123of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1124there is any sequence of bytes that can complete the input partial character in
1125such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1126TRUE; otherwise FALSE. Non character code points cannot be determined based on
1127partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1128determined from just the first one or two bytes.
1129
1130=cut
1131 */
1132
56e4cf64 1133PERL_STATIC_INLINE bool
f1c999a7 1134S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1135{
f1c999a7 1136 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1137
f1c999a7 1138 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1139 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1140
8875bd48 1141 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1142 return FALSE;
1143 }
1144
f1c999a7 1145 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1146}
1147
8bc127bf
KW
1148/*
1149
1150=for apidoc is_utf8_fixed_width_buf_flags
1151
1152Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1153is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1154otherwise it returns FALSE.
1155
1156If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1157without restriction. If the final few bytes of the buffer do not form a
1158complete code point, this will return TRUE anyway, provided that
1159C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1160
1161If C<flags> in non-zero, it can be any combination of the
1162C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1163same meanings.
1164
1165This function differs from C<L</is_utf8_string_flags>> only in that the latter
1166returns FALSE if the final few bytes of the string don't form a complete code
1167point.
1168
1169=cut
1170 */
1171#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1172 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1173
1174/*
1175
1176=for apidoc is_utf8_fixed_width_buf_loc_flags
1177
1178Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1179failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1180to the beginning of any partial character at the end of the buffer; if there is
1181no partial character C<*ep> will contain C<s>+C<len>.
1182
1183See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1184
1185=cut
1186*/
1187
1188#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1189 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1190
1191/*
1192
1193=for apidoc is_utf8_fixed_width_buf_loclen_flags
1194
1195Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1196complete, valid characters found in the C<el> pointer.
1197
1198=cut
1199*/
1200
1201PERL_STATIC_INLINE bool
1202S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1203 const STRLEN len,
1204 const U8 **ep,
1205 STRLEN *el,
1206 const U32 flags)
1207{
1208 const U8 * maybe_partial;
1209
1210 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1211
1212 if (! ep) {
1213 ep = &maybe_partial;
1214 }
1215
1216 /* If it's entirely valid, return that; otherwise see if the only error is
1217 * that the final few bytes are for a partial character */
1218 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1219 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1220}
1221
c8028aa6
TC
1222/* ------------------------------- perl.h ----------------------------- */
1223
1224/*
dcccc8ff
KW
1225=head1 Miscellaneous Functions
1226
41188aa0 1227=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1228
6602b933 1229Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1230If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1231
1232Return TRUE if the name is safe.
1233
796b6530 1234Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1235
1236=cut
1237*/
1238
1239PERL_STATIC_INLINE bool
41188aa0 1240S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1241 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1242 * perl itself uses xce*() functions which accept 8-bit strings.
1243 */
1244
1245 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1246
6c4650b3 1247 if (len > 1) {
c8028aa6 1248 char *null_at;
41188aa0 1249 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1250 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1251 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1252 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1253 what, op_name, pv, null_at+1);
c8028aa6
TC
1254 return FALSE;
1255 }
1256 }
1257
1258 return TRUE;
1259}
1260
1261/*
7cb3f959
TC
1262
1263Return true if the supplied filename has a newline character
fa6c7d00 1264immediately before the first (hopefully only) NUL.
7cb3f959
TC
1265
1266My original look at this incorrectly used the len from SvPV(), but
1267that's incorrect, since we allow for a NUL in pv[len-1].
1268
1269So instead, strlen() and work from there.
1270
1271This allow for the user reading a filename, forgetting to chomp it,
1272then calling:
1273
1274 open my $foo, "$file\0";
1275
1276*/
1277
1278#ifdef PERL_CORE
1279
1280PERL_STATIC_INLINE bool
1281S_should_warn_nl(const char *pv) {
1282 STRLEN len;
1283
1284 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1285
1286 len = strlen(pv);
1287
1288 return len > 0 && pv[len-1] == '\n';
1289}
1290
1291#endif
1292
81d52ecd
JH
1293/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1294
1295#define MAX_CHARSET_NAME_LENGTH 2
1296
1297PERL_STATIC_INLINE const char *
1298get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1299{
1300 /* Returns a string that corresponds to the name of the regex character set
1301 * given by 'flags', and *lenp is set the length of that string, which
1302 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1303
1304 *lenp = 1;
1305 switch (get_regex_charset(flags)) {
1306 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1307 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1308 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1309 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1310 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1311 *lenp = 2;
1312 return ASCII_MORE_RESTRICT_PAT_MODS;
1313 }
1314 /* The NOT_REACHED; hides an assert() which has a rather complex
1315 * definition in perl.h. */
1316 NOT_REACHED; /* NOTREACHED */
1317 return "?"; /* Unknown */
1318}
1319
7cb3f959 1320/*
ed382232
TC
1321
1322Return false if any get magic is on the SV other than taint magic.
1323
1324*/
1325
1326PERL_STATIC_INLINE bool
1327S_sv_only_taint_gmagic(SV *sv) {
1328 MAGIC *mg = SvMAGIC(sv);
1329
1330 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1331
1332 while (mg) {
1333 if (mg->mg_type != PERL_MAGIC_taint
1334 && !(mg->mg_flags & MGf_GSKIP)
1335 && mg->mg_virtual->svt_get) {
1336 return FALSE;
1337 }
1338 mg = mg->mg_moremagic;
1339 }
1340
1341 return TRUE;
1342}
1343
ed8ff0f3
DM
1344/* ------------------ cop.h ------------------------------------------- */
1345
1346
1347/* Enter a block. Push a new base context and return its address. */
1348
1349PERL_STATIC_INLINE PERL_CONTEXT *
1350S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1351{
1352 PERL_CONTEXT * cx;
1353
1354 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1355
1356 CXINC;
1357 cx = CX_CUR();
1358 cx->cx_type = type;
1359 cx->blk_gimme = gimme;
1360 cx->blk_oldsaveix = saveix;
4caf7d8c 1361 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1362 cx->blk_oldcop = PL_curcop;
4caf7d8c 1363 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1364 cx->blk_oldscopesp = PL_scopestack_ix;
1365 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1366 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1367
1368 PL_tmps_floor = PL_tmps_ix;
1369 CX_DEBUG(cx, "PUSH");
1370 return cx;
1371}
1372
1373
1374/* Exit a block (RETURN and LAST). */
1375
1376PERL_STATIC_INLINE void
1377S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1378{
1379 PERL_ARGS_ASSERT_CX_POPBLOCK;
1380
1381 CX_DEBUG(cx, "POP");
1382 /* these 3 are common to cx_popblock and cx_topblock */
1383 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1384 PL_scopestack_ix = cx->blk_oldscopesp;
1385 PL_curpm = cx->blk_oldpm;
1386
1387 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1388 * and leaves a CX entry lying around for repeated use, so
1389 * skip for multicall */ \
1390 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1391 || PL_savestack_ix == cx->blk_oldsaveix);
1392 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1393 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1394}
1395
1396/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1397 * Whereas cx_popblock() restores the state to the point just before
1398 * cx_pushblock() was called, cx_topblock() restores it to the point just
1399 * *after* cx_pushblock() was called. */
1400
1401PERL_STATIC_INLINE void
1402S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1403{
1404 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1405
1406 CX_DEBUG(cx, "TOP");
1407 /* these 3 are common to cx_popblock and cx_topblock */
1408 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1409 PL_scopestack_ix = cx->blk_oldscopesp;
1410 PL_curpm = cx->blk_oldpm;
1411
1412 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1413}
1414
1415
a73d8813
DM
1416PERL_STATIC_INLINE void
1417S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1418{
1419 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1420
1421 PERL_ARGS_ASSERT_CX_PUSHSUB;
1422
3f6bd23a 1423 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1424 cx->blk_sub.cv = cv;
1425 cx->blk_sub.olddepth = CvDEPTH(cv);
1426 cx->blk_sub.prevcomppad = PL_comppad;
1427 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1428 cx->blk_sub.retop = retop;
1429 SvREFCNT_inc_simple_void_NN(cv);
1430 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1431}
1432
1433
1434/* subsets of cx_popsub() */
1435
1436PERL_STATIC_INLINE void
1437S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1438{
1439 CV *cv;
1440
1441 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1442 assert(CxTYPE(cx) == CXt_SUB);
1443
1444 PL_comppad = cx->blk_sub.prevcomppad;
1445 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1446 cv = cx->blk_sub.cv;
1447 CvDEPTH(cv) = cx->blk_sub.olddepth;
1448 cx->blk_sub.cv = NULL;
1449 SvREFCNT_dec(cv);
1450}
1451
1452
1453/* handle the @_ part of leaving a sub */
1454
1455PERL_STATIC_INLINE void
1456S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1457{
1458 AV *av;
1459
1460 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1461 assert(CxTYPE(cx) == CXt_SUB);
1462 assert(AvARRAY(MUTABLE_AV(
1463 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1464 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1465
1466 CX_POP_SAVEARRAY(cx);
1467 av = MUTABLE_AV(PAD_SVl(0));
1468 if (UNLIKELY(AvREAL(av)))
1469 /* abandon @_ if it got reified */
1470 clear_defarray(av, 0);
1471 else {
1472 CLEAR_ARGARRAY(av);
1473 }
1474}
1475
1476
1477PERL_STATIC_INLINE void
1478S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1479{
1480 PERL_ARGS_ASSERT_CX_POPSUB;
1481 assert(CxTYPE(cx) == CXt_SUB);
1482
3f6bd23a 1483 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1484
1485 if (CxHASARGS(cx))
1486 cx_popsub_args(cx);
1487 cx_popsub_common(cx);
1488}
1489
1490
6a7d52cc
DM
1491PERL_STATIC_INLINE void
1492S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1493{
1494 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1495
1496 cx->blk_format.cv = cv;
1497 cx->blk_format.retop = retop;
1498 cx->blk_format.gv = gv;
1499 cx->blk_format.dfoutgv = PL_defoutgv;
1500 cx->blk_format.prevcomppad = PL_comppad;
1501 cx->blk_u16 = 0;
1502
1503 SvREFCNT_inc_simple_void_NN(cv);
1504 CvDEPTH(cv)++;
1505 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1506}
1507
1508
1509PERL_STATIC_INLINE void
1510S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1511{
1512 CV *cv;
1513 GV *dfout;
1514
1515 PERL_ARGS_ASSERT_CX_POPFORMAT;
1516 assert(CxTYPE(cx) == CXt_FORMAT);
1517
1518 dfout = cx->blk_format.dfoutgv;
1519 setdefout(dfout);
1520 cx->blk_format.dfoutgv = NULL;
1521 SvREFCNT_dec_NN(dfout);
1522
1523 PL_comppad = cx->blk_format.prevcomppad;
1524 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1525 cv = cx->blk_format.cv;
1526 cx->blk_format.cv = NULL;
1527 --CvDEPTH(cv);
1528 SvREFCNT_dec_NN(cv);
1529}
1530
1531
13febba5
DM
1532PERL_STATIC_INLINE void
1533S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1534{
1535 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1536
1537 cx->blk_eval.retop = retop;
1538 cx->blk_eval.old_namesv = namesv;
1539 cx->blk_eval.old_eval_root = PL_eval_root;
1540 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1541 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1542 cx->blk_eval.cur_top_env = PL_top_env;
1543
4c57ced5 1544 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1545 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1546 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1547}
1548
1549
1550PERL_STATIC_INLINE void
1551S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1552{
1553 SV *sv;
1554
1555 PERL_ARGS_ASSERT_CX_POPEVAL;
1556 assert(CxTYPE(cx) == CXt_EVAL);
1557
1558 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1559 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1560 PL_eval_root = cx->blk_eval.old_eval_root;
1561 sv = cx->blk_eval.cur_text;
4c57ced5 1562 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1563 cx->blk_eval.cur_text = NULL;
1564 SvREFCNT_dec_NN(sv);
1565 }
1566
1567 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1568 if (sv) {
1569 cx->blk_eval.old_namesv = NULL;
1570 SvREFCNT_dec_NN(sv);
1571 }
13febba5 1572}
6a7d52cc 1573
a73d8813 1574
d1b6bf72
DM
1575/* push a plain loop, i.e.
1576 * { block }
1577 * while (cond) { block }
1578 * for (init;cond;continue) { block }
1579 * This loop can be last/redo'ed etc.
1580 */
1581
1582PERL_STATIC_INLINE void
1583S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1584{
1585 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1586 cx->blk_loop.my_op = cLOOP;
1587}
1588
1589
1590/* push a true for loop, i.e.
1591 * for var (list) { block }
1592 */
1593
1594PERL_STATIC_INLINE void
1595S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1596{
1597 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1598
1599 /* this one line is common with cx_pushloop_plain */
1600 cx->blk_loop.my_op = cLOOP;
1601
1602 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1603 cx->blk_loop.itersave = itersave;
1604#ifdef USE_ITHREADS
1605 cx->blk_loop.oldcomppad = PL_comppad;
1606#endif
1607}
1608
1609
1610/* pop all loop types, including plain */
1611
1612PERL_STATIC_INLINE void
1613S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1614{
1615 PERL_ARGS_ASSERT_CX_POPLOOP;
1616
1617 assert(CxTYPE_is_LOOP(cx));
1618 if ( CxTYPE(cx) == CXt_LOOP_ARY
1619 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1620 {
1621 /* Free ary or cur. This assumes that state_u.ary.ary
1622 * aligns with state_u.lazysv.cur. See cx_dup() */
1623 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1624 cx->blk_loop.state_u.lazysv.cur = NULL;
1625 SvREFCNT_dec_NN(sv);
1626 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1627 sv = cx->blk_loop.state_u.lazysv.end;
1628 cx->blk_loop.state_u.lazysv.end = NULL;
1629 SvREFCNT_dec_NN(sv);
1630 }
1631 }
1632 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1633 SV *cursv;
1634 SV **svp = (cx)->blk_loop.itervar_u.svp;
1635 if ((cx->cx_type & CXp_FOR_GV))
1636 svp = &GvSV((GV*)svp);
1637 cursv = *svp;
1638 *svp = cx->blk_loop.itersave;
1639 cx->blk_loop.itersave = NULL;
1640 SvREFCNT_dec(cursv);
1641 }
1642}
1643
2a7b7c61
DM
1644
1645PERL_STATIC_INLINE void
1646S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1647{
1648 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1649
1650 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1651}
1652
1653
1654PERL_STATIC_INLINE void
1655S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1656{
1657 PERL_ARGS_ASSERT_CX_POPWHEN;
1658 assert(CxTYPE(cx) == CXt_WHEN);
1659
1660 PERL_UNUSED_ARG(cx);
59a14f30 1661 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1662 /* currently NOOP */
1663}
1664
1665
1666PERL_STATIC_INLINE void
1667S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1668{
1669 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1670
1671 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1672 cx->blk_givwhen.defsv_save = orig_defsv;
1673}
1674
1675
1676PERL_STATIC_INLINE void
1677S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1678{
1679 SV *sv;
1680
1681 PERL_ARGS_ASSERT_CX_POPGIVEN;
1682 assert(CxTYPE(cx) == CXt_GIVEN);
1683
1684 sv = GvSV(PL_defgv);
1685 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1686 cx->blk_givwhen.defsv_save = NULL;
1687 SvREFCNT_dec(sv);
1688}
1689
ec2c235b
KW
1690/* ------------------ util.h ------------------------------------------- */
1691
1692/*
1693=head1 Miscellaneous Functions
1694
1695=for apidoc foldEQ
1696
1697Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1698same
1699case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1700match themselves and their opposite case counterparts. Non-cased and non-ASCII
1701range bytes match only themselves.
1702
1703=cut
1704*/
1705
1706PERL_STATIC_INLINE I32
1707Perl_foldEQ(const char *s1, const char *s2, I32 len)
1708{
1709 const U8 *a = (const U8 *)s1;
1710 const U8 *b = (const U8 *)s2;
1711
1712 PERL_ARGS_ASSERT_FOLDEQ;
1713
1714 assert(len >= 0);
1715
1716 while (len--) {
1717 if (*a != *b && *a != PL_fold[*b])
1718 return 0;
1719 a++,b++;
1720 }
1721 return 1;
1722}
1723
0f9cb40c 1724PERL_STATIC_INLINE I32
ec2c235b
KW
1725Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1726{
1727 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1728 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1729 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1730 * does it check that the strings each have at least 'len' characters */
1731
1732 const U8 *a = (const U8 *)s1;
1733 const U8 *b = (const U8 *)s2;
1734
1735 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1736
1737 assert(len >= 0);
1738
1739 while (len--) {
1740 if (*a != *b && *a != PL_fold_latin1[*b]) {
1741 return 0;
1742 }
1743 a++, b++;
1744 }
1745 return 1;
1746}
1747
1748/*
1749=for apidoc foldEQ_locale
1750
1751Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1752same case-insensitively in the current locale; false otherwise.
1753
1754=cut
1755*/
1756
0f9cb40c 1757PERL_STATIC_INLINE I32
ec2c235b
KW
1758Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1759{
1760 dVAR;
1761 const U8 *a = (const U8 *)s1;
1762 const U8 *b = (const U8 *)s2;
1763
1764 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1765
1766 assert(len >= 0);
1767
1768 while (len--) {
1769 if (*a != *b && *a != PL_fold_locale[*b])
1770 return 0;
1771 a++,b++;
1772 }
1773 return 1;
1774}
1775
6dba01e2
KW
1776#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1777
1778PERL_STATIC_INLINE void *
1779S_my_memrchr(const char * s, const char c, const STRLEN len)
1780{
1781 /* memrchr(), since many platforms lack it */
1782
1783 const char * t = s + len - 1;
1784
1785 PERL_ARGS_ASSERT_MY_MEMRCHR;
1786
1787 while (t >= s) {
1788 if (*t == c) {
1789 return (void *) t;
1790 }
1791 t--;
1792 }
1793
1794 return NULL;
1795}
1796
1797#endif
1798
ed382232 1799/*
c8028aa6
TC
1800 * ex: set ts=8 sts=4 sw=4 et:
1801 */