This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
give REGEXP SVs the POK flag again
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 247#ifdef GCC_DIAG_PRAGMA
6ab56f1e 248GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 249#endif
d1decf2b
TC
250static void
251S_croak_memory_wrap(void)
252{
253 Perl_croak_nocontext("%s",PL_memory_wrap);
254}
c1d6452f 255#ifdef GCC_DIAG_PRAGMA
6ab56f1e 256GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 257#endif
d1decf2b 258
a8a2ceaa
KW
259/* ------------------------------- utf8.h ------------------------------- */
260
2fe720e2
KW
261/*
262=head1 Unicode Support
263*/
264
55d09dc8
KW
265PERL_STATIC_INLINE void
266S_append_utf8_from_native_byte(const U8 byte, U8** dest)
267{
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
270
55d09dc8
KW
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
272
6f2d5cbc 273 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 274 *((*dest)++) = byte;
55d09dc8 275 else {
a09ec51a
KW
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
278 }
279}
280
e123187a 281/*
2fe720e2 282=for apidoc valid_utf8_to_uvchr
2717076a 283Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
284the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286non-Unicode code points are allowed.
287
288=cut
289
290 */
291
292PERL_STATIC_INLINE UV
293Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
294{
c41b2540 295 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
296 const U8* send = s + expectlen;
297 UV uv = *s;
298
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
300
301 if (retlen) {
302 *retlen = expectlen;
303 }
304
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
307 return uv;
308 }
309
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
313
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
319 }
320
321 return UNI_TO_NATIVE(uv);
322
323}
324
1e599354
KW
325/*
326=for apidoc is_utf8_invariant_string
327
82c5d941 328Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 329regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
330EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333characters are invariant, but so also are the C1 controls.
1e599354
KW
334
335If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336use this option, that C<s> can't have embedded C<NUL> characters and has to
337have a terminating C<NUL> byte).
338
9f2abfde
KW
339See also
340C<L</is_utf8_string>>,
341C<L</is_utf8_string_flags>>,
342C<L</is_utf8_string_loc>>,
343C<L</is_utf8_string_loc_flags>>,
344C<L</is_utf8_string_loclen>>,
345C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
346C<L</is_utf8_fixed_width_buf_flags>>,
347C<L</is_utf8_fixed_width_buf_loc_flags>>,
348C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
349C<L</is_strict_utf8_string>>,
350C<L</is_strict_utf8_string_loc>>,
351C<L</is_strict_utf8_string_loclen>>,
352C<L</is_c9strict_utf8_string>>,
353C<L</is_c9strict_utf8_string_loc>>,
354and
355C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
356
357=cut
0cbf5865
KW
358
359*/
360
361#define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
363
364/*
365=for apidoc is_utf8_invariant_string_loc
366
367Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368the first UTF-8 variant character in the C<ep> pointer; if all characters are
369UTF-8 invariant, this function does not change the contents of C<*ep>.
370
371=cut
372
373XXX On ASCII machines this could be sped up by doing word-at-a-time operations
374
1e599354
KW
375*/
376
377PERL_STATIC_INLINE bool
0cbf5865 378S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
1e599354
KW
379{
380 const U8* const send = s + (len ? len : strlen((const char *)s));
381 const U8* x = s;
382
0cbf5865
KW
383 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
384
385 while (x < send) {
386 if (UTF8_IS_INVARIANT(*x)) {
387 x++;
388 continue;
389 }
390
391 if (ep) {
392 *ep = x;
393 }
1e599354 394
0cbf5865 395 return FALSE;
1e599354
KW
396 }
397
398 return TRUE;
399}
400
7c93d8f0 401/*
5ff889fb
KW
402=for apidoc is_utf8_string
403
82c5d941
KW
404Returns TRUE if the first C<len> bytes of string C<s> form a valid
405Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
406be calculated using C<strlen(s)> (which means if you use this option, that C<s>
407can't have embedded C<NUL> characters and has to have a terminating C<NUL>
408byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
409
2717076a
KW
410This function considers Perl's extended UTF-8 to be valid. That means that
411code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
412considered valid by this function. Use C<L</is_strict_utf8_string>>,
413C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
414code points are considered valid.
5ff889fb 415
9f2abfde
KW
416See also
417C<L</is_utf8_invariant_string>>,
0cbf5865 418C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
419C<L</is_utf8_string_loc>>,
420C<L</is_utf8_string_loclen>>,
8bc127bf
KW
421C<L</is_utf8_fixed_width_buf_flags>>,
422C<L</is_utf8_fixed_width_buf_loc_flags>>,
423C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
424
425=cut
426*/
427
56e4cf64 428PERL_STATIC_INLINE bool
c41b2540 429Perl_is_utf8_string(const U8 *s, const STRLEN len)
5ff889fb 430{
35936d22
KW
431 /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
432 * Be aware of possible changes to that */
433
5ff889fb
KW
434 const U8* const send = s + (len ? len : strlen((const char *)s));
435 const U8* x = s;
436
437 PERL_ARGS_ASSERT_IS_UTF8_STRING;
438
439 while (x < send) {
c41b2540
KW
440 const STRLEN cur_len = isUTF8_CHAR(x, send);
441 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
442 return FALSE;
443 }
c41b2540 444 x += cur_len;
5ff889fb
KW
445 }
446
447 return TRUE;
448}
449
450/*
9f2abfde
KW
451=for apidoc is_strict_utf8_string
452
453Returns TRUE if the first C<len> bytes of string C<s> form a valid
454UTF-8-encoded string that is fully interchangeable by any application using
455Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
456calculated using C<strlen(s)> (which means if you use this option, that C<s>
457can't have embedded C<NUL> characters and has to have a terminating C<NUL>
458byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
459
460This function returns FALSE for strings containing any
461code points above the Unicode max of 0x10FFFF, surrogate code points, or
462non-character code points.
463
464See also
465C<L</is_utf8_invariant_string>>,
0cbf5865 466C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
467C<L</is_utf8_string>>,
468C<L</is_utf8_string_flags>>,
469C<L</is_utf8_string_loc>>,
470C<L</is_utf8_string_loc_flags>>,
471C<L</is_utf8_string_loclen>>,
472C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
473C<L</is_utf8_fixed_width_buf_flags>>,
474C<L</is_utf8_fixed_width_buf_loc_flags>>,
475C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
476C<L</is_strict_utf8_string_loc>>,
477C<L</is_strict_utf8_string_loclen>>,
478C<L</is_c9strict_utf8_string>>,
479C<L</is_c9strict_utf8_string_loc>>,
480and
481C<L</is_c9strict_utf8_string_loclen>>.
482
483=cut
484*/
485
486PERL_STATIC_INLINE bool
487S_is_strict_utf8_string(const U8 *s, const STRLEN len)
488{
489 const U8* const send = s + (len ? len : strlen((const char *)s));
490 const U8* x = s;
491
492 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
493
494 while (x < send) {
495 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
496 if (UNLIKELY(! cur_len)) {
497 return FALSE;
498 }
499 x += cur_len;
500 }
501
502 return TRUE;
503}
504
505/*
506=for apidoc is_c9strict_utf8_string
507
508Returns TRUE if the first C<len> bytes of string C<s> form a valid
509UTF-8-encoded string that conforms to
510L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
511otherwise it returns FALSE. If C<len> is 0, it will be calculated using
512C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
513C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
514characters being ASCII constitute 'a valid UTF-8 string'.
515
516This function returns FALSE for strings containing any code points above the
517Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
518code points per
519L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
520
521See also
522C<L</is_utf8_invariant_string>>,
0cbf5865 523C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
524C<L</is_utf8_string>>,
525C<L</is_utf8_string_flags>>,
526C<L</is_utf8_string_loc>>,
527C<L</is_utf8_string_loc_flags>>,
528C<L</is_utf8_string_loclen>>,
529C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
530C<L</is_utf8_fixed_width_buf_flags>>,
531C<L</is_utf8_fixed_width_buf_loc_flags>>,
532C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
533C<L</is_strict_utf8_string>>,
534C<L</is_strict_utf8_string_loc>>,
535C<L</is_strict_utf8_string_loclen>>,
536C<L</is_c9strict_utf8_string_loc>>,
537and
538C<L</is_c9strict_utf8_string_loclen>>.
539
540=cut
541*/
542
543PERL_STATIC_INLINE bool
544S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
545{
546 const U8* const send = s + (len ? len : strlen((const char *)s));
547 const U8* x = s;
548
549 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
550
551 while (x < send) {
552 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
553 if (UNLIKELY(! cur_len)) {
554 return FALSE;
555 }
556 x += cur_len;
557 }
558
559 return TRUE;
560}
561
562/* The above 3 functions could have been moved into the more general one just
563 * below, and made #defines that call it with the right 'flags'. They are
564 * currently kept separate to increase their chances of getting inlined */
565
566/*
567=for apidoc is_utf8_string_flags
568
569Returns TRUE if the first C<len> bytes of string C<s> form a valid
570UTF-8 string, subject to the restrictions imposed by C<flags>;
571returns FALSE otherwise. If C<len> is 0, it will be calculated
572using C<strlen(s)> (which means if you use this option, that C<s> can't have
573embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
574that all characters being ASCII constitute 'a valid UTF-8 string'.
575
576If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
577C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
578as C<L</is_strict_utf8_string>>; and if C<flags> is
579C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
580C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
581combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
582C<L</utf8n_to_uvchr>>, with the same meanings.
583
584See also
585C<L</is_utf8_invariant_string>>,
0cbf5865 586C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
587C<L</is_utf8_string>>,
588C<L</is_utf8_string_loc>>,
589C<L</is_utf8_string_loc_flags>>,
590C<L</is_utf8_string_loclen>>,
591C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
592C<L</is_utf8_fixed_width_buf_flags>>,
593C<L</is_utf8_fixed_width_buf_loc_flags>>,
594C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
595C<L</is_strict_utf8_string>>,
596C<L</is_strict_utf8_string_loc>>,
597C<L</is_strict_utf8_string_loclen>>,
598C<L</is_c9strict_utf8_string>>,
599C<L</is_c9strict_utf8_string_loc>>,
600and
601C<L</is_c9strict_utf8_string_loclen>>.
602
603=cut
604*/
605
606PERL_STATIC_INLINE bool
607S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
608{
609 const U8* const send = s + (len ? len : strlen((const char *)s));
610 const U8* x = s;
611
612 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
613 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 614 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde
KW
615
616 if (flags == 0) {
617 return is_utf8_string(s, len);
618 }
619
d044b7a7 620 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
621 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
622 {
623 return is_strict_utf8_string(s, len);
624 }
625
d044b7a7 626 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
627 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
628 {
629 return is_c9strict_utf8_string(s, len);
630 }
631
632 while (x < send) {
633 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
634 if (UNLIKELY(! cur_len)) {
635 return FALSE;
636 }
637 x += cur_len;
638 }
639
640 return TRUE;
641}
642
643/*
5ff889fb
KW
644
645=for apidoc is_utf8_string_loc
646
2717076a 647Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 648case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 649"utf8ness success") in the C<ep> pointer.
5ff889fb 650
2717076a 651See also C<L</is_utf8_string_loclen>>.
5ff889fb 652
3964c812
KW
653=cut
654*/
655
656#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
657
658/*
659
5ff889fb
KW
660=for apidoc is_utf8_string_loclen
661
2717076a 662Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 663case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 664"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 665encoded characters in the C<el> pointer.
5ff889fb 666
2717076a 667See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
668
669=cut
670*/
671
56e4cf64 672PERL_STATIC_INLINE bool
c41b2540 673Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb
KW
674{
675 const U8* const send = s + (len ? len : strlen((const char *)s));
676 const U8* x = s;
677 STRLEN outlen = 0;
678
679 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
680
681 while (x < send) {
c41b2540
KW
682 const STRLEN cur_len = isUTF8_CHAR(x, send);
683 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
684 break;
685 }
c41b2540 686 x += cur_len;
5ff889fb
KW
687 outlen++;
688 }
689
690 if (el)
691 *el = outlen;
692
693 if (ep) {
694 *ep = x;
695 }
696
697 return (x == send);
698}
699
700/*
9f2abfde
KW
701
702=for apidoc is_strict_utf8_string_loc
703
704Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
705case of "utf8ness failure") or the location C<s>+C<len> (in the case of
706"utf8ness success") in the C<ep> pointer.
707
708See also C<L</is_strict_utf8_string_loclen>>.
709
710=cut
711*/
712
713#define is_strict_utf8_string_loc(s, len, ep) \
714 is_strict_utf8_string_loclen(s, len, ep, 0)
715
716/*
717
718=for apidoc is_strict_utf8_string_loclen
719
720Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
721case of "utf8ness failure") or the location C<s>+C<len> (in the case of
722"utf8ness success") in the C<ep> pointer, and the number of UTF-8
723encoded characters in the C<el> pointer.
724
725See also C<L</is_strict_utf8_string_loc>>.
726
727=cut
728*/
729
730PERL_STATIC_INLINE bool
731S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
732{
733 const U8* const send = s + (len ? len : strlen((const char *)s));
734 const U8* x = s;
735 STRLEN outlen = 0;
736
737 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
738
739 while (x < send) {
740 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
741 if (UNLIKELY(! cur_len)) {
742 break;
743 }
744 x += cur_len;
745 outlen++;
746 }
747
748 if (el)
749 *el = outlen;
750
751 if (ep) {
752 *ep = x;
753 }
754
755 return (x == send);
756}
757
758/*
759
760=for apidoc is_c9strict_utf8_string_loc
761
762Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
763the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
764"utf8ness success") in the C<ep> pointer.
765
766See also C<L</is_c9strict_utf8_string_loclen>>.
767
768=cut
769*/
770
771#define is_c9strict_utf8_string_loc(s, len, ep) \
772 is_c9strict_utf8_string_loclen(s, len, ep, 0)
773
774/*
775
776=for apidoc is_c9strict_utf8_string_loclen
777
778Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
779the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
780"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
781characters in the C<el> pointer.
782
783See also C<L</is_c9strict_utf8_string_loc>>.
784
785=cut
786*/
787
788PERL_STATIC_INLINE bool
789S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
790{
791 const U8* const send = s + (len ? len : strlen((const char *)s));
792 const U8* x = s;
793 STRLEN outlen = 0;
794
795 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
796
797 while (x < send) {
798 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
799 if (UNLIKELY(! cur_len)) {
800 break;
801 }
802 x += cur_len;
803 outlen++;
804 }
805
806 if (el)
807 *el = outlen;
808
809 if (ep) {
810 *ep = x;
811 }
812
813 return (x == send);
814}
815
816/*
817
818=for apidoc is_utf8_string_loc_flags
819
820Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
821case of "utf8ness failure") or the location C<s>+C<len> (in the case of
822"utf8ness success") in the C<ep> pointer.
823
824See also C<L</is_utf8_string_loclen_flags>>.
825
826=cut
827*/
828
829#define is_utf8_string_loc_flags(s, len, ep, flags) \
830 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
831
832
833/* The above 3 actual functions could have been moved into the more general one
834 * just below, and made #defines that call it with the right 'flags'. They are
835 * currently kept separate to increase their chances of getting inlined */
836
837/*
838
839=for apidoc is_utf8_string_loclen_flags
840
841Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
842case of "utf8ness failure") or the location C<s>+C<len> (in the case of
843"utf8ness success") in the C<ep> pointer, and the number of UTF-8
844encoded characters in the C<el> pointer.
845
846See also C<L</is_utf8_string_loc_flags>>.
847
848=cut
849*/
850
851PERL_STATIC_INLINE bool
852S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
853{
854 const U8* const send = s + (len ? len : strlen((const char *)s));
855 const U8* x = s;
856 STRLEN outlen = 0;
857
858 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
859 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 860 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde
KW
861
862 if (flags == 0) {
863 return is_utf8_string_loclen(s, len, ep, el);
864 }
865
d044b7a7 866 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
867 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
868 {
869 return is_strict_utf8_string_loclen(s, len, ep, el);
870 }
871
d044b7a7 872 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
873 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
874 {
875 return is_c9strict_utf8_string_loclen(s, len, ep, el);
876 }
877
878 while (x < send) {
879 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
880 if (UNLIKELY(! cur_len)) {
881 break;
882 }
883 x += cur_len;
884 outlen++;
885 }
886
887 if (el)
888 *el = outlen;
889
890 if (ep) {
891 *ep = x;
892 }
893
894 return (x == send);
895}
896
897/*
7c93d8f0
KW
898=for apidoc utf8_distance
899
900Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
901and C<b>.
902
903WARNING: use only if you *know* that the pointers point inside the
904same UTF-8 buffer.
905
906=cut
907*/
908
909PERL_STATIC_INLINE IV
910Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
911{
912 PERL_ARGS_ASSERT_UTF8_DISTANCE;
913
914 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
915}
916
917/*
918=for apidoc utf8_hop
919
920Return the UTF-8 pointer C<s> displaced by C<off> characters, either
921forward or backward.
922
923WARNING: do not use the following unless you *know* C<off> is within
924the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
925on the first byte of character or just after the last byte of a character.
926
927=cut
928*/
929
930PERL_STATIC_INLINE U8 *
931Perl_utf8_hop(const U8 *s, SSize_t off)
932{
933 PERL_ARGS_ASSERT_UTF8_HOP;
934
935 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
936 * the bitops (especially ~) can create illegal UTF-8.
937 * In other words: in Perl UTF-8 is not just for Unicode. */
938
939 if (off >= 0) {
940 while (off--)
941 s += UTF8SKIP(s);
942 }
943 else {
944 while (off++) {
945 s--;
946 while (UTF8_IS_CONTINUATION(*s))
947 s--;
948 }
949 }
de979548 950 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 951 return (U8 *)s;
de979548 952 GCC_DIAG_RESTORE;
7c93d8f0
KW
953}
954
4dab108f 955/*
65df57a8
TC
956=for apidoc utf8_hop_forward
957
958Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
959forward.
960
961C<off> must be non-negative.
962
963C<s> must be before or equal to C<end>.
964
965When moving forward it will not move beyond C<end>.
966
967Will not exceed this limit even if the string is not valid "UTF-8".
968
969=cut
970*/
971
972PERL_STATIC_INLINE U8 *
973Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
974{
975 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
976
977 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
978 * the bitops (especially ~) can create illegal UTF-8.
979 * In other words: in Perl UTF-8 is not just for Unicode. */
980
981 assert(s <= end);
982 assert(off >= 0);
983
984 while (off--) {
985 STRLEN skip = UTF8SKIP(s);
de979548
P
986 if ((STRLEN)(end - s) <= skip) {
987 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 988 return (U8 *)end;
de979548
P
989 GCC_DIAG_RESTORE;
990 }
65df57a8
TC
991 s += skip;
992 }
993
de979548 994 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 995 return (U8 *)s;
de979548 996 GCC_DIAG_RESTORE;
65df57a8
TC
997}
998
999/*
1000=for apidoc utf8_hop_back
1001
1002Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1003backward.
1004
1005C<off> must be non-positive.
1006
1007C<s> must be after or equal to C<start>.
1008
1009When moving backward it will not move before C<start>.
1010
1011Will not exceed this limit even if the string is not valid "UTF-8".
1012
1013=cut
1014*/
1015
1016PERL_STATIC_INLINE U8 *
1017Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1018{
1019 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1020
1021 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1022 * the bitops (especially ~) can create illegal UTF-8.
1023 * In other words: in Perl UTF-8 is not just for Unicode. */
1024
1025 assert(start <= s);
1026 assert(off <= 0);
1027
1028 while (off++ && s > start) {
1029 s--;
1030 while (UTF8_IS_CONTINUATION(*s) && s > start)
1031 s--;
1032 }
1033
de979548 1034 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1035 return (U8 *)s;
de979548 1036 GCC_DIAG_RESTORE;
65df57a8
TC
1037}
1038
1039/*
1040=for apidoc utf8_hop_safe
1041
1042Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1043either forward or backward.
1044
1045When moving backward it will not move before C<start>.
1046
1047When moving forward it will not move beyond C<end>.
1048
1049Will not exceed those limits even if the string is not valid "UTF-8".
1050
1051=cut
1052*/
1053
1054PERL_STATIC_INLINE U8 *
1055Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1056{
1057 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1058
1059 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1060 * the bitops (especially ~) can create illegal UTF-8.
1061 * In other words: in Perl UTF-8 is not just for Unicode. */
1062
1063 assert(start <= s && s <= end);
1064
1065 if (off >= 0) {
1066 return utf8_hop_forward(s, off, end);
1067 }
1068 else {
1069 return utf8_hop_back(s, off, start);
1070 }
1071}
1072
1073/*
4dab108f
KW
1074
1075=for apidoc is_utf8_valid_partial_char
1076
6cbb9248
KW
1077Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1078S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1079points. Otherwise, it returns 1 if there exists at least one non-empty
1080sequence of bytes that when appended to sequence C<s>, starting at position
1081C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1082otherwise returns 0.
1083
1084In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1085point.
1086
1087This is useful when a fixed-length buffer is being tested for being well-formed
1088UTF-8, but the final few bytes in it don't comprise a full character; that is,
1089it is split somewhere in the middle of the final code point's UTF-8
1090representation. (Presumably when the buffer is refreshed with the next chunk
1091of data, the new first bytes will complete the partial code point.) This
1092function is used to verify that the final bytes in the current buffer are in
1093fact the legal beginning of some code point, so that if they aren't, the
1094failure can be signalled without having to wait for the next read.
4dab108f
KW
1095
1096=cut
1097*/
2717076a
KW
1098#define is_utf8_valid_partial_char(s, e) \
1099 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1100
1101/*
1102
1103=for apidoc is_utf8_valid_partial_char_flags
1104
1105Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1106or not the input is a valid UTF-8 encoded partial character, but it takes an
1107extra parameter, C<flags>, which can further restrict which code points are
1108considered valid.
1109
1110If C<flags> is 0, this behaves identically to
1111C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1112of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1113there is any sequence of bytes that can complete the input partial character in
1114such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1115TRUE; otherwise FALSE. Non character code points cannot be determined based on
1116partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1117determined from just the first one or two bytes.
1118
1119=cut
1120 */
1121
56e4cf64 1122PERL_STATIC_INLINE bool
f1c999a7 1123S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1124{
f1c999a7 1125 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1126
f1c999a7 1127 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1128 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1129
8875bd48 1130 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1131 return FALSE;
1132 }
1133
f1c999a7 1134 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1135}
1136
8bc127bf
KW
1137/*
1138
1139=for apidoc is_utf8_fixed_width_buf_flags
1140
1141Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1142is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1143otherwise it returns FALSE.
1144
1145If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1146without restriction. If the final few bytes of the buffer do not form a
1147complete code point, this will return TRUE anyway, provided that
1148C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1149
1150If C<flags> in non-zero, it can be any combination of the
1151C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1152same meanings.
1153
1154This function differs from C<L</is_utf8_string_flags>> only in that the latter
1155returns FALSE if the final few bytes of the string don't form a complete code
1156point.
1157
1158=cut
1159 */
1160#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1161 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1162
1163/*
1164
1165=for apidoc is_utf8_fixed_width_buf_loc_flags
1166
1167Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1168failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1169to the beginning of any partial character at the end of the buffer; if there is
1170no partial character C<*ep> will contain C<s>+C<len>.
1171
1172See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1173
1174=cut
1175*/
1176
1177#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1178 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1179
1180/*
1181
1182=for apidoc is_utf8_fixed_width_buf_loclen_flags
1183
1184Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1185complete, valid characters found in the C<el> pointer.
1186
1187=cut
1188*/
1189
1190PERL_STATIC_INLINE bool
1191S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1192 const STRLEN len,
1193 const U8 **ep,
1194 STRLEN *el,
1195 const U32 flags)
1196{
1197 const U8 * maybe_partial;
1198
1199 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1200
1201 if (! ep) {
1202 ep = &maybe_partial;
1203 }
1204
1205 /* If it's entirely valid, return that; otherwise see if the only error is
1206 * that the final few bytes are for a partial character */
1207 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1208 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1209}
1210
c8028aa6
TC
1211/* ------------------------------- perl.h ----------------------------- */
1212
1213/*
dcccc8ff
KW
1214=head1 Miscellaneous Functions
1215
41188aa0 1216=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1217
6602b933 1218Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1219If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1220
1221Return TRUE if the name is safe.
1222
796b6530 1223Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1224
1225=cut
1226*/
1227
1228PERL_STATIC_INLINE bool
41188aa0 1229S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1230 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1231 * perl itself uses xce*() functions which accept 8-bit strings.
1232 */
1233
1234 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1235
6c4650b3 1236 if (len > 1) {
c8028aa6 1237 char *null_at;
41188aa0 1238 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1239 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1240 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1241 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1242 what, op_name, pv, null_at+1);
c8028aa6
TC
1243 return FALSE;
1244 }
1245 }
1246
1247 return TRUE;
1248}
1249
1250/*
7cb3f959
TC
1251
1252Return true if the supplied filename has a newline character
fa6c7d00 1253immediately before the first (hopefully only) NUL.
7cb3f959
TC
1254
1255My original look at this incorrectly used the len from SvPV(), but
1256that's incorrect, since we allow for a NUL in pv[len-1].
1257
1258So instead, strlen() and work from there.
1259
1260This allow for the user reading a filename, forgetting to chomp it,
1261then calling:
1262
1263 open my $foo, "$file\0";
1264
1265*/
1266
1267#ifdef PERL_CORE
1268
1269PERL_STATIC_INLINE bool
1270S_should_warn_nl(const char *pv) {
1271 STRLEN len;
1272
1273 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1274
1275 len = strlen(pv);
1276
1277 return len > 0 && pv[len-1] == '\n';
1278}
1279
1280#endif
1281
81d52ecd
JH
1282/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1283
1284#define MAX_CHARSET_NAME_LENGTH 2
1285
1286PERL_STATIC_INLINE const char *
1287get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1288{
1289 /* Returns a string that corresponds to the name of the regex character set
1290 * given by 'flags', and *lenp is set the length of that string, which
1291 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1292
1293 *lenp = 1;
1294 switch (get_regex_charset(flags)) {
1295 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1296 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1297 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1298 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1299 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1300 *lenp = 2;
1301 return ASCII_MORE_RESTRICT_PAT_MODS;
1302 }
1303 /* The NOT_REACHED; hides an assert() which has a rather complex
1304 * definition in perl.h. */
1305 NOT_REACHED; /* NOTREACHED */
1306 return "?"; /* Unknown */
1307}
1308
7cb3f959 1309/*
ed382232
TC
1310
1311Return false if any get magic is on the SV other than taint magic.
1312
1313*/
1314
1315PERL_STATIC_INLINE bool
1316S_sv_only_taint_gmagic(SV *sv) {
1317 MAGIC *mg = SvMAGIC(sv);
1318
1319 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1320
1321 while (mg) {
1322 if (mg->mg_type != PERL_MAGIC_taint
1323 && !(mg->mg_flags & MGf_GSKIP)
1324 && mg->mg_virtual->svt_get) {
1325 return FALSE;
1326 }
1327 mg = mg->mg_moremagic;
1328 }
1329
1330 return TRUE;
1331}
1332
ed8ff0f3
DM
1333/* ------------------ cop.h ------------------------------------------- */
1334
1335
1336/* Enter a block. Push a new base context and return its address. */
1337
1338PERL_STATIC_INLINE PERL_CONTEXT *
1339S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1340{
1341 PERL_CONTEXT * cx;
1342
1343 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1344
1345 CXINC;
1346 cx = CX_CUR();
1347 cx->cx_type = type;
1348 cx->blk_gimme = gimme;
1349 cx->blk_oldsaveix = saveix;
4caf7d8c 1350 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1351 cx->blk_oldcop = PL_curcop;
4caf7d8c 1352 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1353 cx->blk_oldscopesp = PL_scopestack_ix;
1354 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1355 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1356
1357 PL_tmps_floor = PL_tmps_ix;
1358 CX_DEBUG(cx, "PUSH");
1359 return cx;
1360}
1361
1362
1363/* Exit a block (RETURN and LAST). */
1364
1365PERL_STATIC_INLINE void
1366S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1367{
1368 PERL_ARGS_ASSERT_CX_POPBLOCK;
1369
1370 CX_DEBUG(cx, "POP");
1371 /* these 3 are common to cx_popblock and cx_topblock */
1372 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1373 PL_scopestack_ix = cx->blk_oldscopesp;
1374 PL_curpm = cx->blk_oldpm;
1375
1376 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1377 * and leaves a CX entry lying around for repeated use, so
1378 * skip for multicall */ \
1379 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1380 || PL_savestack_ix == cx->blk_oldsaveix);
1381 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1382 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1383}
1384
1385/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1386 * Whereas cx_popblock() restores the state to the point just before
1387 * cx_pushblock() was called, cx_topblock() restores it to the point just
1388 * *after* cx_pushblock() was called. */
1389
1390PERL_STATIC_INLINE void
1391S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1392{
1393 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1394
1395 CX_DEBUG(cx, "TOP");
1396 /* these 3 are common to cx_popblock and cx_topblock */
1397 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1398 PL_scopestack_ix = cx->blk_oldscopesp;
1399 PL_curpm = cx->blk_oldpm;
1400
1401 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1402}
1403
1404
a73d8813
DM
1405PERL_STATIC_INLINE void
1406S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1407{
1408 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1409
1410 PERL_ARGS_ASSERT_CX_PUSHSUB;
1411
3f6bd23a 1412 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1413 cx->blk_sub.cv = cv;
1414 cx->blk_sub.olddepth = CvDEPTH(cv);
1415 cx->blk_sub.prevcomppad = PL_comppad;
1416 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1417 cx->blk_sub.retop = retop;
1418 SvREFCNT_inc_simple_void_NN(cv);
1419 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1420}
1421
1422
1423/* subsets of cx_popsub() */
1424
1425PERL_STATIC_INLINE void
1426S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1427{
1428 CV *cv;
1429
1430 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1431 assert(CxTYPE(cx) == CXt_SUB);
1432
1433 PL_comppad = cx->blk_sub.prevcomppad;
1434 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1435 cv = cx->blk_sub.cv;
1436 CvDEPTH(cv) = cx->blk_sub.olddepth;
1437 cx->blk_sub.cv = NULL;
1438 SvREFCNT_dec(cv);
1439}
1440
1441
1442/* handle the @_ part of leaving a sub */
1443
1444PERL_STATIC_INLINE void
1445S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1446{
1447 AV *av;
1448
1449 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1450 assert(CxTYPE(cx) == CXt_SUB);
1451 assert(AvARRAY(MUTABLE_AV(
1452 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1453 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1454
1455 CX_POP_SAVEARRAY(cx);
1456 av = MUTABLE_AV(PAD_SVl(0));
1457 if (UNLIKELY(AvREAL(av)))
1458 /* abandon @_ if it got reified */
1459 clear_defarray(av, 0);
1460 else {
1461 CLEAR_ARGARRAY(av);
1462 }
1463}
1464
1465
1466PERL_STATIC_INLINE void
1467S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1468{
1469 PERL_ARGS_ASSERT_CX_POPSUB;
1470 assert(CxTYPE(cx) == CXt_SUB);
1471
3f6bd23a 1472 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1473
1474 if (CxHASARGS(cx))
1475 cx_popsub_args(cx);
1476 cx_popsub_common(cx);
1477}
1478
1479
6a7d52cc
DM
1480PERL_STATIC_INLINE void
1481S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1482{
1483 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1484
1485 cx->blk_format.cv = cv;
1486 cx->blk_format.retop = retop;
1487 cx->blk_format.gv = gv;
1488 cx->blk_format.dfoutgv = PL_defoutgv;
1489 cx->blk_format.prevcomppad = PL_comppad;
1490 cx->blk_u16 = 0;
1491
1492 SvREFCNT_inc_simple_void_NN(cv);
1493 CvDEPTH(cv)++;
1494 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1495}
1496
1497
1498PERL_STATIC_INLINE void
1499S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1500{
1501 CV *cv;
1502 GV *dfout;
1503
1504 PERL_ARGS_ASSERT_CX_POPFORMAT;
1505 assert(CxTYPE(cx) == CXt_FORMAT);
1506
1507 dfout = cx->blk_format.dfoutgv;
1508 setdefout(dfout);
1509 cx->blk_format.dfoutgv = NULL;
1510 SvREFCNT_dec_NN(dfout);
1511
1512 PL_comppad = cx->blk_format.prevcomppad;
1513 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1514 cv = cx->blk_format.cv;
1515 cx->blk_format.cv = NULL;
1516 --CvDEPTH(cv);
1517 SvREFCNT_dec_NN(cv);
1518}
1519
1520
13febba5
DM
1521PERL_STATIC_INLINE void
1522S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1523{
1524 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1525
1526 cx->blk_eval.retop = retop;
1527 cx->blk_eval.old_namesv = namesv;
1528 cx->blk_eval.old_eval_root = PL_eval_root;
1529 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1530 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1531 cx->blk_eval.cur_top_env = PL_top_env;
1532
4c57ced5 1533 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1534 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1535 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1536}
1537
1538
1539PERL_STATIC_INLINE void
1540S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1541{
1542 SV *sv;
1543
1544 PERL_ARGS_ASSERT_CX_POPEVAL;
1545 assert(CxTYPE(cx) == CXt_EVAL);
1546
1547 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1548 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1549 PL_eval_root = cx->blk_eval.old_eval_root;
1550 sv = cx->blk_eval.cur_text;
4c57ced5 1551 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1552 cx->blk_eval.cur_text = NULL;
1553 SvREFCNT_dec_NN(sv);
1554 }
1555
1556 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1557 if (sv) {
1558 cx->blk_eval.old_namesv = NULL;
1559 SvREFCNT_dec_NN(sv);
1560 }
13febba5 1561}
6a7d52cc 1562
a73d8813 1563
d1b6bf72
DM
1564/* push a plain loop, i.e.
1565 * { block }
1566 * while (cond) { block }
1567 * for (init;cond;continue) { block }
1568 * This loop can be last/redo'ed etc.
1569 */
1570
1571PERL_STATIC_INLINE void
1572S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1573{
1574 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1575 cx->blk_loop.my_op = cLOOP;
1576}
1577
1578
1579/* push a true for loop, i.e.
1580 * for var (list) { block }
1581 */
1582
1583PERL_STATIC_INLINE void
1584S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1585{
1586 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1587
1588 /* this one line is common with cx_pushloop_plain */
1589 cx->blk_loop.my_op = cLOOP;
1590
1591 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1592 cx->blk_loop.itersave = itersave;
1593#ifdef USE_ITHREADS
1594 cx->blk_loop.oldcomppad = PL_comppad;
1595#endif
1596}
1597
1598
1599/* pop all loop types, including plain */
1600
1601PERL_STATIC_INLINE void
1602S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1603{
1604 PERL_ARGS_ASSERT_CX_POPLOOP;
1605
1606 assert(CxTYPE_is_LOOP(cx));
1607 if ( CxTYPE(cx) == CXt_LOOP_ARY
1608 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1609 {
1610 /* Free ary or cur. This assumes that state_u.ary.ary
1611 * aligns with state_u.lazysv.cur. See cx_dup() */
1612 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1613 cx->blk_loop.state_u.lazysv.cur = NULL;
1614 SvREFCNT_dec_NN(sv);
1615 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1616 sv = cx->blk_loop.state_u.lazysv.end;
1617 cx->blk_loop.state_u.lazysv.end = NULL;
1618 SvREFCNT_dec_NN(sv);
1619 }
1620 }
1621 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1622 SV *cursv;
1623 SV **svp = (cx)->blk_loop.itervar_u.svp;
1624 if ((cx->cx_type & CXp_FOR_GV))
1625 svp = &GvSV((GV*)svp);
1626 cursv = *svp;
1627 *svp = cx->blk_loop.itersave;
1628 cx->blk_loop.itersave = NULL;
1629 SvREFCNT_dec(cursv);
1630 }
1631}
1632
2a7b7c61
DM
1633
1634PERL_STATIC_INLINE void
1635S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1636{
1637 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1638
1639 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1640}
1641
1642
1643PERL_STATIC_INLINE void
1644S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1645{
1646 PERL_ARGS_ASSERT_CX_POPWHEN;
1647 assert(CxTYPE(cx) == CXt_WHEN);
1648
1649 PERL_UNUSED_ARG(cx);
59a14f30 1650 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1651 /* currently NOOP */
1652}
1653
1654
1655PERL_STATIC_INLINE void
1656S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1657{
1658 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1659
1660 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1661 cx->blk_givwhen.defsv_save = orig_defsv;
1662}
1663
1664
1665PERL_STATIC_INLINE void
1666S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1667{
1668 SV *sv;
1669
1670 PERL_ARGS_ASSERT_CX_POPGIVEN;
1671 assert(CxTYPE(cx) == CXt_GIVEN);
1672
1673 sv = GvSV(PL_defgv);
1674 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1675 cx->blk_givwhen.defsv_save = NULL;
1676 SvREFCNT_dec(sv);
1677}
1678
ec2c235b
KW
1679/* ------------------ util.h ------------------------------------------- */
1680
1681/*
1682=head1 Miscellaneous Functions
1683
1684=for apidoc foldEQ
1685
1686Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1687same
1688case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1689match themselves and their opposite case counterparts. Non-cased and non-ASCII
1690range bytes match only themselves.
1691
1692=cut
1693*/
1694
1695PERL_STATIC_INLINE I32
1696Perl_foldEQ(const char *s1, const char *s2, I32 len)
1697{
1698 const U8 *a = (const U8 *)s1;
1699 const U8 *b = (const U8 *)s2;
1700
1701 PERL_ARGS_ASSERT_FOLDEQ;
1702
1703 assert(len >= 0);
1704
1705 while (len--) {
1706 if (*a != *b && *a != PL_fold[*b])
1707 return 0;
1708 a++,b++;
1709 }
1710 return 1;
1711}
1712
0f9cb40c 1713PERL_STATIC_INLINE I32
ec2c235b
KW
1714Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1715{
1716 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1717 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1718 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1719 * does it check that the strings each have at least 'len' characters */
1720
1721 const U8 *a = (const U8 *)s1;
1722 const U8 *b = (const U8 *)s2;
1723
1724 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1725
1726 assert(len >= 0);
1727
1728 while (len--) {
1729 if (*a != *b && *a != PL_fold_latin1[*b]) {
1730 return 0;
1731 }
1732 a++, b++;
1733 }
1734 return 1;
1735}
1736
1737/*
1738=for apidoc foldEQ_locale
1739
1740Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1741same case-insensitively in the current locale; false otherwise.
1742
1743=cut
1744*/
1745
0f9cb40c 1746PERL_STATIC_INLINE I32
ec2c235b
KW
1747Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1748{
1749 dVAR;
1750 const U8 *a = (const U8 *)s1;
1751 const U8 *b = (const U8 *)s2;
1752
1753 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1754
1755 assert(len >= 0);
1756
1757 while (len--) {
1758 if (*a != *b && *a != PL_fold_locale[*b])
1759 return 0;
1760 a++,b++;
1761 }
1762 return 1;
1763}
1764
ed382232 1765/*
c8028aa6
TC
1766 * ex: set ts=8 sts=4 sw=4 et:
1767 */