This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: White-space, comments only
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
156 assert(isREGEXP(re));
157 return re->sv_u.svu_rx;
158}
159
27669aa4
FC
160/* ------------------------------- sv.h ------------------------------- */
161
162PERL_STATIC_INLINE SV *
163S_SvREFCNT_inc(SV *sv)
164{
2439e033 165 if (LIKELY(sv != NULL))
27669aa4
FC
166 SvREFCNT(sv)++;
167 return sv;
168}
169PERL_STATIC_INLINE SV *
170S_SvREFCNT_inc_NN(SV *sv)
171{
172 SvREFCNT(sv)++;
173 return sv;
174}
175PERL_STATIC_INLINE void
176S_SvREFCNT_inc_void(SV *sv)
177{
2439e033 178 if (LIKELY(sv != NULL))
27669aa4
FC
179 SvREFCNT(sv)++;
180}
75e16a44
FC
181PERL_STATIC_INLINE void
182S_SvREFCNT_dec(pTHX_ SV *sv)
183{
2439e033 184 if (LIKELY(sv != NULL)) {
75a9bf96 185 U32 rc = SvREFCNT(sv);
79e2a32a 186 if (LIKELY(rc > 1))
75a9bf96
DM
187 SvREFCNT(sv) = rc - 1;
188 else
189 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
190 }
191}
541377b1
FC
192
193PERL_STATIC_INLINE void
4a9a56a7
DM
194S_SvREFCNT_dec_NN(pTHX_ SV *sv)
195{
196 U32 rc = SvREFCNT(sv);
79e2a32a 197 if (LIKELY(rc > 1))
4a9a56a7
DM
198 SvREFCNT(sv) = rc - 1;
199 else
200 Perl_sv_free2(aTHX_ sv, rc);
201}
202
203PERL_STATIC_INLINE void
541377b1
FC
204SvAMAGIC_on(SV *sv)
205{
206 assert(SvROK(sv));
207 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
208}
209PERL_STATIC_INLINE void
210SvAMAGIC_off(SV *sv)
211{
212 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
213 HvAMAGIC_off(SvSTASH(SvRV(sv)));
214}
215
216PERL_STATIC_INLINE U32
541377b1
FC
217S_SvPADSTALE_on(SV *sv)
218{
c0683843 219 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
220 return SvFLAGS(sv) |= SVs_PADSTALE;
221}
222PERL_STATIC_INLINE U32
223S_SvPADSTALE_off(SV *sv)
224{
c0683843 225 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
226 return SvFLAGS(sv) &= ~SVs_PADSTALE;
227}
25fdce4a 228#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 229PERL_STATIC_INLINE STRLEN
6964422a 230S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 231{
25fdce4a 232 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
233 if (SvGAMAGIC(sv)) {
234 U8 *hopped = utf8_hop((U8 *)pv, pos);
235 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
236 return (STRLEN)(hopped - (U8 *)pv);
237 }
238 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
239}
240#endif
f019c49e 241
d1decf2b
TC
242/* ------------------------------- handy.h ------------------------------- */
243
244/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 245#ifdef GCC_DIAG_PRAGMA
6ab56f1e 246GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 247#endif
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
c1d6452f 253#ifdef GCC_DIAG_PRAGMA
6ab56f1e 254GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 255#endif
d1decf2b 256
a8a2ceaa
KW
257/* ------------------------------- utf8.h ------------------------------- */
258
2fe720e2
KW
259/*
260=head1 Unicode Support
261*/
262
55d09dc8
KW
263PERL_STATIC_INLINE void
264S_append_utf8_from_native_byte(const U8 byte, U8** dest)
265{
266 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
267 * encoded string at '*dest', updating '*dest' to include it */
268
55d09dc8
KW
269 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
270
6f2d5cbc 271 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 272 *((*dest)++) = byte;
55d09dc8 273 else {
a09ec51a
KW
274 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
275 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
276 }
277}
278
e123187a 279/*
2fe720e2 280=for apidoc valid_utf8_to_uvchr
2717076a 281Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
282the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
283it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
284non-Unicode code points are allowed.
285
286=cut
287
288 */
289
290PERL_STATIC_INLINE UV
291Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
292{
c41b2540 293 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
294 const U8* send = s + expectlen;
295 UV uv = *s;
296
297 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
298
299 if (retlen) {
300 *retlen = expectlen;
301 }
302
303 /* An invariant is trivially returned */
304 if (expectlen == 1) {
305 return uv;
306 }
307
308 /* Remove the leading bits that indicate the number of bytes, leaving just
309 * the bits that are part of the value */
310 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
311
312 /* Now, loop through the remaining bytes, accumulating each into the
313 * working total as we go. (I khw tried unrolling the loop for up to 4
314 * bytes, but there was no performance improvement) */
315 for (++s; s < send; s++) {
316 uv = UTF8_ACCUMULATE(uv, *s);
317 }
318
319 return UNI_TO_NATIVE(uv);
320
321}
322
1e599354
KW
323/*
324=for apidoc is_utf8_invariant_string
325
82c5d941 326Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 327regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
328EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
329are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
330the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
331characters are invariant, but so also are the C1 controls.
1e599354
KW
332
333If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
334use this option, that C<s> can't have embedded C<NUL> characters and has to
335have a terminating C<NUL> byte).
336
9f2abfde
KW
337See also
338C<L</is_utf8_string>>,
339C<L</is_utf8_string_flags>>,
340C<L</is_utf8_string_loc>>,
341C<L</is_utf8_string_loc_flags>>,
342C<L</is_utf8_string_loclen>>,
343C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
344C<L</is_utf8_fixed_width_buf_flags>>,
345C<L</is_utf8_fixed_width_buf_loc_flags>>,
346C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
347C<L</is_strict_utf8_string>>,
348C<L</is_strict_utf8_string_loc>>,
349C<L</is_strict_utf8_string_loclen>>,
350C<L</is_c9strict_utf8_string>>,
351C<L</is_c9strict_utf8_string_loc>>,
352and
353C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
354
355=cut
0cbf5865
KW
356
357*/
358
359#define is_utf8_invariant_string(s, len) \
360 is_utf8_invariant_string_loc(s, len, NULL)
361
362/*
363=for apidoc is_utf8_invariant_string_loc
364
365Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
366the first UTF-8 variant character in the C<ep> pointer; if all characters are
367UTF-8 invariant, this function does not change the contents of C<*ep>.
368
369=cut
370
371XXX On ASCII machines this could be sped up by doing word-at-a-time operations
372
1e599354
KW
373*/
374
375PERL_STATIC_INLINE bool
0cbf5865 376S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
1e599354
KW
377{
378 const U8* const send = s + (len ? len : strlen((const char *)s));
379 const U8* x = s;
380
0cbf5865
KW
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
382
383 while (x < send) {
384 if (UTF8_IS_INVARIANT(*x)) {
385 x++;
386 continue;
387 }
388
389 if (ep) {
390 *ep = x;
391 }
1e599354 392
0cbf5865 393 return FALSE;
1e599354
KW
394 }
395
396 return TRUE;
397}
398
7c93d8f0 399/*
5ff889fb
KW
400=for apidoc is_utf8_string
401
82c5d941
KW
402Returns TRUE if the first C<len> bytes of string C<s> form a valid
403Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
404be calculated using C<strlen(s)> (which means if you use this option, that C<s>
405can't have embedded C<NUL> characters and has to have a terminating C<NUL>
406byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
407
2717076a
KW
408This function considers Perl's extended UTF-8 to be valid. That means that
409code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
410considered valid by this function. Use C<L</is_strict_utf8_string>>,
411C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
412code points are considered valid.
5ff889fb 413
9f2abfde
KW
414See also
415C<L</is_utf8_invariant_string>>,
0cbf5865 416C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
417C<L</is_utf8_string_loc>>,
418C<L</is_utf8_string_loclen>>,
8bc127bf
KW
419C<L</is_utf8_fixed_width_buf_flags>>,
420C<L</is_utf8_fixed_width_buf_loc_flags>>,
421C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
422
423=cut
424*/
425
56e4cf64 426PERL_STATIC_INLINE bool
c41b2540 427Perl_is_utf8_string(const U8 *s, const STRLEN len)
5ff889fb 428{
35936d22
KW
429 /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
430 * Be aware of possible changes to that */
431
5ff889fb
KW
432 const U8* const send = s + (len ? len : strlen((const char *)s));
433 const U8* x = s;
434
435 PERL_ARGS_ASSERT_IS_UTF8_STRING;
436
437 while (x < send) {
c41b2540
KW
438 const STRLEN cur_len = isUTF8_CHAR(x, send);
439 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
440 return FALSE;
441 }
c41b2540 442 x += cur_len;
5ff889fb
KW
443 }
444
445 return TRUE;
446}
447
448/*
9f2abfde
KW
449=for apidoc is_strict_utf8_string
450
451Returns TRUE if the first C<len> bytes of string C<s> form a valid
452UTF-8-encoded string that is fully interchangeable by any application using
453Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
454calculated using C<strlen(s)> (which means if you use this option, that C<s>
455can't have embedded C<NUL> characters and has to have a terminating C<NUL>
456byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
457
458This function returns FALSE for strings containing any
459code points above the Unicode max of 0x10FFFF, surrogate code points, or
460non-character code points.
461
462See also
463C<L</is_utf8_invariant_string>>,
0cbf5865 464C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
465C<L</is_utf8_string>>,
466C<L</is_utf8_string_flags>>,
467C<L</is_utf8_string_loc>>,
468C<L</is_utf8_string_loc_flags>>,
469C<L</is_utf8_string_loclen>>,
470C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
471C<L</is_utf8_fixed_width_buf_flags>>,
472C<L</is_utf8_fixed_width_buf_loc_flags>>,
473C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
474C<L</is_strict_utf8_string_loc>>,
475C<L</is_strict_utf8_string_loclen>>,
476C<L</is_c9strict_utf8_string>>,
477C<L</is_c9strict_utf8_string_loc>>,
478and
479C<L</is_c9strict_utf8_string_loclen>>.
480
481=cut
482*/
483
484PERL_STATIC_INLINE bool
485S_is_strict_utf8_string(const U8 *s, const STRLEN len)
486{
487 const U8* const send = s + (len ? len : strlen((const char *)s));
488 const U8* x = s;
489
490 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
491
492 while (x < send) {
493 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
494 if (UNLIKELY(! cur_len)) {
495 return FALSE;
496 }
497 x += cur_len;
498 }
499
500 return TRUE;
501}
502
503/*
504=for apidoc is_c9strict_utf8_string
505
506Returns TRUE if the first C<len> bytes of string C<s> form a valid
507UTF-8-encoded string that conforms to
508L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
509otherwise it returns FALSE. If C<len> is 0, it will be calculated using
510C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
511C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
512characters being ASCII constitute 'a valid UTF-8 string'.
513
514This function returns FALSE for strings containing any code points above the
515Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
516code points per
517L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
518
519See also
520C<L</is_utf8_invariant_string>>,
0cbf5865 521C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
522C<L</is_utf8_string>>,
523C<L</is_utf8_string_flags>>,
524C<L</is_utf8_string_loc>>,
525C<L</is_utf8_string_loc_flags>>,
526C<L</is_utf8_string_loclen>>,
527C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
528C<L</is_utf8_fixed_width_buf_flags>>,
529C<L</is_utf8_fixed_width_buf_loc_flags>>,
530C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
531C<L</is_strict_utf8_string>>,
532C<L</is_strict_utf8_string_loc>>,
533C<L</is_strict_utf8_string_loclen>>,
534C<L</is_c9strict_utf8_string_loc>>,
535and
536C<L</is_c9strict_utf8_string_loclen>>.
537
538=cut
539*/
540
541PERL_STATIC_INLINE bool
542S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
543{
544 const U8* const send = s + (len ? len : strlen((const char *)s));
545 const U8* x = s;
546
547 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
548
549 while (x < send) {
550 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
551 if (UNLIKELY(! cur_len)) {
552 return FALSE;
553 }
554 x += cur_len;
555 }
556
557 return TRUE;
558}
559
560/* The above 3 functions could have been moved into the more general one just
561 * below, and made #defines that call it with the right 'flags'. They are
562 * currently kept separate to increase their chances of getting inlined */
563
564/*
565=for apidoc is_utf8_string_flags
566
567Returns TRUE if the first C<len> bytes of string C<s> form a valid
568UTF-8 string, subject to the restrictions imposed by C<flags>;
569returns FALSE otherwise. If C<len> is 0, it will be calculated
570using C<strlen(s)> (which means if you use this option, that C<s> can't have
571embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
572that all characters being ASCII constitute 'a valid UTF-8 string'.
573
574If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
575C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
576as C<L</is_strict_utf8_string>>; and if C<flags> is
577C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
578C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
579combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
580C<L</utf8n_to_uvchr>>, with the same meanings.
581
582See also
583C<L</is_utf8_invariant_string>>,
0cbf5865 584C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
585C<L</is_utf8_string>>,
586C<L</is_utf8_string_loc>>,
587C<L</is_utf8_string_loc_flags>>,
588C<L</is_utf8_string_loclen>>,
589C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
590C<L</is_utf8_fixed_width_buf_flags>>,
591C<L</is_utf8_fixed_width_buf_loc_flags>>,
592C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
593C<L</is_strict_utf8_string>>,
594C<L</is_strict_utf8_string_loc>>,
595C<L</is_strict_utf8_string_loclen>>,
596C<L</is_c9strict_utf8_string>>,
597C<L</is_c9strict_utf8_string_loc>>,
598and
599C<L</is_c9strict_utf8_string_loclen>>.
600
601=cut
602*/
603
604PERL_STATIC_INLINE bool
605S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
606{
607 const U8* const send = s + (len ? len : strlen((const char *)s));
608 const U8* x = s;
609
610 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
611 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
612 |UTF8_DISALLOW_ABOVE_31_BIT)));
613
614 if (flags == 0) {
615 return is_utf8_string(s, len);
616 }
617
618 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
619 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
620 {
621 return is_strict_utf8_string(s, len);
622 }
623
624 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
625 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
626 {
627 return is_c9strict_utf8_string(s, len);
628 }
629
630 while (x < send) {
631 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
632 if (UNLIKELY(! cur_len)) {
633 return FALSE;
634 }
635 x += cur_len;
636 }
637
638 return TRUE;
639}
640
641/*
5ff889fb
KW
642
643=for apidoc is_utf8_string_loc
644
2717076a 645Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 646case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 647"utf8ness success") in the C<ep> pointer.
5ff889fb 648
2717076a 649See also C<L</is_utf8_string_loclen>>.
5ff889fb 650
3964c812
KW
651=cut
652*/
653
654#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
655
656/*
657
5ff889fb
KW
658=for apidoc is_utf8_string_loclen
659
2717076a 660Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 661case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 662"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 663encoded characters in the C<el> pointer.
5ff889fb 664
2717076a 665See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
666
667=cut
668*/
669
56e4cf64 670PERL_STATIC_INLINE bool
c41b2540 671Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb
KW
672{
673 const U8* const send = s + (len ? len : strlen((const char *)s));
674 const U8* x = s;
675 STRLEN outlen = 0;
676
677 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
678
679 while (x < send) {
c41b2540
KW
680 const STRLEN cur_len = isUTF8_CHAR(x, send);
681 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
682 break;
683 }
c41b2540 684 x += cur_len;
5ff889fb
KW
685 outlen++;
686 }
687
688 if (el)
689 *el = outlen;
690
691 if (ep) {
692 *ep = x;
693 }
694
695 return (x == send);
696}
697
698/*
9f2abfde
KW
699
700=for apidoc is_strict_utf8_string_loc
701
702Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
703case of "utf8ness failure") or the location C<s>+C<len> (in the case of
704"utf8ness success") in the C<ep> pointer.
705
706See also C<L</is_strict_utf8_string_loclen>>.
707
708=cut
709*/
710
711#define is_strict_utf8_string_loc(s, len, ep) \
712 is_strict_utf8_string_loclen(s, len, ep, 0)
713
714/*
715
716=for apidoc is_strict_utf8_string_loclen
717
718Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
719case of "utf8ness failure") or the location C<s>+C<len> (in the case of
720"utf8ness success") in the C<ep> pointer, and the number of UTF-8
721encoded characters in the C<el> pointer.
722
723See also C<L</is_strict_utf8_string_loc>>.
724
725=cut
726*/
727
728PERL_STATIC_INLINE bool
729S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
730{
731 const U8* const send = s + (len ? len : strlen((const char *)s));
732 const U8* x = s;
733 STRLEN outlen = 0;
734
735 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
736
737 while (x < send) {
738 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
739 if (UNLIKELY(! cur_len)) {
740 break;
741 }
742 x += cur_len;
743 outlen++;
744 }
745
746 if (el)
747 *el = outlen;
748
749 if (ep) {
750 *ep = x;
751 }
752
753 return (x == send);
754}
755
756/*
757
758=for apidoc is_c9strict_utf8_string_loc
759
760Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
761the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
762"utf8ness success") in the C<ep> pointer.
763
764See also C<L</is_c9strict_utf8_string_loclen>>.
765
766=cut
767*/
768
769#define is_c9strict_utf8_string_loc(s, len, ep) \
770 is_c9strict_utf8_string_loclen(s, len, ep, 0)
771
772/*
773
774=for apidoc is_c9strict_utf8_string_loclen
775
776Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
777the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
778"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
779characters in the C<el> pointer.
780
781See also C<L</is_c9strict_utf8_string_loc>>.
782
783=cut
784*/
785
786PERL_STATIC_INLINE bool
787S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
788{
789 const U8* const send = s + (len ? len : strlen((const char *)s));
790 const U8* x = s;
791 STRLEN outlen = 0;
792
793 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
794
795 while (x < send) {
796 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
797 if (UNLIKELY(! cur_len)) {
798 break;
799 }
800 x += cur_len;
801 outlen++;
802 }
803
804 if (el)
805 *el = outlen;
806
807 if (ep) {
808 *ep = x;
809 }
810
811 return (x == send);
812}
813
814/*
815
816=for apidoc is_utf8_string_loc_flags
817
818Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
819case of "utf8ness failure") or the location C<s>+C<len> (in the case of
820"utf8ness success") in the C<ep> pointer.
821
822See also C<L</is_utf8_string_loclen_flags>>.
823
824=cut
825*/
826
827#define is_utf8_string_loc_flags(s, len, ep, flags) \
828 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
829
830
831/* The above 3 actual functions could have been moved into the more general one
832 * just below, and made #defines that call it with the right 'flags'. They are
833 * currently kept separate to increase their chances of getting inlined */
834
835/*
836
837=for apidoc is_utf8_string_loclen_flags
838
839Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
840case of "utf8ness failure") or the location C<s>+C<len> (in the case of
841"utf8ness success") in the C<ep> pointer, and the number of UTF-8
842encoded characters in the C<el> pointer.
843
844See also C<L</is_utf8_string_loc_flags>>.
845
846=cut
847*/
848
849PERL_STATIC_INLINE bool
850S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
851{
852 const U8* const send = s + (len ? len : strlen((const char *)s));
853 const U8* x = s;
854 STRLEN outlen = 0;
855
856 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
857 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
858 |UTF8_DISALLOW_ABOVE_31_BIT)));
859
860 if (flags == 0) {
861 return is_utf8_string_loclen(s, len, ep, el);
862 }
863
864 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
865 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
866 {
867 return is_strict_utf8_string_loclen(s, len, ep, el);
868 }
869
870 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
871 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
872 {
873 return is_c9strict_utf8_string_loclen(s, len, ep, el);
874 }
875
876 while (x < send) {
877 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
878 if (UNLIKELY(! cur_len)) {
879 break;
880 }
881 x += cur_len;
882 outlen++;
883 }
884
885 if (el)
886 *el = outlen;
887
888 if (ep) {
889 *ep = x;
890 }
891
892 return (x == send);
893}
894
895/*
7c93d8f0
KW
896=for apidoc utf8_distance
897
898Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
899and C<b>.
900
901WARNING: use only if you *know* that the pointers point inside the
902same UTF-8 buffer.
903
904=cut
905*/
906
907PERL_STATIC_INLINE IV
908Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
909{
910 PERL_ARGS_ASSERT_UTF8_DISTANCE;
911
912 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
913}
914
915/*
916=for apidoc utf8_hop
917
918Return the UTF-8 pointer C<s> displaced by C<off> characters, either
919forward or backward.
920
921WARNING: do not use the following unless you *know* C<off> is within
922the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
923on the first byte of character or just after the last byte of a character.
924
925=cut
926*/
927
928PERL_STATIC_INLINE U8 *
929Perl_utf8_hop(const U8 *s, SSize_t off)
930{
931 PERL_ARGS_ASSERT_UTF8_HOP;
932
933 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
934 * the bitops (especially ~) can create illegal UTF-8.
935 * In other words: in Perl UTF-8 is not just for Unicode. */
936
937 if (off >= 0) {
938 while (off--)
939 s += UTF8SKIP(s);
940 }
941 else {
942 while (off++) {
943 s--;
944 while (UTF8_IS_CONTINUATION(*s))
945 s--;
946 }
947 }
de979548 948 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 949 return (U8 *)s;
de979548 950 GCC_DIAG_RESTORE;
7c93d8f0
KW
951}
952
4dab108f 953/*
65df57a8
TC
954=for apidoc utf8_hop_forward
955
956Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
957forward.
958
959C<off> must be non-negative.
960
961C<s> must be before or equal to C<end>.
962
963When moving forward it will not move beyond C<end>.
964
965Will not exceed this limit even if the string is not valid "UTF-8".
966
967=cut
968*/
969
970PERL_STATIC_INLINE U8 *
971Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
972{
973 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
974
975 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
976 * the bitops (especially ~) can create illegal UTF-8.
977 * In other words: in Perl UTF-8 is not just for Unicode. */
978
979 assert(s <= end);
980 assert(off >= 0);
981
982 while (off--) {
983 STRLEN skip = UTF8SKIP(s);
de979548
P
984 if ((STRLEN)(end - s) <= skip) {
985 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 986 return (U8 *)end;
de979548
P
987 GCC_DIAG_RESTORE;
988 }
65df57a8
TC
989 s += skip;
990 }
991
de979548 992 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 993 return (U8 *)s;
de979548 994 GCC_DIAG_RESTORE;
65df57a8
TC
995}
996
997/*
998=for apidoc utf8_hop_back
999
1000Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1001backward.
1002
1003C<off> must be non-positive.
1004
1005C<s> must be after or equal to C<start>.
1006
1007When moving backward it will not move before C<start>.
1008
1009Will not exceed this limit even if the string is not valid "UTF-8".
1010
1011=cut
1012*/
1013
1014PERL_STATIC_INLINE U8 *
1015Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1016{
1017 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1018
1019 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1020 * the bitops (especially ~) can create illegal UTF-8.
1021 * In other words: in Perl UTF-8 is not just for Unicode. */
1022
1023 assert(start <= s);
1024 assert(off <= 0);
1025
1026 while (off++ && s > start) {
1027 s--;
1028 while (UTF8_IS_CONTINUATION(*s) && s > start)
1029 s--;
1030 }
1031
de979548 1032 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1033 return (U8 *)s;
de979548 1034 GCC_DIAG_RESTORE;
65df57a8
TC
1035}
1036
1037/*
1038=for apidoc utf8_hop_safe
1039
1040Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1041either forward or backward.
1042
1043When moving backward it will not move before C<start>.
1044
1045When moving forward it will not move beyond C<end>.
1046
1047Will not exceed those limits even if the string is not valid "UTF-8".
1048
1049=cut
1050*/
1051
1052PERL_STATIC_INLINE U8 *
1053Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1054{
1055 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1056
1057 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1058 * the bitops (especially ~) can create illegal UTF-8.
1059 * In other words: in Perl UTF-8 is not just for Unicode. */
1060
1061 assert(start <= s && s <= end);
1062
1063 if (off >= 0) {
1064 return utf8_hop_forward(s, off, end);
1065 }
1066 else {
1067 return utf8_hop_back(s, off, start);
1068 }
1069}
1070
1071/*
4dab108f
KW
1072
1073=for apidoc is_utf8_valid_partial_char
1074
6cbb9248
KW
1075Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1076S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1077points. Otherwise, it returns 1 if there exists at least one non-empty
1078sequence of bytes that when appended to sequence C<s>, starting at position
1079C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1080otherwise returns 0.
1081
1082In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1083point.
1084
1085This is useful when a fixed-length buffer is being tested for being well-formed
1086UTF-8, but the final few bytes in it don't comprise a full character; that is,
1087it is split somewhere in the middle of the final code point's UTF-8
1088representation. (Presumably when the buffer is refreshed with the next chunk
1089of data, the new first bytes will complete the partial code point.) This
1090function is used to verify that the final bytes in the current buffer are in
1091fact the legal beginning of some code point, so that if they aren't, the
1092failure can be signalled without having to wait for the next read.
4dab108f
KW
1093
1094=cut
1095*/
2717076a
KW
1096#define is_utf8_valid_partial_char(s, e) \
1097 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1098
1099/*
1100
1101=for apidoc is_utf8_valid_partial_char_flags
1102
1103Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1104or not the input is a valid UTF-8 encoded partial character, but it takes an
1105extra parameter, C<flags>, which can further restrict which code points are
1106considered valid.
1107
1108If C<flags> is 0, this behaves identically to
1109C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1110of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1111there is any sequence of bytes that can complete the input partial character in
1112such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1113TRUE; otherwise FALSE. Non character code points cannot be determined based on
1114partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1115determined from just the first one or two bytes.
1116
1117=cut
1118 */
1119
56e4cf64 1120PERL_STATIC_INLINE bool
f1c999a7 1121S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1122{
f1c999a7 1123 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1124
f1c999a7
KW
1125 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1126 |UTF8_DISALLOW_ABOVE_31_BIT)));
4dab108f 1127
8875bd48 1128 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1129 return FALSE;
1130 }
1131
f1c999a7 1132 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1133}
1134
8bc127bf
KW
1135/*
1136
1137=for apidoc is_utf8_fixed_width_buf_flags
1138
1139Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1140is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1141otherwise it returns FALSE.
1142
1143If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1144without restriction. If the final few bytes of the buffer do not form a
1145complete code point, this will return TRUE anyway, provided that
1146C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1147
1148If C<flags> in non-zero, it can be any combination of the
1149C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1150same meanings.
1151
1152This function differs from C<L</is_utf8_string_flags>> only in that the latter
1153returns FALSE if the final few bytes of the string don't form a complete code
1154point.
1155
1156=cut
1157 */
1158#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1159 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1160
1161/*
1162
1163=for apidoc is_utf8_fixed_width_buf_loc_flags
1164
1165Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1166failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1167to the beginning of any partial character at the end of the buffer; if there is
1168no partial character C<*ep> will contain C<s>+C<len>.
1169
1170See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1171
1172=cut
1173*/
1174
1175#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1176 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1177
1178/*
1179
1180=for apidoc is_utf8_fixed_width_buf_loclen_flags
1181
1182Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1183complete, valid characters found in the C<el> pointer.
1184
1185=cut
1186*/
1187
1188PERL_STATIC_INLINE bool
1189S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1190 const STRLEN len,
1191 const U8 **ep,
1192 STRLEN *el,
1193 const U32 flags)
1194{
1195 const U8 * maybe_partial;
1196
1197 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1198
1199 if (! ep) {
1200 ep = &maybe_partial;
1201 }
1202
1203 /* If it's entirely valid, return that; otherwise see if the only error is
1204 * that the final few bytes are for a partial character */
1205 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1206 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1207}
1208
c8028aa6
TC
1209/* ------------------------------- perl.h ----------------------------- */
1210
1211/*
dcccc8ff
KW
1212=head1 Miscellaneous Functions
1213
41188aa0 1214=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1215
6602b933 1216Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1217If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1218
1219Return TRUE if the name is safe.
1220
796b6530 1221Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1222
1223=cut
1224*/
1225
1226PERL_STATIC_INLINE bool
41188aa0 1227S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1228 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1229 * perl itself uses xce*() functions which accept 8-bit strings.
1230 */
1231
1232 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1233
6c4650b3 1234 if (len > 1) {
c8028aa6 1235 char *null_at;
41188aa0 1236 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1237 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1238 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1239 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1240 what, op_name, pv, null_at+1);
c8028aa6
TC
1241 return FALSE;
1242 }
1243 }
1244
1245 return TRUE;
1246}
1247
1248/*
7cb3f959
TC
1249
1250Return true if the supplied filename has a newline character
fa6c7d00 1251immediately before the first (hopefully only) NUL.
7cb3f959
TC
1252
1253My original look at this incorrectly used the len from SvPV(), but
1254that's incorrect, since we allow for a NUL in pv[len-1].
1255
1256So instead, strlen() and work from there.
1257
1258This allow for the user reading a filename, forgetting to chomp it,
1259then calling:
1260
1261 open my $foo, "$file\0";
1262
1263*/
1264
1265#ifdef PERL_CORE
1266
1267PERL_STATIC_INLINE bool
1268S_should_warn_nl(const char *pv) {
1269 STRLEN len;
1270
1271 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1272
1273 len = strlen(pv);
1274
1275 return len > 0 && pv[len-1] == '\n';
1276}
1277
1278#endif
1279
81d52ecd
JH
1280/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1281
1282#define MAX_CHARSET_NAME_LENGTH 2
1283
1284PERL_STATIC_INLINE const char *
1285get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1286{
1287 /* Returns a string that corresponds to the name of the regex character set
1288 * given by 'flags', and *lenp is set the length of that string, which
1289 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1290
1291 *lenp = 1;
1292 switch (get_regex_charset(flags)) {
1293 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1294 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1295 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1296 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1297 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1298 *lenp = 2;
1299 return ASCII_MORE_RESTRICT_PAT_MODS;
1300 }
1301 /* The NOT_REACHED; hides an assert() which has a rather complex
1302 * definition in perl.h. */
1303 NOT_REACHED; /* NOTREACHED */
1304 return "?"; /* Unknown */
1305}
1306
7cb3f959 1307/*
ed382232
TC
1308
1309Return false if any get magic is on the SV other than taint magic.
1310
1311*/
1312
1313PERL_STATIC_INLINE bool
1314S_sv_only_taint_gmagic(SV *sv) {
1315 MAGIC *mg = SvMAGIC(sv);
1316
1317 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1318
1319 while (mg) {
1320 if (mg->mg_type != PERL_MAGIC_taint
1321 && !(mg->mg_flags & MGf_GSKIP)
1322 && mg->mg_virtual->svt_get) {
1323 return FALSE;
1324 }
1325 mg = mg->mg_moremagic;
1326 }
1327
1328 return TRUE;
1329}
1330
ed8ff0f3
DM
1331/* ------------------ cop.h ------------------------------------------- */
1332
1333
1334/* Enter a block. Push a new base context and return its address. */
1335
1336PERL_STATIC_INLINE PERL_CONTEXT *
1337S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1338{
1339 PERL_CONTEXT * cx;
1340
1341 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1342
1343 CXINC;
1344 cx = CX_CUR();
1345 cx->cx_type = type;
1346 cx->blk_gimme = gimme;
1347 cx->blk_oldsaveix = saveix;
4caf7d8c 1348 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1349 cx->blk_oldcop = PL_curcop;
4caf7d8c 1350 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1351 cx->blk_oldscopesp = PL_scopestack_ix;
1352 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1353 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1354
1355 PL_tmps_floor = PL_tmps_ix;
1356 CX_DEBUG(cx, "PUSH");
1357 return cx;
1358}
1359
1360
1361/* Exit a block (RETURN and LAST). */
1362
1363PERL_STATIC_INLINE void
1364S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1365{
1366 PERL_ARGS_ASSERT_CX_POPBLOCK;
1367
1368 CX_DEBUG(cx, "POP");
1369 /* these 3 are common to cx_popblock and cx_topblock */
1370 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1371 PL_scopestack_ix = cx->blk_oldscopesp;
1372 PL_curpm = cx->blk_oldpm;
1373
1374 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1375 * and leaves a CX entry lying around for repeated use, so
1376 * skip for multicall */ \
1377 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1378 || PL_savestack_ix == cx->blk_oldsaveix);
1379 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1380 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1381}
1382
1383/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1384 * Whereas cx_popblock() restores the state to the point just before
1385 * cx_pushblock() was called, cx_topblock() restores it to the point just
1386 * *after* cx_pushblock() was called. */
1387
1388PERL_STATIC_INLINE void
1389S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1390{
1391 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1392
1393 CX_DEBUG(cx, "TOP");
1394 /* these 3 are common to cx_popblock and cx_topblock */
1395 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1396 PL_scopestack_ix = cx->blk_oldscopesp;
1397 PL_curpm = cx->blk_oldpm;
1398
1399 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1400}
1401
1402
a73d8813
DM
1403PERL_STATIC_INLINE void
1404S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1405{
1406 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1407
1408 PERL_ARGS_ASSERT_CX_PUSHSUB;
1409
3f6bd23a 1410 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1411 cx->blk_sub.cv = cv;
1412 cx->blk_sub.olddepth = CvDEPTH(cv);
1413 cx->blk_sub.prevcomppad = PL_comppad;
1414 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1415 cx->blk_sub.retop = retop;
1416 SvREFCNT_inc_simple_void_NN(cv);
1417 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1418}
1419
1420
1421/* subsets of cx_popsub() */
1422
1423PERL_STATIC_INLINE void
1424S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1425{
1426 CV *cv;
1427
1428 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1429 assert(CxTYPE(cx) == CXt_SUB);
1430
1431 PL_comppad = cx->blk_sub.prevcomppad;
1432 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1433 cv = cx->blk_sub.cv;
1434 CvDEPTH(cv) = cx->blk_sub.olddepth;
1435 cx->blk_sub.cv = NULL;
1436 SvREFCNT_dec(cv);
1437}
1438
1439
1440/* handle the @_ part of leaving a sub */
1441
1442PERL_STATIC_INLINE void
1443S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1444{
1445 AV *av;
1446
1447 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1448 assert(CxTYPE(cx) == CXt_SUB);
1449 assert(AvARRAY(MUTABLE_AV(
1450 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1451 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1452
1453 CX_POP_SAVEARRAY(cx);
1454 av = MUTABLE_AV(PAD_SVl(0));
1455 if (UNLIKELY(AvREAL(av)))
1456 /* abandon @_ if it got reified */
1457 clear_defarray(av, 0);
1458 else {
1459 CLEAR_ARGARRAY(av);
1460 }
1461}
1462
1463
1464PERL_STATIC_INLINE void
1465S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1466{
1467 PERL_ARGS_ASSERT_CX_POPSUB;
1468 assert(CxTYPE(cx) == CXt_SUB);
1469
3f6bd23a 1470 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1471
1472 if (CxHASARGS(cx))
1473 cx_popsub_args(cx);
1474 cx_popsub_common(cx);
1475}
1476
1477
6a7d52cc
DM
1478PERL_STATIC_INLINE void
1479S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1480{
1481 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1482
1483 cx->blk_format.cv = cv;
1484 cx->blk_format.retop = retop;
1485 cx->blk_format.gv = gv;
1486 cx->blk_format.dfoutgv = PL_defoutgv;
1487 cx->blk_format.prevcomppad = PL_comppad;
1488 cx->blk_u16 = 0;
1489
1490 SvREFCNT_inc_simple_void_NN(cv);
1491 CvDEPTH(cv)++;
1492 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1493}
1494
1495
1496PERL_STATIC_INLINE void
1497S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1498{
1499 CV *cv;
1500 GV *dfout;
1501
1502 PERL_ARGS_ASSERT_CX_POPFORMAT;
1503 assert(CxTYPE(cx) == CXt_FORMAT);
1504
1505 dfout = cx->blk_format.dfoutgv;
1506 setdefout(dfout);
1507 cx->blk_format.dfoutgv = NULL;
1508 SvREFCNT_dec_NN(dfout);
1509
1510 PL_comppad = cx->blk_format.prevcomppad;
1511 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1512 cv = cx->blk_format.cv;
1513 cx->blk_format.cv = NULL;
1514 --CvDEPTH(cv);
1515 SvREFCNT_dec_NN(cv);
1516}
1517
1518
13febba5
DM
1519PERL_STATIC_INLINE void
1520S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1521{
1522 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1523
1524 cx->blk_eval.retop = retop;
1525 cx->blk_eval.old_namesv = namesv;
1526 cx->blk_eval.old_eval_root = PL_eval_root;
1527 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1528 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1529 cx->blk_eval.cur_top_env = PL_top_env;
1530
4c57ced5 1531 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1532 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1533 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1534}
1535
1536
1537PERL_STATIC_INLINE void
1538S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1539{
1540 SV *sv;
1541
1542 PERL_ARGS_ASSERT_CX_POPEVAL;
1543 assert(CxTYPE(cx) == CXt_EVAL);
1544
1545 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1546 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1547 PL_eval_root = cx->blk_eval.old_eval_root;
1548 sv = cx->blk_eval.cur_text;
4c57ced5 1549 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1550 cx->blk_eval.cur_text = NULL;
1551 SvREFCNT_dec_NN(sv);
1552 }
1553
1554 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1555 if (sv) {
1556 cx->blk_eval.old_namesv = NULL;
1557 SvREFCNT_dec_NN(sv);
1558 }
13febba5 1559}
6a7d52cc 1560
a73d8813 1561
d1b6bf72
DM
1562/* push a plain loop, i.e.
1563 * { block }
1564 * while (cond) { block }
1565 * for (init;cond;continue) { block }
1566 * This loop can be last/redo'ed etc.
1567 */
1568
1569PERL_STATIC_INLINE void
1570S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1571{
1572 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1573 cx->blk_loop.my_op = cLOOP;
1574}
1575
1576
1577/* push a true for loop, i.e.
1578 * for var (list) { block }
1579 */
1580
1581PERL_STATIC_INLINE void
1582S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1583{
1584 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1585
1586 /* this one line is common with cx_pushloop_plain */
1587 cx->blk_loop.my_op = cLOOP;
1588
1589 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1590 cx->blk_loop.itersave = itersave;
1591#ifdef USE_ITHREADS
1592 cx->blk_loop.oldcomppad = PL_comppad;
1593#endif
1594}
1595
1596
1597/* pop all loop types, including plain */
1598
1599PERL_STATIC_INLINE void
1600S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1601{
1602 PERL_ARGS_ASSERT_CX_POPLOOP;
1603
1604 assert(CxTYPE_is_LOOP(cx));
1605 if ( CxTYPE(cx) == CXt_LOOP_ARY
1606 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1607 {
1608 /* Free ary or cur. This assumes that state_u.ary.ary
1609 * aligns with state_u.lazysv.cur. See cx_dup() */
1610 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1611 cx->blk_loop.state_u.lazysv.cur = NULL;
1612 SvREFCNT_dec_NN(sv);
1613 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1614 sv = cx->blk_loop.state_u.lazysv.end;
1615 cx->blk_loop.state_u.lazysv.end = NULL;
1616 SvREFCNT_dec_NN(sv);
1617 }
1618 }
1619 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1620 SV *cursv;
1621 SV **svp = (cx)->blk_loop.itervar_u.svp;
1622 if ((cx->cx_type & CXp_FOR_GV))
1623 svp = &GvSV((GV*)svp);
1624 cursv = *svp;
1625 *svp = cx->blk_loop.itersave;
1626 cx->blk_loop.itersave = NULL;
1627 SvREFCNT_dec(cursv);
1628 }
1629}
1630
2a7b7c61
DM
1631
1632PERL_STATIC_INLINE void
1633S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1634{
1635 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1636
1637 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1638}
1639
1640
1641PERL_STATIC_INLINE void
1642S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1643{
1644 PERL_ARGS_ASSERT_CX_POPWHEN;
1645 assert(CxTYPE(cx) == CXt_WHEN);
1646
1647 PERL_UNUSED_ARG(cx);
59a14f30 1648 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1649 /* currently NOOP */
1650}
1651
1652
1653PERL_STATIC_INLINE void
1654S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1655{
1656 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1657
1658 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1659 cx->blk_givwhen.defsv_save = orig_defsv;
1660}
1661
1662
1663PERL_STATIC_INLINE void
1664S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1665{
1666 SV *sv;
1667
1668 PERL_ARGS_ASSERT_CX_POPGIVEN;
1669 assert(CxTYPE(cx) == CXt_GIVEN);
1670
1671 sv = GvSV(PL_defgv);
1672 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1673 cx->blk_givwhen.defsv_save = NULL;
1674 SvREFCNT_dec(sv);
1675}
1676
ec2c235b
KW
1677/* ------------------ util.h ------------------------------------------- */
1678
1679/*
1680=head1 Miscellaneous Functions
1681
1682=for apidoc foldEQ
1683
1684Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1685same
1686case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1687match themselves and their opposite case counterparts. Non-cased and non-ASCII
1688range bytes match only themselves.
1689
1690=cut
1691*/
1692
1693PERL_STATIC_INLINE I32
1694Perl_foldEQ(const char *s1, const char *s2, I32 len)
1695{
1696 const U8 *a = (const U8 *)s1;
1697 const U8 *b = (const U8 *)s2;
1698
1699 PERL_ARGS_ASSERT_FOLDEQ;
1700
1701 assert(len >= 0);
1702
1703 while (len--) {
1704 if (*a != *b && *a != PL_fold[*b])
1705 return 0;
1706 a++,b++;
1707 }
1708 return 1;
1709}
1710
0f9cb40c 1711PERL_STATIC_INLINE I32
ec2c235b
KW
1712Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1713{
1714 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1715 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1716 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1717 * does it check that the strings each have at least 'len' characters */
1718
1719 const U8 *a = (const U8 *)s1;
1720 const U8 *b = (const U8 *)s2;
1721
1722 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1723
1724 assert(len >= 0);
1725
1726 while (len--) {
1727 if (*a != *b && *a != PL_fold_latin1[*b]) {
1728 return 0;
1729 }
1730 a++, b++;
1731 }
1732 return 1;
1733}
1734
1735/*
1736=for apidoc foldEQ_locale
1737
1738Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1739same case-insensitively in the current locale; false otherwise.
1740
1741=cut
1742*/
1743
0f9cb40c 1744PERL_STATIC_INLINE I32
ec2c235b
KW
1745Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1746{
1747 dVAR;
1748 const U8 *a = (const U8 *)s1;
1749 const U8 *b = (const U8 *)s2;
1750
1751 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1752
1753 assert(len >= 0);
1754
1755 while (len--) {
1756 if (*a != *b && *a != PL_fold_locale[*b])
1757 return 0;
1758 a++,b++;
1759 }
1760 return 1;
1761}
1762
ed382232 1763/*
c8028aa6
TC
1764 * ex: set ts=8 sts=4 sw=4 et:
1765 */