This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlapi: Clarifications, nits in Unicode support docs
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
134 "MARK top %p %"IVdf"\n",
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
144 "MARK pop %p %"IVdf"\n",
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
156 assert(isREGEXP(re));
157 return re->sv_u.svu_rx;
158}
159
27669aa4
FC
160/* ------------------------------- sv.h ------------------------------- */
161
162PERL_STATIC_INLINE SV *
163S_SvREFCNT_inc(SV *sv)
164{
2439e033 165 if (LIKELY(sv != NULL))
27669aa4
FC
166 SvREFCNT(sv)++;
167 return sv;
168}
169PERL_STATIC_INLINE SV *
170S_SvREFCNT_inc_NN(SV *sv)
171{
172 SvREFCNT(sv)++;
173 return sv;
174}
175PERL_STATIC_INLINE void
176S_SvREFCNT_inc_void(SV *sv)
177{
2439e033 178 if (LIKELY(sv != NULL))
27669aa4
FC
179 SvREFCNT(sv)++;
180}
75e16a44
FC
181PERL_STATIC_INLINE void
182S_SvREFCNT_dec(pTHX_ SV *sv)
183{
2439e033 184 if (LIKELY(sv != NULL)) {
75a9bf96 185 U32 rc = SvREFCNT(sv);
79e2a32a 186 if (LIKELY(rc > 1))
75a9bf96
DM
187 SvREFCNT(sv) = rc - 1;
188 else
189 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
190 }
191}
541377b1
FC
192
193PERL_STATIC_INLINE void
4a9a56a7
DM
194S_SvREFCNT_dec_NN(pTHX_ SV *sv)
195{
196 U32 rc = SvREFCNT(sv);
79e2a32a 197 if (LIKELY(rc > 1))
4a9a56a7
DM
198 SvREFCNT(sv) = rc - 1;
199 else
200 Perl_sv_free2(aTHX_ sv, rc);
201}
202
203PERL_STATIC_INLINE void
541377b1
FC
204SvAMAGIC_on(SV *sv)
205{
206 assert(SvROK(sv));
207 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
208}
209PERL_STATIC_INLINE void
210SvAMAGIC_off(SV *sv)
211{
212 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
213 HvAMAGIC_off(SvSTASH(SvRV(sv)));
214}
215
216PERL_STATIC_INLINE U32
541377b1
FC
217S_SvPADSTALE_on(SV *sv)
218{
c0683843 219 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
220 return SvFLAGS(sv) |= SVs_PADSTALE;
221}
222PERL_STATIC_INLINE U32
223S_SvPADSTALE_off(SV *sv)
224{
c0683843 225 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
226 return SvFLAGS(sv) &= ~SVs_PADSTALE;
227}
25fdce4a 228#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 229PERL_STATIC_INLINE STRLEN
6964422a 230S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 231{
25fdce4a 232 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
233 if (SvGAMAGIC(sv)) {
234 U8 *hopped = utf8_hop((U8 *)pv, pos);
235 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
236 return (STRLEN)(hopped - (U8 *)pv);
237 }
238 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
239}
240#endif
f019c49e 241
d1decf2b
TC
242/* ------------------------------- handy.h ------------------------------- */
243
244/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 245#ifdef GCC_DIAG_PRAGMA
6ab56f1e 246GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 247#endif
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
c1d6452f 253#ifdef GCC_DIAG_PRAGMA
6ab56f1e 254GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 255#endif
d1decf2b 256
a8a2ceaa
KW
257/* ------------------------------- utf8.h ------------------------------- */
258
2fe720e2
KW
259/*
260=head1 Unicode Support
261*/
262
55d09dc8
KW
263PERL_STATIC_INLINE void
264S_append_utf8_from_native_byte(const U8 byte, U8** dest)
265{
266 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
267 * encoded string at '*dest', updating '*dest' to include it */
268
55d09dc8
KW
269 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
270
6f2d5cbc 271 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 272 *((*dest)++) = byte;
55d09dc8 273 else {
a09ec51a
KW
274 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
275 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
276 }
277}
278
e123187a 279/*
2fe720e2 280=for apidoc valid_utf8_to_uvchr
2717076a 281Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
282the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
283it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
284non-Unicode code points are allowed.
285
286=cut
287
288 */
289
290PERL_STATIC_INLINE UV
291Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
292{
c41b2540 293 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
294 const U8* send = s + expectlen;
295 UV uv = *s;
296
297 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
298
299 if (retlen) {
300 *retlen = expectlen;
301 }
302
303 /* An invariant is trivially returned */
304 if (expectlen == 1) {
305 return uv;
306 }
307
308 /* Remove the leading bits that indicate the number of bytes, leaving just
309 * the bits that are part of the value */
310 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
311
312 /* Now, loop through the remaining bytes, accumulating each into the
313 * working total as we go. (I khw tried unrolling the loop for up to 4
314 * bytes, but there was no performance improvement) */
315 for (++s; s < send; s++) {
316 uv = UTF8_ACCUMULATE(uv, *s);
317 }
318
319 return UNI_TO_NATIVE(uv);
320
321}
322
1e599354
KW
323/*
324=for apidoc is_utf8_invariant_string
325
82c5d941 326Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 327regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
328EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
329are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
330the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
331characters are invariant, but so also are the C1 controls.
1e599354
KW
332
333If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
334use this option, that C<s> can't have embedded C<NUL> characters and has to
335have a terminating C<NUL> byte).
336
337See also L</is_utf8_string>(), L</is_utf8_string_loclen>(), and
338L</is_utf8_string_loc>().
339
340=cut
341*/
342
343PERL_STATIC_INLINE bool
344S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
345{
346 const U8* const send = s + (len ? len : strlen((const char *)s));
347 const U8* x = s;
348
349 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
350
351 for (; x < send; ++x) {
352 if (!UTF8_IS_INVARIANT(*x))
353 return FALSE;
354 }
355
356 return TRUE;
357}
358
7c93d8f0 359/*
5ff889fb
KW
360=for apidoc is_utf8_string
361
82c5d941
KW
362Returns TRUE if the first C<len> bytes of string C<s> form a valid
363Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
364be calculated using C<strlen(s)> (which means if you use this option, that C<s>
365can't have embedded C<NUL> characters and has to have a terminating C<NUL>
366byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
367
2717076a
KW
368This function considers Perl's extended UTF-8 to be valid. That means that
369code points above Unicode, surrogates, and non-character code points are
82c5d941 370considered valid by this function.
5ff889fb
KW
371
372See also L</is_utf8_invariant_string>(), L</is_utf8_string_loclen>(), and
373L</is_utf8_string_loc>().
374
375=cut
376*/
377
56e4cf64 378PERL_STATIC_INLINE bool
c41b2540 379Perl_is_utf8_string(const U8 *s, const STRLEN len)
5ff889fb 380{
35936d22
KW
381 /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
382 * Be aware of possible changes to that */
383
5ff889fb
KW
384 const U8* const send = s + (len ? len : strlen((const char *)s));
385 const U8* x = s;
386
387 PERL_ARGS_ASSERT_IS_UTF8_STRING;
388
389 while (x < send) {
c41b2540
KW
390 const STRLEN cur_len = isUTF8_CHAR(x, send);
391 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
392 return FALSE;
393 }
c41b2540 394 x += cur_len;
5ff889fb
KW
395 }
396
397 return TRUE;
398}
399
400/*
401Implemented as a macro in utf8.h
402
403=for apidoc is_utf8_string_loc
404
2717076a 405Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 406case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 407"utf8ness success") in the C<ep> pointer.
5ff889fb 408
2717076a 409See also C<L</is_utf8_string_loclen>>.
5ff889fb
KW
410
411=for apidoc is_utf8_string_loclen
412
2717076a 413Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb
KW
414case of "utf8ness failure") or the location C<s>+C<len> (in the case of
415"utf8ness success") in the C<ep>, and the number of UTF-8
82c5d941 416encoded characters in the C<el> pointer.
5ff889fb 417
2717076a 418See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
419
420=cut
421*/
422
56e4cf64 423PERL_STATIC_INLINE bool
c41b2540 424Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb
KW
425{
426 const U8* const send = s + (len ? len : strlen((const char *)s));
427 const U8* x = s;
428 STRLEN outlen = 0;
429
430 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
431
432 while (x < send) {
c41b2540
KW
433 const STRLEN cur_len = isUTF8_CHAR(x, send);
434 if (UNLIKELY(! cur_len)) {
5ff889fb
KW
435 break;
436 }
c41b2540 437 x += cur_len;
5ff889fb
KW
438 outlen++;
439 }
440
441 if (el)
442 *el = outlen;
443
444 if (ep) {
445 *ep = x;
446 }
447
448 return (x == send);
449}
450
451/*
7c93d8f0
KW
452=for apidoc utf8_distance
453
454Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
455and C<b>.
456
457WARNING: use only if you *know* that the pointers point inside the
458same UTF-8 buffer.
459
460=cut
461*/
462
463PERL_STATIC_INLINE IV
464Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
465{
466 PERL_ARGS_ASSERT_UTF8_DISTANCE;
467
468 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
469}
470
471/*
472=for apidoc utf8_hop
473
474Return the UTF-8 pointer C<s> displaced by C<off> characters, either
475forward or backward.
476
477WARNING: do not use the following unless you *know* C<off> is within
478the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
479on the first byte of character or just after the last byte of a character.
480
481=cut
482*/
483
484PERL_STATIC_INLINE U8 *
485Perl_utf8_hop(const U8 *s, SSize_t off)
486{
487 PERL_ARGS_ASSERT_UTF8_HOP;
488
489 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
490 * the bitops (especially ~) can create illegal UTF-8.
491 * In other words: in Perl UTF-8 is not just for Unicode. */
492
493 if (off >= 0) {
494 while (off--)
495 s += UTF8SKIP(s);
496 }
497 else {
498 while (off++) {
499 s--;
500 while (UTF8_IS_CONTINUATION(*s))
501 s--;
502 }
503 }
504 return (U8 *)s;
505}
506
4dab108f
KW
507/*
508
509=for apidoc is_utf8_valid_partial_char
510
6cbb9248
KW
511Returns 0 if the sequence of bytes starting at C<s> and looking no further than
512S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
513points. Otherwise, it returns 1 if there exists at least one non-empty
514sequence of bytes that when appended to sequence C<s>, starting at position
515C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
516otherwise returns 0.
517
518In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
519point.
520
521This is useful when a fixed-length buffer is being tested for being well-formed
522UTF-8, but the final few bytes in it don't comprise a full character; that is,
523it is split somewhere in the middle of the final code point's UTF-8
524representation. (Presumably when the buffer is refreshed with the next chunk
525of data, the new first bytes will complete the partial code point.) This
526function is used to verify that the final bytes in the current buffer are in
527fact the legal beginning of some code point, so that if they aren't, the
528failure can be signalled without having to wait for the next read.
4dab108f
KW
529
530=cut
531*/
2717076a
KW
532#define is_utf8_valid_partial_char(s, e) \
533 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
534
535/*
536
537=for apidoc is_utf8_valid_partial_char_flags
538
539Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
540or not the input is a valid UTF-8 encoded partial character, but it takes an
541extra parameter, C<flags>, which can further restrict which code points are
542considered valid.
543
544If C<flags> is 0, this behaves identically to
545C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
546of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
547there is any sequence of bytes that can complete the input partial character in
548such a way that a non-prohibited character is formed, the function returns
2717076a
KW
549TRUE; otherwise FALSE. Non character code points cannot be determined based on
550partial character input. But many of the other possible excluded types can be
f1c999a7
KW
551determined from just the first one or two bytes.
552
553=cut
554 */
555
56e4cf64 556PERL_STATIC_INLINE bool
f1c999a7 557S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 558{
f1c999a7 559 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 560
f1c999a7
KW
561 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
562 |UTF8_DISALLOW_ABOVE_31_BIT)));
4dab108f 563
8875bd48 564 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
565 return FALSE;
566 }
567
f1c999a7 568 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
569}
570
c8028aa6
TC
571/* ------------------------------- perl.h ----------------------------- */
572
573/*
dcccc8ff
KW
574=head1 Miscellaneous Functions
575
41188aa0 576=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 577
6602b933 578Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 579If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
580
581Return TRUE if the name is safe.
582
796b6530 583Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
584
585=cut
586*/
587
588PERL_STATIC_INLINE bool
41188aa0 589S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
590 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
591 * perl itself uses xce*() functions which accept 8-bit strings.
592 */
593
594 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
595
6c4650b3 596 if (len > 1) {
c8028aa6 597 char *null_at;
41188aa0 598 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 599 SETERRNO(ENOENT, LIB_INVARG);
1d505182 600 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 601 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 602 what, op_name, pv, null_at+1);
c8028aa6
TC
603 return FALSE;
604 }
605 }
606
607 return TRUE;
608}
609
610/*
7cb3f959
TC
611
612Return true if the supplied filename has a newline character
fa6c7d00 613immediately before the first (hopefully only) NUL.
7cb3f959
TC
614
615My original look at this incorrectly used the len from SvPV(), but
616that's incorrect, since we allow for a NUL in pv[len-1].
617
618So instead, strlen() and work from there.
619
620This allow for the user reading a filename, forgetting to chomp it,
621then calling:
622
623 open my $foo, "$file\0";
624
625*/
626
627#ifdef PERL_CORE
628
629PERL_STATIC_INLINE bool
630S_should_warn_nl(const char *pv) {
631 STRLEN len;
632
633 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
634
635 len = strlen(pv);
636
637 return len > 0 && pv[len-1] == '\n';
638}
639
640#endif
641
81d52ecd
JH
642/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
643
644#define MAX_CHARSET_NAME_LENGTH 2
645
646PERL_STATIC_INLINE const char *
647get_regex_charset_name(const U32 flags, STRLEN* const lenp)
648{
649 /* Returns a string that corresponds to the name of the regex character set
650 * given by 'flags', and *lenp is set the length of that string, which
651 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
652
653 *lenp = 1;
654 switch (get_regex_charset(flags)) {
655 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
656 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
657 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
658 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
659 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
660 *lenp = 2;
661 return ASCII_MORE_RESTRICT_PAT_MODS;
662 }
663 /* The NOT_REACHED; hides an assert() which has a rather complex
664 * definition in perl.h. */
665 NOT_REACHED; /* NOTREACHED */
666 return "?"; /* Unknown */
667}
668
7cb3f959 669/*
ed382232
TC
670
671Return false if any get magic is on the SV other than taint magic.
672
673*/
674
675PERL_STATIC_INLINE bool
676S_sv_only_taint_gmagic(SV *sv) {
677 MAGIC *mg = SvMAGIC(sv);
678
679 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
680
681 while (mg) {
682 if (mg->mg_type != PERL_MAGIC_taint
683 && !(mg->mg_flags & MGf_GSKIP)
684 && mg->mg_virtual->svt_get) {
685 return FALSE;
686 }
687 mg = mg->mg_moremagic;
688 }
689
690 return TRUE;
691}
692
ed8ff0f3
DM
693/* ------------------ cop.h ------------------------------------------- */
694
695
696/* Enter a block. Push a new base context and return its address. */
697
698PERL_STATIC_INLINE PERL_CONTEXT *
699S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
700{
701 PERL_CONTEXT * cx;
702
703 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
704
705 CXINC;
706 cx = CX_CUR();
707 cx->cx_type = type;
708 cx->blk_gimme = gimme;
709 cx->blk_oldsaveix = saveix;
4caf7d8c 710 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 711 cx->blk_oldcop = PL_curcop;
4caf7d8c 712 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
713 cx->blk_oldscopesp = PL_scopestack_ix;
714 cx->blk_oldpm = PL_curpm;
ce8bb8d8 715 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
716
717 PL_tmps_floor = PL_tmps_ix;
718 CX_DEBUG(cx, "PUSH");
719 return cx;
720}
721
722
723/* Exit a block (RETURN and LAST). */
724
725PERL_STATIC_INLINE void
726S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
727{
728 PERL_ARGS_ASSERT_CX_POPBLOCK;
729
730 CX_DEBUG(cx, "POP");
731 /* these 3 are common to cx_popblock and cx_topblock */
732 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
733 PL_scopestack_ix = cx->blk_oldscopesp;
734 PL_curpm = cx->blk_oldpm;
735
736 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
737 * and leaves a CX entry lying around for repeated use, so
738 * skip for multicall */ \
739 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
740 || PL_savestack_ix == cx->blk_oldsaveix);
741 PL_curcop = cx->blk_oldcop;
ce8bb8d8 742 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
743}
744
745/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
746 * Whereas cx_popblock() restores the state to the point just before
747 * cx_pushblock() was called, cx_topblock() restores it to the point just
748 * *after* cx_pushblock() was called. */
749
750PERL_STATIC_INLINE void
751S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
752{
753 PERL_ARGS_ASSERT_CX_TOPBLOCK;
754
755 CX_DEBUG(cx, "TOP");
756 /* these 3 are common to cx_popblock and cx_topblock */
757 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
758 PL_scopestack_ix = cx->blk_oldscopesp;
759 PL_curpm = cx->blk_oldpm;
760
761 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
762}
763
764
a73d8813
DM
765PERL_STATIC_INLINE void
766S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
767{
768 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
769
770 PERL_ARGS_ASSERT_CX_PUSHSUB;
771
3f6bd23a 772 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
773 cx->blk_sub.cv = cv;
774 cx->blk_sub.olddepth = CvDEPTH(cv);
775 cx->blk_sub.prevcomppad = PL_comppad;
776 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
777 cx->blk_sub.retop = retop;
778 SvREFCNT_inc_simple_void_NN(cv);
779 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
780}
781
782
783/* subsets of cx_popsub() */
784
785PERL_STATIC_INLINE void
786S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
787{
788 CV *cv;
789
790 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
791 assert(CxTYPE(cx) == CXt_SUB);
792
793 PL_comppad = cx->blk_sub.prevcomppad;
794 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
795 cv = cx->blk_sub.cv;
796 CvDEPTH(cv) = cx->blk_sub.olddepth;
797 cx->blk_sub.cv = NULL;
798 SvREFCNT_dec(cv);
799}
800
801
802/* handle the @_ part of leaving a sub */
803
804PERL_STATIC_INLINE void
805S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
806{
807 AV *av;
808
809 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
810 assert(CxTYPE(cx) == CXt_SUB);
811 assert(AvARRAY(MUTABLE_AV(
812 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
813 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
814
815 CX_POP_SAVEARRAY(cx);
816 av = MUTABLE_AV(PAD_SVl(0));
817 if (UNLIKELY(AvREAL(av)))
818 /* abandon @_ if it got reified */
819 clear_defarray(av, 0);
820 else {
821 CLEAR_ARGARRAY(av);
822 }
823}
824
825
826PERL_STATIC_INLINE void
827S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
828{
829 PERL_ARGS_ASSERT_CX_POPSUB;
830 assert(CxTYPE(cx) == CXt_SUB);
831
3f6bd23a 832 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
833
834 if (CxHASARGS(cx))
835 cx_popsub_args(cx);
836 cx_popsub_common(cx);
837}
838
839
6a7d52cc
DM
840PERL_STATIC_INLINE void
841S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
842{
843 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
844
845 cx->blk_format.cv = cv;
846 cx->blk_format.retop = retop;
847 cx->blk_format.gv = gv;
848 cx->blk_format.dfoutgv = PL_defoutgv;
849 cx->blk_format.prevcomppad = PL_comppad;
850 cx->blk_u16 = 0;
851
852 SvREFCNT_inc_simple_void_NN(cv);
853 CvDEPTH(cv)++;
854 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
855}
856
857
858PERL_STATIC_INLINE void
859S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
860{
861 CV *cv;
862 GV *dfout;
863
864 PERL_ARGS_ASSERT_CX_POPFORMAT;
865 assert(CxTYPE(cx) == CXt_FORMAT);
866
867 dfout = cx->blk_format.dfoutgv;
868 setdefout(dfout);
869 cx->blk_format.dfoutgv = NULL;
870 SvREFCNT_dec_NN(dfout);
871
872 PL_comppad = cx->blk_format.prevcomppad;
873 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
874 cv = cx->blk_format.cv;
875 cx->blk_format.cv = NULL;
876 --CvDEPTH(cv);
877 SvREFCNT_dec_NN(cv);
878}
879
880
13febba5
DM
881PERL_STATIC_INLINE void
882S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
883{
884 PERL_ARGS_ASSERT_CX_PUSHEVAL;
885
886 cx->blk_eval.retop = retop;
887 cx->blk_eval.old_namesv = namesv;
888 cx->blk_eval.old_eval_root = PL_eval_root;
889 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
890 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
891 cx->blk_eval.cur_top_env = PL_top_env;
892
893 assert(!(PL_in_eval & ~ 0x7F));
894 assert(!(PL_op->op_type & ~0x1FF));
895 cx->blk_u16 = (PL_in_eval & 0x7F) | ((U16)PL_op->op_type << 7);
896}
897
898
899PERL_STATIC_INLINE void
900S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
901{
902 SV *sv;
903
904 PERL_ARGS_ASSERT_CX_POPEVAL;
905 assert(CxTYPE(cx) == CXt_EVAL);
906
907 PL_in_eval = CxOLD_IN_EVAL(cx);
908 PL_eval_root = cx->blk_eval.old_eval_root;
909 sv = cx->blk_eval.cur_text;
910 if (sv && SvSCREAM(sv)) {
911 cx->blk_eval.cur_text = NULL;
912 SvREFCNT_dec_NN(sv);
913 }
914
915 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
916 if (sv) {
917 cx->blk_eval.old_namesv = NULL;
918 SvREFCNT_dec_NN(sv);
919 }
13febba5 920}
6a7d52cc 921
a73d8813 922
d1b6bf72
DM
923/* push a plain loop, i.e.
924 * { block }
925 * while (cond) { block }
926 * for (init;cond;continue) { block }
927 * This loop can be last/redo'ed etc.
928 */
929
930PERL_STATIC_INLINE void
931S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
932{
933 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
934 cx->blk_loop.my_op = cLOOP;
935}
936
937
938/* push a true for loop, i.e.
939 * for var (list) { block }
940 */
941
942PERL_STATIC_INLINE void
943S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
944{
945 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
946
947 /* this one line is common with cx_pushloop_plain */
948 cx->blk_loop.my_op = cLOOP;
949
950 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
951 cx->blk_loop.itersave = itersave;
952#ifdef USE_ITHREADS
953 cx->blk_loop.oldcomppad = PL_comppad;
954#endif
955}
956
957
958/* pop all loop types, including plain */
959
960PERL_STATIC_INLINE void
961S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
962{
963 PERL_ARGS_ASSERT_CX_POPLOOP;
964
965 assert(CxTYPE_is_LOOP(cx));
966 if ( CxTYPE(cx) == CXt_LOOP_ARY
967 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
968 {
969 /* Free ary or cur. This assumes that state_u.ary.ary
970 * aligns with state_u.lazysv.cur. See cx_dup() */
971 SV *sv = cx->blk_loop.state_u.lazysv.cur;
972 cx->blk_loop.state_u.lazysv.cur = NULL;
973 SvREFCNT_dec_NN(sv);
974 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
975 sv = cx->blk_loop.state_u.lazysv.end;
976 cx->blk_loop.state_u.lazysv.end = NULL;
977 SvREFCNT_dec_NN(sv);
978 }
979 }
980 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
981 SV *cursv;
982 SV **svp = (cx)->blk_loop.itervar_u.svp;
983 if ((cx->cx_type & CXp_FOR_GV))
984 svp = &GvSV((GV*)svp);
985 cursv = *svp;
986 *svp = cx->blk_loop.itersave;
987 cx->blk_loop.itersave = NULL;
988 SvREFCNT_dec(cursv);
989 }
990}
991
2a7b7c61
DM
992
993PERL_STATIC_INLINE void
994S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
995{
996 PERL_ARGS_ASSERT_CX_PUSHWHEN;
997
998 cx->blk_givwhen.leave_op = cLOGOP->op_other;
999}
1000
1001
1002PERL_STATIC_INLINE void
1003S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1004{
1005 PERL_ARGS_ASSERT_CX_POPWHEN;
1006 assert(CxTYPE(cx) == CXt_WHEN);
1007
1008 PERL_UNUSED_ARG(cx);
59a14f30 1009 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1010 /* currently NOOP */
1011}
1012
1013
1014PERL_STATIC_INLINE void
1015S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1016{
1017 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1018
1019 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1020 cx->blk_givwhen.defsv_save = orig_defsv;
1021}
1022
1023
1024PERL_STATIC_INLINE void
1025S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1026{
1027 SV *sv;
1028
1029 PERL_ARGS_ASSERT_CX_POPGIVEN;
1030 assert(CxTYPE(cx) == CXt_GIVEN);
1031
1032 sv = GvSV(PL_defgv);
1033 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1034 cx->blk_givwhen.defsv_save = NULL;
1035 SvREFCNT_dec(sv);
1036}
1037
ed382232 1038/*
c8028aa6
TC
1039 * ex: set ts=8 sts=4 sw=4 et:
1040 */