This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
inline.h: Silence compiler warning
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
4ab2fd9b 385/* This looks like 0x010101... */
2c5c8af5 386# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
4ab2fd9b
KW
387
388/* This looks like 0x808080... */
2c5c8af5
KW
389# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
390# define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
391# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 392
099e59a4
KW
393/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
394 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
395 * optimized out completely on a 32-bit system, and its mask gets optimized out
396 * on a 64-bit system */
2c5c8af5 397# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
5eabe374
KW
398 | ( PTR2nat(x) >> 1) \
399 | ( ( (PTR2nat(x) \
400 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4 401
3f515a2e
KW
402#ifndef EBCDIC
403
099e59a4
KW
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
2c5c8af5
KW
441# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1d2af574
KW
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
2c5c8af5 449# else /* If weird byte order, drop into next loop to do byte-at-a-time
1d2af574
KW
450 checks. */
451
46bb68f6 452 break;
2c5c8af5 453# endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
0b08cab0 461#endif /* End of ! EBCDIC */
e17544a6
KW
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
23a7ee81
KW
479#ifndef EBCDIC
480
1d2af574
KW
481PERL_STATIC_INLINE unsigned int
482S__variant_byte_number(PERL_UINTMAX_T word)
483{
484
485 /* This returns the position in a word (0..7) of the first variant byte in
486 * it. This is a helper function. Note that there are no branches */
487
488 assert(word);
489
490 /* Get just the msb bits of each byte */
491 word &= PERL_VARIANTS_WORD_MASK;
492
597ee3f4
KW
493# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
494 easiest thing is to hide that from the callers */
495 {
496 unsigned int i;
497 const U8 * s = (U8 *) &word;
498 dTHX;
499
500 for (i = 0; i < sizeof(word); i++ ) {
501 if (s[i]) {
502 return i;
503 }
504 }
505
506 Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
507 __FILE__, __LINE__);
508 }
509
510# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
1d2af574
KW
511
512 /* Bytes are stored like
513 * Byte8 ... Byte2 Byte1
514 * 63..56...15...8 7...0
515 *
516 * Isolate the lsb;
517 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
518 *
519 * The word will look this this, with a rightmost set bit in position 's':
520 * ('x's are don't cares)
521 * s
522 * x..x100..0
523 * x..xx10..0 Right shift (rightmost 0 is shifted off)
524 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
525 * the 1 just to their left into a 0; the remainder is
526 * untouched
527 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
528 * bottom to all 1
529 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
530 *
531 * Another method is to do 'word &= -word'; but it generates a compiler
532 * message on some platforms about taking the negative of an unsigned */
533
534 word >>= 1;
535 word = 1 + (word ^ (word - 1));
536
537# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
538
539 /* Bytes are stored like
540 * Byte1 Byte2 ... Byte8
541 * 63..56 55..47 ... 7...0
542 *
543 * Isolate the msb; http://codeforces.com/blog/entry/10330
544 *
545 * Only the most significant set bit matters. Or'ing word with its right
546 * shift of 1 makes that bit and the next one to its right both 1. Then
547 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
548 * msb and all to the right being 1. */
549 word |= word >> 1;
550 word |= word >> 2;
551 word |= word >> 4;
552 word |= word >> 8;
553 word |= word >> 16;
554 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
555
556 /* Then subtracting the right shift by 1 clears all but the left-most of
557 * the 1 bits, which is our desired result */
558 word -= (word >> 1);
559
560# else
561# error Unexpected byte order
562# endif
563
7cf2d6c7
KW
564 /* Here 'word' has a single bit set: the msb of the first byte in which it
565 * is set. Calculate that position in the word. We can use this
1d2af574 566 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
567 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
568 * just get shifted off at compile time) */
569 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
570 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
571 | (39 << 24) | (47 << 16)
572 | (55 << 8) | (63 << 0));
1d2af574
KW
573 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
574
575 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
576 word = ((word + 1) >> 3) - 1;
577
578# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
579
580 /* And invert the result */
581 word = CHARBITS - word - 1;
582
583# endif
584
585 return (unsigned int) word;
586}
587
23a7ee81 588#endif
03c1e4ab
KW
589#if defined(PERL_CORE) || defined(PERL_EXT)
590
591/*
592=for apidoc variant_under_utf8_count
593
594This function looks at the sequence of bytes between C<s> and C<e>, which are
595assumed to be encoded in ASCII/Latin1, and returns how many of them would
596change should the string be translated into UTF-8. Due to the nature of UTF-8,
597each of these would occupy two bytes instead of the single one in the input
598string. Thus, this function returns the precise number of bytes the string
599would expand by when translated to UTF-8.
600
601Unlike most of the other functions that have C<utf8> in their name, the input
602to this function is NOT a UTF-8-encoded string. The function name is slightly
603I<odd> to emphasize this.
604
605This function is internal to Perl because khw thinks that any XS code that
606would want this is probably operating too close to the internals. Presenting a
607valid use case could change that.
608
609See also
610C<L<perlapi/is_utf8_invariant_string>>
611and
612C<L<perlapi/is_utf8_invariant_string_loc>>,
613
614=cut
615
616*/
617
618PERL_STATIC_INLINE Size_t
619S_variant_under_utf8_count(const U8* const s, const U8* const e)
620{
621 const U8* x = s;
622 Size_t count = 0;
623
624 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
625
626# ifndef EBCDIC
627
5d0379de
KW
628 /* Test if the string is long enough to use word-at-a-time. (Logic is the
629 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
630 if ((STRLEN) (e - x) >= PERL_WORDSIZE
631 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
632 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
633 {
634
635 /* Process per-byte until reach word boundary. XXX This loop could be
636 * eliminated if we knew that this platform had fast unaligned reads */
637 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
638 count += ! UTF8_IS_INVARIANT(*x++);
639 }
640
641 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
642 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
643 explanation of how this works */
0cb29117
KW
644 count += (Size_t)
645 ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
03c1e4ab
KW
646 * PERL_COUNT_MULTIPLIER)
647 >> ((PERL_WORDSIZE - 1) * CHARBITS);
648 x += PERL_WORDSIZE;
649 } while (x + PERL_WORDSIZE <= e);
650 }
651
652# endif
653
654 /* Process per-byte */
655 while (x < e) {
656 if (! UTF8_IS_INVARIANT(*x)) {
657 count++;
658 }
659
660 x++;
661 }
662
663 return count;
664}
665
666#endif
667
aff4cafe
KW
668#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
669# undef PERL_WORDSIZE
670# undef PERL_COUNT_MULTIPLIER
671# undef PERL_WORD_BOUNDARY_MASK
672# undef PERL_VARIANTS_WORD_MASK
673#endif
03c1e4ab 674
7c93d8f0 675/*
5ff889fb
KW
676=for apidoc is_utf8_string
677
82c5d941
KW
678Returns TRUE if the first C<len> bytes of string C<s> form a valid
679Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
680be calculated using C<strlen(s)> (which means if you use this option, that C<s>
681can't have embedded C<NUL> characters and has to have a terminating C<NUL>
682byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
683
2717076a
KW
684This function considers Perl's extended UTF-8 to be valid. That means that
685code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
686considered valid by this function. Use C<L</is_strict_utf8_string>>,
687C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
688code points are considered valid.
5ff889fb 689
9f2abfde
KW
690See also
691C<L</is_utf8_invariant_string>>,
0cbf5865 692C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
693C<L</is_utf8_string_loc>>,
694C<L</is_utf8_string_loclen>>,
8bc127bf
KW
695C<L</is_utf8_fixed_width_buf_flags>>,
696C<L</is_utf8_fixed_width_buf_loc_flags>>,
697C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
698
699=cut
700*/
701
dd237e82 702#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 703
c9cd936b
KW
704#if defined(PERL_CORE) || defined (PERL_EXT)
705
706/*
707=for apidoc is_utf8_non_invariant_string
708
709Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
710C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
711UTF-8; otherwise returns FALSE.
712
713A TRUE return means that at least one code point represented by the sequence
714either is a wide character not representable as a single byte, or the
715representation differs depending on whether the sequence is encoded in UTF-8 or
716not.
717
718See also
719C<L<perlapi/is_utf8_invariant_string>>,
720C<L<perlapi/is_utf8_string>>
721
722=cut
723
724This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
725It generally needn't be if its string is entirely UTF-8 invariant, and it
726shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
727
728It is an internal function because khw thinks that XS code shouldn't be working
729at this low a level. A valid use case could change that.
730
731*/
732
733PERL_STATIC_INLINE bool
734S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
735{
736 const U8 * first_variant;
737
738 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
739
740 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
741 return FALSE;
742 }
743
744 return is_utf8_string(first_variant, len - (first_variant - s));
745}
746
747#endif
748
5ff889fb 749/*
9f2abfde
KW
750=for apidoc is_strict_utf8_string
751
752Returns TRUE if the first C<len> bytes of string C<s> form a valid
753UTF-8-encoded string that is fully interchangeable by any application using
754Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
755calculated using C<strlen(s)> (which means if you use this option, that C<s>
756can't have embedded C<NUL> characters and has to have a terminating C<NUL>
757byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
758
759This function returns FALSE for strings containing any
760code points above the Unicode max of 0x10FFFF, surrogate code points, or
761non-character code points.
762
763See also
764C<L</is_utf8_invariant_string>>,
0cbf5865 765C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
766C<L</is_utf8_string>>,
767C<L</is_utf8_string_flags>>,
768C<L</is_utf8_string_loc>>,
769C<L</is_utf8_string_loc_flags>>,
770C<L</is_utf8_string_loclen>>,
771C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
772C<L</is_utf8_fixed_width_buf_flags>>,
773C<L</is_utf8_fixed_width_buf_loc_flags>>,
774C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
775C<L</is_strict_utf8_string_loc>>,
776C<L</is_strict_utf8_string_loclen>>,
777C<L</is_c9strict_utf8_string>>,
778C<L</is_c9strict_utf8_string_loc>>,
779and
780C<L</is_c9strict_utf8_string_loclen>>.
781
782=cut
783*/
784
dd237e82 785#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
786
787/*
788=for apidoc is_c9strict_utf8_string
789
790Returns TRUE if the first C<len> bytes of string C<s> form a valid
791UTF-8-encoded string that conforms to
792L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
793otherwise it returns FALSE. If C<len> is 0, it will be calculated using
794C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
795C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
796characters being ASCII constitute 'a valid UTF-8 string'.
797
798This function returns FALSE for strings containing any code points above the
799Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
800code points per
801L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
802
803See also
804C<L</is_utf8_invariant_string>>,
0cbf5865 805C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
806C<L</is_utf8_string>>,
807C<L</is_utf8_string_flags>>,
808C<L</is_utf8_string_loc>>,
809C<L</is_utf8_string_loc_flags>>,
810C<L</is_utf8_string_loclen>>,
811C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
812C<L</is_utf8_fixed_width_buf_flags>>,
813C<L</is_utf8_fixed_width_buf_loc_flags>>,
814C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
815C<L</is_strict_utf8_string>>,
816C<L</is_strict_utf8_string_loc>>,
817C<L</is_strict_utf8_string_loclen>>,
818C<L</is_c9strict_utf8_string_loc>>,
819and
820C<L</is_c9strict_utf8_string_loclen>>.
821
822=cut
823*/
824
dd237e82 825#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
826
827/*
828=for apidoc is_utf8_string_flags
829
830Returns TRUE if the first C<len> bytes of string C<s> form a valid
831UTF-8 string, subject to the restrictions imposed by C<flags>;
832returns FALSE otherwise. If C<len> is 0, it will be calculated
833using C<strlen(s)> (which means if you use this option, that C<s> can't have
834embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
835that all characters being ASCII constitute 'a valid UTF-8 string'.
836
837If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
838C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
839as C<L</is_strict_utf8_string>>; and if C<flags> is
840C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
841C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
842combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
843C<L</utf8n_to_uvchr>>, with the same meanings.
844
845See also
846C<L</is_utf8_invariant_string>>,
0cbf5865 847C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
848C<L</is_utf8_string>>,
849C<L</is_utf8_string_loc>>,
850C<L</is_utf8_string_loc_flags>>,
851C<L</is_utf8_string_loclen>>,
852C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
853C<L</is_utf8_fixed_width_buf_flags>>,
854C<L</is_utf8_fixed_width_buf_loc_flags>>,
855C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
856C<L</is_strict_utf8_string>>,
857C<L</is_strict_utf8_string_loc>>,
858C<L</is_strict_utf8_string_loclen>>,
859C<L</is_c9strict_utf8_string>>,
860C<L</is_c9strict_utf8_string_loc>>,
861and
862C<L</is_c9strict_utf8_string_loclen>>.
863
864=cut
865*/
866
867PERL_STATIC_INLINE bool
f60f61fd 868S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 869{
33756530 870 const U8 * first_variant;
9f2abfde
KW
871
872 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
873 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 874 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 875
f60f61fd
KW
876 if (len == 0) {
877 len = strlen((const char *)s);
878 }
879
9f2abfde
KW
880 if (flags == 0) {
881 return is_utf8_string(s, len);
882 }
883
d044b7a7 884 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
885 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
886 {
887 return is_strict_utf8_string(s, len);
888 }
889
d044b7a7 890 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
891 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
892 {
893 return is_c9strict_utf8_string(s, len);
894 }
895
33756530
KW
896 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
897 const U8* const send = s + len;
898 const U8* x = first_variant;
899
a0d7f935
KW
900 while (x < send) {
901 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
902 if (UNLIKELY(! cur_len)) {
903 return FALSE;
904 }
905 x += cur_len;
9f2abfde 906 }
33756530 907 }
9f2abfde
KW
908
909 return TRUE;
910}
911
912/*
5ff889fb
KW
913
914=for apidoc is_utf8_string_loc
915
2717076a 916Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 917case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 918"utf8ness success") in the C<ep> pointer.
5ff889fb 919
2717076a 920See also C<L</is_utf8_string_loclen>>.
5ff889fb 921
3964c812
KW
922=cut
923*/
924
925#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
926
927/*
928
5ff889fb
KW
929=for apidoc is_utf8_string_loclen
930
2717076a 931Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 932case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 933"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 934encoded characters in the C<el> pointer.
5ff889fb 935
2717076a 936See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
937
938=cut
939*/
940
56e4cf64 941PERL_STATIC_INLINE bool
33756530 942Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 943{
33756530 944 const U8 * first_variant;
5ff889fb
KW
945
946 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
947
33756530
KW
948 if (len == 0) {
949 len = strlen((const char *) s);
950 }
951
952 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
953 if (el)
954 *el = len;
955
956 if (ep) {
957 *ep = s + len;
958 }
959
960 return TRUE;
961 }
962
963 {
964 const U8* const send = s + len;
965 const U8* x = first_variant;
966 STRLEN outlen = first_variant - s;
967
a0d7f935
KW
968 while (x < send) {
969 const STRLEN cur_len = isUTF8_CHAR(x, send);
970 if (UNLIKELY(! cur_len)) {
971 break;
972 }
973 x += cur_len;
974 outlen++;
5ff889fb 975 }
5ff889fb 976
a0d7f935
KW
977 if (el)
978 *el = outlen;
5ff889fb 979
a0d7f935
KW
980 if (ep) {
981 *ep = x;
982 }
5ff889fb 983
a0d7f935 984 return (x == send);
33756530 985 }
5ff889fb
KW
986}
987
988/*
9f2abfde
KW
989
990=for apidoc is_strict_utf8_string_loc
991
992Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
993case of "utf8ness failure") or the location C<s>+C<len> (in the case of
994"utf8ness success") in the C<ep> pointer.
995
996See also C<L</is_strict_utf8_string_loclen>>.
997
998=cut
999*/
1000
1001#define is_strict_utf8_string_loc(s, len, ep) \
1002 is_strict_utf8_string_loclen(s, len, ep, 0)
1003
1004/*
1005
1006=for apidoc is_strict_utf8_string_loclen
1007
1008Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1009case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1010"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1011encoded characters in the C<el> pointer.
1012
1013See also C<L</is_strict_utf8_string_loc>>.
1014
1015=cut
1016*/
1017
1018PERL_STATIC_INLINE bool
33756530 1019S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1020{
33756530 1021 const U8 * first_variant;
9f2abfde
KW
1022
1023 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1024
33756530
KW
1025 if (len == 0) {
1026 len = strlen((const char *) s);
1027 }
1028
1029 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1030 if (el)
1031 *el = len;
1032
1033 if (ep) {
1034 *ep = s + len;
1035 }
1036
1037 return TRUE;
1038 }
1039
1040 {
1041 const U8* const send = s + len;
1042 const U8* x = first_variant;
1043 STRLEN outlen = first_variant - s;
1044
a0d7f935
KW
1045 while (x < send) {
1046 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1047 if (UNLIKELY(! cur_len)) {
1048 break;
1049 }
1050 x += cur_len;
1051 outlen++;
9f2abfde 1052 }
9f2abfde 1053
a0d7f935
KW
1054 if (el)
1055 *el = outlen;
9f2abfde 1056
a0d7f935
KW
1057 if (ep) {
1058 *ep = x;
1059 }
9f2abfde 1060
a0d7f935 1061 return (x == send);
33756530 1062 }
9f2abfde
KW
1063}
1064
1065/*
1066
1067=for apidoc is_c9strict_utf8_string_loc
1068
1069Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1070the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1071"utf8ness success") in the C<ep> pointer.
1072
1073See also C<L</is_c9strict_utf8_string_loclen>>.
1074
1075=cut
1076*/
1077
1078#define is_c9strict_utf8_string_loc(s, len, ep) \
1079 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1080
1081/*
1082
1083=for apidoc is_c9strict_utf8_string_loclen
1084
1085Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1086the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1087"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1088characters in the C<el> pointer.
1089
1090See also C<L</is_c9strict_utf8_string_loc>>.
1091
1092=cut
1093*/
1094
1095PERL_STATIC_INLINE bool
33756530 1096S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1097{
33756530 1098 const U8 * first_variant;
9f2abfde
KW
1099
1100 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1101
33756530
KW
1102 if (len == 0) {
1103 len = strlen((const char *) s);
1104 }
1105
1106 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1107 if (el)
1108 *el = len;
1109
1110 if (ep) {
1111 *ep = s + len;
1112 }
1113
1114 return TRUE;
1115 }
1116
1117 {
1118 const U8* const send = s + len;
1119 const U8* x = first_variant;
1120 STRLEN outlen = first_variant - s;
1121
a0d7f935
KW
1122 while (x < send) {
1123 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1124 if (UNLIKELY(! cur_len)) {
1125 break;
1126 }
1127 x += cur_len;
1128 outlen++;
9f2abfde 1129 }
9f2abfde 1130
a0d7f935
KW
1131 if (el)
1132 *el = outlen;
9f2abfde 1133
a0d7f935
KW
1134 if (ep) {
1135 *ep = x;
1136 }
9f2abfde 1137
a0d7f935 1138 return (x == send);
33756530 1139 }
9f2abfde
KW
1140}
1141
1142/*
1143
1144=for apidoc is_utf8_string_loc_flags
1145
1146Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1147case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1148"utf8ness success") in the C<ep> pointer.
1149
1150See also C<L</is_utf8_string_loclen_flags>>.
1151
1152=cut
1153*/
1154
1155#define is_utf8_string_loc_flags(s, len, ep, flags) \
1156 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1157
1158
1159/* The above 3 actual functions could have been moved into the more general one
1160 * just below, and made #defines that call it with the right 'flags'. They are
1161 * currently kept separate to increase their chances of getting inlined */
1162
1163/*
1164
1165=for apidoc is_utf8_string_loclen_flags
1166
1167Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1168case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1169"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1170encoded characters in the C<el> pointer.
1171
1172See also C<L</is_utf8_string_loc_flags>>.
1173
1174=cut
1175*/
1176
1177PERL_STATIC_INLINE bool
f60f61fd 1178S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1179{
33756530 1180 const U8 * first_variant;
9f2abfde
KW
1181
1182 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1183 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1184 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1185
f60f61fd 1186 if (len == 0) {
a0d7f935 1187 len = strlen((const char *) s);
f60f61fd
KW
1188 }
1189
9f2abfde
KW
1190 if (flags == 0) {
1191 return is_utf8_string_loclen(s, len, ep, el);
1192 }
1193
d044b7a7 1194 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1195 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1196 {
1197 return is_strict_utf8_string_loclen(s, len, ep, el);
1198 }
1199
d044b7a7 1200 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1201 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1202 {
1203 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1204 }
1205
33756530
KW
1206 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1207 if (el)
1208 *el = len;
1209
1210 if (ep) {
1211 *ep = s + len;
1212 }
1213
1214 return TRUE;
1215 }
1216
1217 {
1218 const U8* send = s + len;
1219 const U8* x = first_variant;
1220 STRLEN outlen = first_variant - s;
1221
a0d7f935
KW
1222 while (x < send) {
1223 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1224 if (UNLIKELY(! cur_len)) {
1225 break;
1226 }
1227 x += cur_len;
1228 outlen++;
9f2abfde 1229 }
9f2abfde 1230
a0d7f935
KW
1231 if (el)
1232 *el = outlen;
9f2abfde 1233
a0d7f935
KW
1234 if (ep) {
1235 *ep = x;
1236 }
9f2abfde 1237
a0d7f935 1238 return (x == send);
33756530 1239 }
9f2abfde
KW
1240}
1241
1242/*
7c93d8f0
KW
1243=for apidoc utf8_distance
1244
1245Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1246and C<b>.
1247
1248WARNING: use only if you *know* that the pointers point inside the
1249same UTF-8 buffer.
1250
1251=cut
1252*/
1253
1254PERL_STATIC_INLINE IV
1255Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1256{
1257 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1258
1259 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1260}
1261
1262/*
1263=for apidoc utf8_hop
1264
1265Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1266forward or backward.
1267
1268WARNING: do not use the following unless you *know* C<off> is within
1269the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1270on the first byte of character or just after the last byte of a character.
1271
1272=cut
1273*/
1274
1275PERL_STATIC_INLINE U8 *
1276Perl_utf8_hop(const U8 *s, SSize_t off)
1277{
1278 PERL_ARGS_ASSERT_UTF8_HOP;
1279
1280 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1281 * the bitops (especially ~) can create illegal UTF-8.
1282 * In other words: in Perl UTF-8 is not just for Unicode. */
1283
1284 if (off >= 0) {
1285 while (off--)
1286 s += UTF8SKIP(s);
1287 }
1288 else {
1289 while (off++) {
1290 s--;
1291 while (UTF8_IS_CONTINUATION(*s))
1292 s--;
1293 }
1294 }
7347ee54 1295 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1296 return (U8 *)s;
7347ee54 1297 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1298}
1299
4dab108f 1300/*
65df57a8
TC
1301=for apidoc utf8_hop_forward
1302
1303Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1304forward.
1305
1306C<off> must be non-negative.
1307
1308C<s> must be before or equal to C<end>.
1309
1310When moving forward it will not move beyond C<end>.
1311
1312Will not exceed this limit even if the string is not valid "UTF-8".
1313
1314=cut
1315*/
1316
1317PERL_STATIC_INLINE U8 *
1318Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1319{
1320 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1321
1322 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1323 * the bitops (especially ~) can create illegal UTF-8.
1324 * In other words: in Perl UTF-8 is not just for Unicode. */
1325
1326 assert(s <= end);
1327 assert(off >= 0);
1328
1329 while (off--) {
1330 STRLEN skip = UTF8SKIP(s);
de979548 1331 if ((STRLEN)(end - s) <= skip) {
7347ee54 1332 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1333 return (U8 *)end;
7347ee54 1334 GCC_DIAG_RESTORE_STMT;
de979548 1335 }
65df57a8
TC
1336 s += skip;
1337 }
1338
7347ee54 1339 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1340 return (U8 *)s;
7347ee54 1341 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1342}
1343
1344/*
1345=for apidoc utf8_hop_back
1346
1347Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1348backward.
1349
1350C<off> must be non-positive.
1351
1352C<s> must be after or equal to C<start>.
1353
1354When moving backward it will not move before C<start>.
1355
1356Will not exceed this limit even if the string is not valid "UTF-8".
1357
1358=cut
1359*/
1360
1361PERL_STATIC_INLINE U8 *
1362Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1363{
1364 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1365
1366 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1367 * the bitops (especially ~) can create illegal UTF-8.
1368 * In other words: in Perl UTF-8 is not just for Unicode. */
1369
1370 assert(start <= s);
1371 assert(off <= 0);
1372
1373 while (off++ && s > start) {
1374 s--;
1375 while (UTF8_IS_CONTINUATION(*s) && s > start)
1376 s--;
1377 }
1378
7347ee54 1379 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1380 return (U8 *)s;
7347ee54 1381 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1382}
1383
1384/*
1385=for apidoc utf8_hop_safe
1386
1387Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1388either forward or backward.
1389
1390When moving backward it will not move before C<start>.
1391
1392When moving forward it will not move beyond C<end>.
1393
1394Will not exceed those limits even if the string is not valid "UTF-8".
1395
1396=cut
1397*/
1398
1399PERL_STATIC_INLINE U8 *
1400Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1401{
1402 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1403
1404 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1405 * the bitops (especially ~) can create illegal UTF-8.
1406 * In other words: in Perl UTF-8 is not just for Unicode. */
1407
1408 assert(start <= s && s <= end);
1409
1410 if (off >= 0) {
1411 return utf8_hop_forward(s, off, end);
1412 }
1413 else {
1414 return utf8_hop_back(s, off, start);
1415 }
1416}
1417
1418/*
4dab108f
KW
1419
1420=for apidoc is_utf8_valid_partial_char
1421
6cbb9248
KW
1422Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1423S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1424points. Otherwise, it returns 1 if there exists at least one non-empty
1425sequence of bytes that when appended to sequence C<s>, starting at position
1426C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1427otherwise returns 0.
1428
1429In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1430point.
1431
1432This is useful when a fixed-length buffer is being tested for being well-formed
1433UTF-8, but the final few bytes in it don't comprise a full character; that is,
1434it is split somewhere in the middle of the final code point's UTF-8
1435representation. (Presumably when the buffer is refreshed with the next chunk
1436of data, the new first bytes will complete the partial code point.) This
1437function is used to verify that the final bytes in the current buffer are in
1438fact the legal beginning of some code point, so that if they aren't, the
1439failure can be signalled without having to wait for the next read.
4dab108f
KW
1440
1441=cut
1442*/
2717076a
KW
1443#define is_utf8_valid_partial_char(s, e) \
1444 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1445
1446/*
1447
1448=for apidoc is_utf8_valid_partial_char_flags
1449
1450Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1451or not the input is a valid UTF-8 encoded partial character, but it takes an
1452extra parameter, C<flags>, which can further restrict which code points are
1453considered valid.
1454
1455If C<flags> is 0, this behaves identically to
1456C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1457of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1458there is any sequence of bytes that can complete the input partial character in
1459such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1460TRUE; otherwise FALSE. Non character code points cannot be determined based on
1461partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1462determined from just the first one or two bytes.
1463
1464=cut
1465 */
1466
56e4cf64 1467PERL_STATIC_INLINE bool
f1c999a7 1468S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1469{
f1c999a7 1470 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1471
f1c999a7 1472 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1473 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1474
8875bd48 1475 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1476 return FALSE;
1477 }
1478
f1c999a7 1479 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1480}
1481
8bc127bf
KW
1482/*
1483
1484=for apidoc is_utf8_fixed_width_buf_flags
1485
1486Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1487is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1488otherwise it returns FALSE.
1489
1490If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1491without restriction. If the final few bytes of the buffer do not form a
1492complete code point, this will return TRUE anyway, provided that
1493C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1494
1495If C<flags> in non-zero, it can be any combination of the
1496C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1497same meanings.
1498
1499This function differs from C<L</is_utf8_string_flags>> only in that the latter
1500returns FALSE if the final few bytes of the string don't form a complete code
1501point.
1502
1503=cut
1504 */
1505#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1506 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1507
1508/*
1509
1510=for apidoc is_utf8_fixed_width_buf_loc_flags
1511
1512Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1513failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1514to the beginning of any partial character at the end of the buffer; if there is
1515no partial character C<*ep> will contain C<s>+C<len>.
1516
1517See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1518
1519=cut
1520*/
1521
1522#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1523 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1524
1525/*
1526
1527=for apidoc is_utf8_fixed_width_buf_loclen_flags
1528
1529Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1530complete, valid characters found in the C<el> pointer.
1531
1532=cut
1533*/
1534
1535PERL_STATIC_INLINE bool
1536S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1537 STRLEN len,
8bc127bf
KW
1538 const U8 **ep,
1539 STRLEN *el,
1540 const U32 flags)
1541{
1542 const U8 * maybe_partial;
1543
1544 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1545
1546 if (! ep) {
1547 ep = &maybe_partial;
1548 }
1549
1550 /* If it's entirely valid, return that; otherwise see if the only error is
1551 * that the final few bytes are for a partial character */
1552 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1553 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1554}
1555
c8028aa6
TC
1556/* ------------------------------- perl.h ----------------------------- */
1557
1558/*
dcccc8ff
KW
1559=head1 Miscellaneous Functions
1560
41188aa0 1561=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1562
6602b933 1563Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1564If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1565
1566Return TRUE if the name is safe.
1567
796b6530 1568Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1569
1570=cut
1571*/
1572
1573PERL_STATIC_INLINE bool
41188aa0 1574S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1575 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1576 * perl itself uses xce*() functions which accept 8-bit strings.
1577 */
1578
1579 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1580
6c4650b3 1581 if (len > 1) {
c8028aa6 1582 char *null_at;
41188aa0 1583 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1584 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1585 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1586 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1587 what, op_name, pv, null_at+1);
c8028aa6
TC
1588 return FALSE;
1589 }
1590 }
1591
1592 return TRUE;
1593}
1594
1595/*
7cb3f959
TC
1596
1597Return true if the supplied filename has a newline character
fa6c7d00 1598immediately before the first (hopefully only) NUL.
7cb3f959
TC
1599
1600My original look at this incorrectly used the len from SvPV(), but
1601that's incorrect, since we allow for a NUL in pv[len-1].
1602
1603So instead, strlen() and work from there.
1604
1605This allow for the user reading a filename, forgetting to chomp it,
1606then calling:
1607
1608 open my $foo, "$file\0";
1609
1610*/
1611
1612#ifdef PERL_CORE
1613
1614PERL_STATIC_INLINE bool
1615S_should_warn_nl(const char *pv) {
1616 STRLEN len;
1617
1618 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1619
1620 len = strlen(pv);
1621
1622 return len > 0 && pv[len-1] == '\n';
1623}
1624
1625#endif
1626
81d52ecd
JH
1627/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1628
1629#define MAX_CHARSET_NAME_LENGTH 2
1630
1631PERL_STATIC_INLINE const char *
1632get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1633{
1634 /* Returns a string that corresponds to the name of the regex character set
1635 * given by 'flags', and *lenp is set the length of that string, which
1636 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1637
1638 *lenp = 1;
1639 switch (get_regex_charset(flags)) {
1640 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1641 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1642 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1643 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1644 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1645 *lenp = 2;
1646 return ASCII_MORE_RESTRICT_PAT_MODS;
1647 }
1648 /* The NOT_REACHED; hides an assert() which has a rather complex
1649 * definition in perl.h. */
1650 NOT_REACHED; /* NOTREACHED */
1651 return "?"; /* Unknown */
1652}
1653
7cb3f959 1654/*
ed382232
TC
1655
1656Return false if any get magic is on the SV other than taint magic.
1657
1658*/
1659
1660PERL_STATIC_INLINE bool
1661S_sv_only_taint_gmagic(SV *sv) {
1662 MAGIC *mg = SvMAGIC(sv);
1663
1664 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1665
1666 while (mg) {
1667 if (mg->mg_type != PERL_MAGIC_taint
1668 && !(mg->mg_flags & MGf_GSKIP)
1669 && mg->mg_virtual->svt_get) {
1670 return FALSE;
1671 }
1672 mg = mg->mg_moremagic;
1673 }
1674
1675 return TRUE;
1676}
1677
ed8ff0f3
DM
1678/* ------------------ cop.h ------------------------------------------- */
1679
1680
1681/* Enter a block. Push a new base context and return its address. */
1682
1683PERL_STATIC_INLINE PERL_CONTEXT *
1684S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1685{
1686 PERL_CONTEXT * cx;
1687
1688 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1689
1690 CXINC;
1691 cx = CX_CUR();
1692 cx->cx_type = type;
1693 cx->blk_gimme = gimme;
1694 cx->blk_oldsaveix = saveix;
4caf7d8c 1695 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1696 cx->blk_oldcop = PL_curcop;
4caf7d8c 1697 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1698 cx->blk_oldscopesp = PL_scopestack_ix;
1699 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1700 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1701
1702 PL_tmps_floor = PL_tmps_ix;
1703 CX_DEBUG(cx, "PUSH");
1704 return cx;
1705}
1706
1707
1708/* Exit a block (RETURN and LAST). */
1709
1710PERL_STATIC_INLINE void
1711S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1712{
1713 PERL_ARGS_ASSERT_CX_POPBLOCK;
1714
1715 CX_DEBUG(cx, "POP");
1716 /* these 3 are common to cx_popblock and cx_topblock */
1717 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1718 PL_scopestack_ix = cx->blk_oldscopesp;
1719 PL_curpm = cx->blk_oldpm;
1720
1721 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1722 * and leaves a CX entry lying around for repeated use, so
1723 * skip for multicall */ \
1724 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1725 || PL_savestack_ix == cx->blk_oldsaveix);
1726 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1727 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1728}
1729
1730/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1731 * Whereas cx_popblock() restores the state to the point just before
1732 * cx_pushblock() was called, cx_topblock() restores it to the point just
1733 * *after* cx_pushblock() was called. */
1734
1735PERL_STATIC_INLINE void
1736S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1737{
1738 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1739
1740 CX_DEBUG(cx, "TOP");
1741 /* these 3 are common to cx_popblock and cx_topblock */
1742 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1743 PL_scopestack_ix = cx->blk_oldscopesp;
1744 PL_curpm = cx->blk_oldpm;
1745
1746 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1747}
1748
1749
a73d8813
DM
1750PERL_STATIC_INLINE void
1751S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1752{
1753 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1754
1755 PERL_ARGS_ASSERT_CX_PUSHSUB;
1756
3f6bd23a 1757 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1758 cx->blk_sub.cv = cv;
1759 cx->blk_sub.olddepth = CvDEPTH(cv);
1760 cx->blk_sub.prevcomppad = PL_comppad;
1761 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1762 cx->blk_sub.retop = retop;
1763 SvREFCNT_inc_simple_void_NN(cv);
1764 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1765}
1766
1767
1768/* subsets of cx_popsub() */
1769
1770PERL_STATIC_INLINE void
1771S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1772{
1773 CV *cv;
1774
1775 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1776 assert(CxTYPE(cx) == CXt_SUB);
1777
1778 PL_comppad = cx->blk_sub.prevcomppad;
1779 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1780 cv = cx->blk_sub.cv;
1781 CvDEPTH(cv) = cx->blk_sub.olddepth;
1782 cx->blk_sub.cv = NULL;
1783 SvREFCNT_dec(cv);
1784}
1785
1786
1787/* handle the @_ part of leaving a sub */
1788
1789PERL_STATIC_INLINE void
1790S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1791{
1792 AV *av;
1793
1794 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1795 assert(CxTYPE(cx) == CXt_SUB);
1796 assert(AvARRAY(MUTABLE_AV(
1797 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1798 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1799
1800 CX_POP_SAVEARRAY(cx);
1801 av = MUTABLE_AV(PAD_SVl(0));
1802 if (UNLIKELY(AvREAL(av)))
1803 /* abandon @_ if it got reified */
1804 clear_defarray(av, 0);
1805 else {
1806 CLEAR_ARGARRAY(av);
1807 }
1808}
1809
1810
1811PERL_STATIC_INLINE void
1812S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1813{
1814 PERL_ARGS_ASSERT_CX_POPSUB;
1815 assert(CxTYPE(cx) == CXt_SUB);
1816
3f6bd23a 1817 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1818
1819 if (CxHASARGS(cx))
1820 cx_popsub_args(cx);
1821 cx_popsub_common(cx);
1822}
1823
1824
6a7d52cc
DM
1825PERL_STATIC_INLINE void
1826S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1827{
1828 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1829
1830 cx->blk_format.cv = cv;
1831 cx->blk_format.retop = retop;
1832 cx->blk_format.gv = gv;
1833 cx->blk_format.dfoutgv = PL_defoutgv;
1834 cx->blk_format.prevcomppad = PL_comppad;
1835 cx->blk_u16 = 0;
1836
1837 SvREFCNT_inc_simple_void_NN(cv);
1838 CvDEPTH(cv)++;
1839 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1840}
1841
1842
1843PERL_STATIC_INLINE void
1844S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1845{
1846 CV *cv;
1847 GV *dfout;
1848
1849 PERL_ARGS_ASSERT_CX_POPFORMAT;
1850 assert(CxTYPE(cx) == CXt_FORMAT);
1851
1852 dfout = cx->blk_format.dfoutgv;
1853 setdefout(dfout);
1854 cx->blk_format.dfoutgv = NULL;
1855 SvREFCNT_dec_NN(dfout);
1856
1857 PL_comppad = cx->blk_format.prevcomppad;
1858 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1859 cv = cx->blk_format.cv;
1860 cx->blk_format.cv = NULL;
1861 --CvDEPTH(cv);
1862 SvREFCNT_dec_NN(cv);
1863}
1864
1865
13febba5
DM
1866PERL_STATIC_INLINE void
1867S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1868{
1869 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1870
1871 cx->blk_eval.retop = retop;
1872 cx->blk_eval.old_namesv = namesv;
1873 cx->blk_eval.old_eval_root = PL_eval_root;
1874 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1875 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1876 cx->blk_eval.cur_top_env = PL_top_env;
1877
4c57ced5 1878 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1879 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1880 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1881}
1882
1883
1884PERL_STATIC_INLINE void
1885S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1886{
1887 SV *sv;
1888
1889 PERL_ARGS_ASSERT_CX_POPEVAL;
1890 assert(CxTYPE(cx) == CXt_EVAL);
1891
1892 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1893 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1894 PL_eval_root = cx->blk_eval.old_eval_root;
1895 sv = cx->blk_eval.cur_text;
4c57ced5 1896 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1897 cx->blk_eval.cur_text = NULL;
1898 SvREFCNT_dec_NN(sv);
1899 }
1900
1901 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1902 if (sv) {
1903 cx->blk_eval.old_namesv = NULL;
1904 SvREFCNT_dec_NN(sv);
1905 }
13febba5 1906}
6a7d52cc 1907
a73d8813 1908
d1b6bf72
DM
1909/* push a plain loop, i.e.
1910 * { block }
1911 * while (cond) { block }
1912 * for (init;cond;continue) { block }
1913 * This loop can be last/redo'ed etc.
1914 */
1915
1916PERL_STATIC_INLINE void
1917S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1918{
1919 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1920 cx->blk_loop.my_op = cLOOP;
1921}
1922
1923
1924/* push a true for loop, i.e.
1925 * for var (list) { block }
1926 */
1927
1928PERL_STATIC_INLINE void
1929S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1930{
1931 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1932
1933 /* this one line is common with cx_pushloop_plain */
1934 cx->blk_loop.my_op = cLOOP;
1935
1936 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1937 cx->blk_loop.itersave = itersave;
1938#ifdef USE_ITHREADS
1939 cx->blk_loop.oldcomppad = PL_comppad;
1940#endif
1941}
1942
1943
1944/* pop all loop types, including plain */
1945
1946PERL_STATIC_INLINE void
1947S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1948{
1949 PERL_ARGS_ASSERT_CX_POPLOOP;
1950
1951 assert(CxTYPE_is_LOOP(cx));
1952 if ( CxTYPE(cx) == CXt_LOOP_ARY
1953 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1954 {
1955 /* Free ary or cur. This assumes that state_u.ary.ary
1956 * aligns with state_u.lazysv.cur. See cx_dup() */
1957 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1958 cx->blk_loop.state_u.lazysv.cur = NULL;
1959 SvREFCNT_dec_NN(sv);
1960 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1961 sv = cx->blk_loop.state_u.lazysv.end;
1962 cx->blk_loop.state_u.lazysv.end = NULL;
1963 SvREFCNT_dec_NN(sv);
1964 }
1965 }
1966 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1967 SV *cursv;
1968 SV **svp = (cx)->blk_loop.itervar_u.svp;
1969 if ((cx->cx_type & CXp_FOR_GV))
1970 svp = &GvSV((GV*)svp);
1971 cursv = *svp;
1972 *svp = cx->blk_loop.itersave;
1973 cx->blk_loop.itersave = NULL;
1974 SvREFCNT_dec(cursv);
1975 }
1976}
1977
2a7b7c61
DM
1978
1979PERL_STATIC_INLINE void
7896dde7 1980S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1981{
7896dde7 1982 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1983
7896dde7 1984 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1985}
1986
1987
1988PERL_STATIC_INLINE void
7896dde7 1989S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1990{
7896dde7
Z
1991 PERL_ARGS_ASSERT_CX_POPWHEN;
1992 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1993
1994 PERL_UNUSED_ARG(cx);
59a14f30 1995 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1996 /* currently NOOP */
1997}
1998
1999
7896dde7
Z
2000PERL_STATIC_INLINE void
2001S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
2002{
2003 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2004
2005 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2006 cx->blk_givwhen.defsv_save = orig_defsv;
2007}
2008
2009
2010PERL_STATIC_INLINE void
2011S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2012{
2013 SV *sv;
2014
2015 PERL_ARGS_ASSERT_CX_POPGIVEN;
2016 assert(CxTYPE(cx) == CXt_GIVEN);
2017
2018 sv = GvSV(PL_defgv);
2019 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2020 cx->blk_givwhen.defsv_save = NULL;
2021 SvREFCNT_dec(sv);
2022}
2023
ec2c235b
KW
2024/* ------------------ util.h ------------------------------------------- */
2025
2026/*
2027=head1 Miscellaneous Functions
2028
2029=for apidoc foldEQ
2030
2031Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2032same
2033case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2034match themselves and their opposite case counterparts. Non-cased and non-ASCII
2035range bytes match only themselves.
2036
2037=cut
2038*/
2039
2040PERL_STATIC_INLINE I32
2041Perl_foldEQ(const char *s1, const char *s2, I32 len)
2042{
2043 const U8 *a = (const U8 *)s1;
2044 const U8 *b = (const U8 *)s2;
2045
2046 PERL_ARGS_ASSERT_FOLDEQ;
2047
2048 assert(len >= 0);
2049
2050 while (len--) {
2051 if (*a != *b && *a != PL_fold[*b])
2052 return 0;
2053 a++,b++;
2054 }
2055 return 1;
2056}
2057
0f9cb40c 2058PERL_STATIC_INLINE I32
ec2c235b
KW
2059Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2060{
2061 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2062 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2063 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2064 * does it check that the strings each have at least 'len' characters */
2065
2066 const U8 *a = (const U8 *)s1;
2067 const U8 *b = (const U8 *)s2;
2068
2069 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2070
2071 assert(len >= 0);
2072
2073 while (len--) {
2074 if (*a != *b && *a != PL_fold_latin1[*b]) {
2075 return 0;
2076 }
2077 a++, b++;
2078 }
2079 return 1;
2080}
2081
2082/*
2083=for apidoc foldEQ_locale
2084
2085Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2086same case-insensitively in the current locale; false otherwise.
2087
2088=cut
2089*/
2090
0f9cb40c 2091PERL_STATIC_INLINE I32
ec2c235b
KW
2092Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2093{
2094 dVAR;
2095 const U8 *a = (const U8 *)s1;
2096 const U8 *b = (const U8 *)s2;
2097
2098 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2099
2100 assert(len >= 0);
2101
2102 while (len--) {
2103 if (*a != *b && *a != PL_fold_locale[*b])
2104 return 0;
2105 a++,b++;
2106 }
2107 return 1;
2108}
2109
6dba01e2
KW
2110#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2111
2112PERL_STATIC_INLINE void *
2113S_my_memrchr(const char * s, const char c, const STRLEN len)
2114{
2115 /* memrchr(), since many platforms lack it */
2116
2117 const char * t = s + len - 1;
2118
2119 PERL_ARGS_ASSERT_MY_MEMRCHR;
2120
2121 while (t >= s) {
2122 if (*t == c) {
2123 return (void *) t;
2124 }
2125 t--;
2126 }
2127
2128 return NULL;
2129}
2130
2131#endif
2132
ed382232 2133/*
c8028aa6
TC
2134 * ex: set ts=8 sts=4 sw=4 et:
2135 */