This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't use C99 ULL constant suffix
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
5eabe374
KW
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4
KW
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
1d2af574
KW
441#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
449#else /* If weird byte order, drop into next loop to do byte-at-a-time
450 checks. */
451
46bb68f6 452 break;
1d2af574 453#endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
e17544a6
KW
461#endif
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
1d2af574
KW
479#ifndef EBCDIC
480
481PERL_STATIC_INLINE unsigned int
482S__variant_byte_number(PERL_UINTMAX_T word)
483{
484
485 /* This returns the position in a word (0..7) of the first variant byte in
486 * it. This is a helper function. Note that there are no branches */
487
488 assert(word);
489
490 /* Get just the msb bits of each byte */
491 word &= PERL_VARIANTS_WORD_MASK;
492
493# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
494
495 /* Bytes are stored like
496 * Byte8 ... Byte2 Byte1
497 * 63..56...15...8 7...0
498 *
499 * Isolate the lsb;
500 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
501 *
502 * The word will look this this, with a rightmost set bit in position 's':
503 * ('x's are don't cares)
504 * s
505 * x..x100..0
506 * x..xx10..0 Right shift (rightmost 0 is shifted off)
507 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
508 * the 1 just to their left into a 0; the remainder is
509 * untouched
510 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
511 * bottom to all 1
512 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
513 *
514 * Another method is to do 'word &= -word'; but it generates a compiler
515 * message on some platforms about taking the negative of an unsigned */
516
517 word >>= 1;
518 word = 1 + (word ^ (word - 1));
519
520# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
521
522 /* Bytes are stored like
523 * Byte1 Byte2 ... Byte8
524 * 63..56 55..47 ... 7...0
525 *
526 * Isolate the msb; http://codeforces.com/blog/entry/10330
527 *
528 * Only the most significant set bit matters. Or'ing word with its right
529 * shift of 1 makes that bit and the next one to its right both 1. Then
530 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
531 * msb and all to the right being 1. */
532 word |= word >> 1;
533 word |= word >> 2;
534 word |= word >> 4;
535 word |= word >> 8;
536 word |= word >> 16;
537 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
538
539 /* Then subtracting the right shift by 1 clears all but the left-most of
540 * the 1 bits, which is our desired result */
541 word -= (word >> 1);
542
543# else
544# error Unexpected byte order
545# endif
546
547 /* Here 'word' has a single bit set, the msb is of the first byte which
548 * has it set. Calculate that position in the word. We can use this
549 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
550 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
551 * just get shifted off at compile time) */
552 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
553 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
554 | (39 << 24) | (47 << 16)
555 | (55 << 8) | (63 << 0));
1d2af574
KW
556 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
557
558 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
559 word = ((word + 1) >> 3) - 1;
560
561# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
562
563 /* And invert the result */
564 word = CHARBITS - word - 1;
565
566# endif
567
568 return (unsigned int) word;
569}
570
571#endif /* ! EBCDIC */
03c1e4ab
KW
572#if defined(PERL_CORE) || defined(PERL_EXT)
573
574/*
575=for apidoc variant_under_utf8_count
576
577This function looks at the sequence of bytes between C<s> and C<e>, which are
578assumed to be encoded in ASCII/Latin1, and returns how many of them would
579change should the string be translated into UTF-8. Due to the nature of UTF-8,
580each of these would occupy two bytes instead of the single one in the input
581string. Thus, this function returns the precise number of bytes the string
582would expand by when translated to UTF-8.
583
584Unlike most of the other functions that have C<utf8> in their name, the input
585to this function is NOT a UTF-8-encoded string. The function name is slightly
586I<odd> to emphasize this.
587
588This function is internal to Perl because khw thinks that any XS code that
589would want this is probably operating too close to the internals. Presenting a
590valid use case could change that.
591
592See also
593C<L<perlapi/is_utf8_invariant_string>>
594and
595C<L<perlapi/is_utf8_invariant_string_loc>>,
596
597=cut
598
599*/
600
601PERL_STATIC_INLINE Size_t
602S_variant_under_utf8_count(const U8* const s, const U8* const e)
603{
604 const U8* x = s;
605 Size_t count = 0;
606
607 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
608
609# ifndef EBCDIC
610
5d0379de
KW
611 /* Test if the string is long enough to use word-at-a-time. (Logic is the
612 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
613 if ((STRLEN) (e - x) >= PERL_WORDSIZE
614 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
615 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
616 {
617
618 /* Process per-byte until reach word boundary. XXX This loop could be
619 * eliminated if we knew that this platform had fast unaligned reads */
620 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
621 count += ! UTF8_IS_INVARIANT(*x++);
622 }
623
624 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
625 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
626 explanation of how this works */
03c1e4ab
KW
627 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
628 * PERL_COUNT_MULTIPLIER)
629 >> ((PERL_WORDSIZE - 1) * CHARBITS);
630 x += PERL_WORDSIZE;
631 } while (x + PERL_WORDSIZE <= e);
632 }
633
634# endif
635
636 /* Process per-byte */
637 while (x < e) {
638 if (! UTF8_IS_INVARIANT(*x)) {
639 count++;
640 }
641
642 x++;
643 }
644
645 return count;
646}
647
648#endif
649
aff4cafe
KW
650#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
651# undef PERL_WORDSIZE
652# undef PERL_COUNT_MULTIPLIER
653# undef PERL_WORD_BOUNDARY_MASK
654# undef PERL_VARIANTS_WORD_MASK
655#endif
03c1e4ab 656
7c93d8f0 657/*
5ff889fb
KW
658=for apidoc is_utf8_string
659
82c5d941
KW
660Returns TRUE if the first C<len> bytes of string C<s> form a valid
661Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
662be calculated using C<strlen(s)> (which means if you use this option, that C<s>
663can't have embedded C<NUL> characters and has to have a terminating C<NUL>
664byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
665
2717076a
KW
666This function considers Perl's extended UTF-8 to be valid. That means that
667code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
668considered valid by this function. Use C<L</is_strict_utf8_string>>,
669C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
670code points are considered valid.
5ff889fb 671
9f2abfde
KW
672See also
673C<L</is_utf8_invariant_string>>,
0cbf5865 674C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
675C<L</is_utf8_string_loc>>,
676C<L</is_utf8_string_loclen>>,
8bc127bf
KW
677C<L</is_utf8_fixed_width_buf_flags>>,
678C<L</is_utf8_fixed_width_buf_loc_flags>>,
679C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
680
681=cut
682*/
683
dd237e82 684#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 685
c9cd936b
KW
686#if defined(PERL_CORE) || defined (PERL_EXT)
687
688/*
689=for apidoc is_utf8_non_invariant_string
690
691Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
692C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
693UTF-8; otherwise returns FALSE.
694
695A TRUE return means that at least one code point represented by the sequence
696either is a wide character not representable as a single byte, or the
697representation differs depending on whether the sequence is encoded in UTF-8 or
698not.
699
700See also
701C<L<perlapi/is_utf8_invariant_string>>,
702C<L<perlapi/is_utf8_string>>
703
704=cut
705
706This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
707It generally needn't be if its string is entirely UTF-8 invariant, and it
708shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
709
710It is an internal function because khw thinks that XS code shouldn't be working
711at this low a level. A valid use case could change that.
712
713*/
714
715PERL_STATIC_INLINE bool
716S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
717{
718 const U8 * first_variant;
719
720 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
721
722 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
723 return FALSE;
724 }
725
726 return is_utf8_string(first_variant, len - (first_variant - s));
727}
728
729#endif
730
5ff889fb 731/*
9f2abfde
KW
732=for apidoc is_strict_utf8_string
733
734Returns TRUE if the first C<len> bytes of string C<s> form a valid
735UTF-8-encoded string that is fully interchangeable by any application using
736Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
737calculated using C<strlen(s)> (which means if you use this option, that C<s>
738can't have embedded C<NUL> characters and has to have a terminating C<NUL>
739byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
740
741This function returns FALSE for strings containing any
742code points above the Unicode max of 0x10FFFF, surrogate code points, or
743non-character code points.
744
745See also
746C<L</is_utf8_invariant_string>>,
0cbf5865 747C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
748C<L</is_utf8_string>>,
749C<L</is_utf8_string_flags>>,
750C<L</is_utf8_string_loc>>,
751C<L</is_utf8_string_loc_flags>>,
752C<L</is_utf8_string_loclen>>,
753C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
754C<L</is_utf8_fixed_width_buf_flags>>,
755C<L</is_utf8_fixed_width_buf_loc_flags>>,
756C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
757C<L</is_strict_utf8_string_loc>>,
758C<L</is_strict_utf8_string_loclen>>,
759C<L</is_c9strict_utf8_string>>,
760C<L</is_c9strict_utf8_string_loc>>,
761and
762C<L</is_c9strict_utf8_string_loclen>>.
763
764=cut
765*/
766
dd237e82 767#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
768
769/*
770=for apidoc is_c9strict_utf8_string
771
772Returns TRUE if the first C<len> bytes of string C<s> form a valid
773UTF-8-encoded string that conforms to
774L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
775otherwise it returns FALSE. If C<len> is 0, it will be calculated using
776C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
777C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
778characters being ASCII constitute 'a valid UTF-8 string'.
779
780This function returns FALSE for strings containing any code points above the
781Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
782code points per
783L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
784
785See also
786C<L</is_utf8_invariant_string>>,
0cbf5865 787C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
788C<L</is_utf8_string>>,
789C<L</is_utf8_string_flags>>,
790C<L</is_utf8_string_loc>>,
791C<L</is_utf8_string_loc_flags>>,
792C<L</is_utf8_string_loclen>>,
793C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
794C<L</is_utf8_fixed_width_buf_flags>>,
795C<L</is_utf8_fixed_width_buf_loc_flags>>,
796C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
797C<L</is_strict_utf8_string>>,
798C<L</is_strict_utf8_string_loc>>,
799C<L</is_strict_utf8_string_loclen>>,
800C<L</is_c9strict_utf8_string_loc>>,
801and
802C<L</is_c9strict_utf8_string_loclen>>.
803
804=cut
805*/
806
dd237e82 807#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
808
809/*
810=for apidoc is_utf8_string_flags
811
812Returns TRUE if the first C<len> bytes of string C<s> form a valid
813UTF-8 string, subject to the restrictions imposed by C<flags>;
814returns FALSE otherwise. If C<len> is 0, it will be calculated
815using C<strlen(s)> (which means if you use this option, that C<s> can't have
816embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
817that all characters being ASCII constitute 'a valid UTF-8 string'.
818
819If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
820C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
821as C<L</is_strict_utf8_string>>; and if C<flags> is
822C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
823C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
824combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
825C<L</utf8n_to_uvchr>>, with the same meanings.
826
827See also
828C<L</is_utf8_invariant_string>>,
0cbf5865 829C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
830C<L</is_utf8_string>>,
831C<L</is_utf8_string_loc>>,
832C<L</is_utf8_string_loc_flags>>,
833C<L</is_utf8_string_loclen>>,
834C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
835C<L</is_utf8_fixed_width_buf_flags>>,
836C<L</is_utf8_fixed_width_buf_loc_flags>>,
837C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
838C<L</is_strict_utf8_string>>,
839C<L</is_strict_utf8_string_loc>>,
840C<L</is_strict_utf8_string_loclen>>,
841C<L</is_c9strict_utf8_string>>,
842C<L</is_c9strict_utf8_string_loc>>,
843and
844C<L</is_c9strict_utf8_string_loclen>>.
845
846=cut
847*/
848
849PERL_STATIC_INLINE bool
f60f61fd 850S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 851{
33756530 852 const U8 * first_variant;
9f2abfde
KW
853
854 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
855 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 856 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 857
f60f61fd
KW
858 if (len == 0) {
859 len = strlen((const char *)s);
860 }
861
9f2abfde
KW
862 if (flags == 0) {
863 return is_utf8_string(s, len);
864 }
865
d044b7a7 866 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
867 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
868 {
869 return is_strict_utf8_string(s, len);
870 }
871
d044b7a7 872 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
873 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
874 {
875 return is_c9strict_utf8_string(s, len);
876 }
877
33756530
KW
878 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
879 const U8* const send = s + len;
880 const U8* x = first_variant;
881
a0d7f935
KW
882 while (x < send) {
883 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
884 if (UNLIKELY(! cur_len)) {
885 return FALSE;
886 }
887 x += cur_len;
9f2abfde 888 }
33756530 889 }
9f2abfde
KW
890
891 return TRUE;
892}
893
894/*
5ff889fb
KW
895
896=for apidoc is_utf8_string_loc
897
2717076a 898Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 899case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 900"utf8ness success") in the C<ep> pointer.
5ff889fb 901
2717076a 902See also C<L</is_utf8_string_loclen>>.
5ff889fb 903
3964c812
KW
904=cut
905*/
906
907#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
908
909/*
910
5ff889fb
KW
911=for apidoc is_utf8_string_loclen
912
2717076a 913Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 914case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 915"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 916encoded characters in the C<el> pointer.
5ff889fb 917
2717076a 918See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
919
920=cut
921*/
922
56e4cf64 923PERL_STATIC_INLINE bool
33756530 924Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 925{
33756530 926 const U8 * first_variant;
5ff889fb
KW
927
928 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
929
33756530
KW
930 if (len == 0) {
931 len = strlen((const char *) s);
932 }
933
934 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
935 if (el)
936 *el = len;
937
938 if (ep) {
939 *ep = s + len;
940 }
941
942 return TRUE;
943 }
944
945 {
946 const U8* const send = s + len;
947 const U8* x = first_variant;
948 STRLEN outlen = first_variant - s;
949
a0d7f935
KW
950 while (x < send) {
951 const STRLEN cur_len = isUTF8_CHAR(x, send);
952 if (UNLIKELY(! cur_len)) {
953 break;
954 }
955 x += cur_len;
956 outlen++;
5ff889fb 957 }
5ff889fb 958
a0d7f935
KW
959 if (el)
960 *el = outlen;
5ff889fb 961
a0d7f935
KW
962 if (ep) {
963 *ep = x;
964 }
5ff889fb 965
a0d7f935 966 return (x == send);
33756530 967 }
5ff889fb
KW
968}
969
970/*
9f2abfde
KW
971
972=for apidoc is_strict_utf8_string_loc
973
974Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
975case of "utf8ness failure") or the location C<s>+C<len> (in the case of
976"utf8ness success") in the C<ep> pointer.
977
978See also C<L</is_strict_utf8_string_loclen>>.
979
980=cut
981*/
982
983#define is_strict_utf8_string_loc(s, len, ep) \
984 is_strict_utf8_string_loclen(s, len, ep, 0)
985
986/*
987
988=for apidoc is_strict_utf8_string_loclen
989
990Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
991case of "utf8ness failure") or the location C<s>+C<len> (in the case of
992"utf8ness success") in the C<ep> pointer, and the number of UTF-8
993encoded characters in the C<el> pointer.
994
995See also C<L</is_strict_utf8_string_loc>>.
996
997=cut
998*/
999
1000PERL_STATIC_INLINE bool
33756530 1001S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1002{
33756530 1003 const U8 * first_variant;
9f2abfde
KW
1004
1005 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1006
33756530
KW
1007 if (len == 0) {
1008 len = strlen((const char *) s);
1009 }
1010
1011 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1012 if (el)
1013 *el = len;
1014
1015 if (ep) {
1016 *ep = s + len;
1017 }
1018
1019 return TRUE;
1020 }
1021
1022 {
1023 const U8* const send = s + len;
1024 const U8* x = first_variant;
1025 STRLEN outlen = first_variant - s;
1026
a0d7f935
KW
1027 while (x < send) {
1028 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1029 if (UNLIKELY(! cur_len)) {
1030 break;
1031 }
1032 x += cur_len;
1033 outlen++;
9f2abfde 1034 }
9f2abfde 1035
a0d7f935
KW
1036 if (el)
1037 *el = outlen;
9f2abfde 1038
a0d7f935
KW
1039 if (ep) {
1040 *ep = x;
1041 }
9f2abfde 1042
a0d7f935 1043 return (x == send);
33756530 1044 }
9f2abfde
KW
1045}
1046
1047/*
1048
1049=for apidoc is_c9strict_utf8_string_loc
1050
1051Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1052the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1053"utf8ness success") in the C<ep> pointer.
1054
1055See also C<L</is_c9strict_utf8_string_loclen>>.
1056
1057=cut
1058*/
1059
1060#define is_c9strict_utf8_string_loc(s, len, ep) \
1061 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1062
1063/*
1064
1065=for apidoc is_c9strict_utf8_string_loclen
1066
1067Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1068the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1069"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1070characters in the C<el> pointer.
1071
1072See also C<L</is_c9strict_utf8_string_loc>>.
1073
1074=cut
1075*/
1076
1077PERL_STATIC_INLINE bool
33756530 1078S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1079{
33756530 1080 const U8 * first_variant;
9f2abfde
KW
1081
1082 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1083
33756530
KW
1084 if (len == 0) {
1085 len = strlen((const char *) s);
1086 }
1087
1088 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1089 if (el)
1090 *el = len;
1091
1092 if (ep) {
1093 *ep = s + len;
1094 }
1095
1096 return TRUE;
1097 }
1098
1099 {
1100 const U8* const send = s + len;
1101 const U8* x = first_variant;
1102 STRLEN outlen = first_variant - s;
1103
a0d7f935
KW
1104 while (x < send) {
1105 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1106 if (UNLIKELY(! cur_len)) {
1107 break;
1108 }
1109 x += cur_len;
1110 outlen++;
9f2abfde 1111 }
9f2abfde 1112
a0d7f935
KW
1113 if (el)
1114 *el = outlen;
9f2abfde 1115
a0d7f935
KW
1116 if (ep) {
1117 *ep = x;
1118 }
9f2abfde 1119
a0d7f935 1120 return (x == send);
33756530 1121 }
9f2abfde
KW
1122}
1123
1124/*
1125
1126=for apidoc is_utf8_string_loc_flags
1127
1128Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1129case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1130"utf8ness success") in the C<ep> pointer.
1131
1132See also C<L</is_utf8_string_loclen_flags>>.
1133
1134=cut
1135*/
1136
1137#define is_utf8_string_loc_flags(s, len, ep, flags) \
1138 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1139
1140
1141/* The above 3 actual functions could have been moved into the more general one
1142 * just below, and made #defines that call it with the right 'flags'. They are
1143 * currently kept separate to increase their chances of getting inlined */
1144
1145/*
1146
1147=for apidoc is_utf8_string_loclen_flags
1148
1149Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1150case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1151"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1152encoded characters in the C<el> pointer.
1153
1154See also C<L</is_utf8_string_loc_flags>>.
1155
1156=cut
1157*/
1158
1159PERL_STATIC_INLINE bool
f60f61fd 1160S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1161{
33756530 1162 const U8 * first_variant;
9f2abfde
KW
1163
1164 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1165 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1166 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1167
f60f61fd 1168 if (len == 0) {
a0d7f935 1169 len = strlen((const char *) s);
f60f61fd
KW
1170 }
1171
9f2abfde
KW
1172 if (flags == 0) {
1173 return is_utf8_string_loclen(s, len, ep, el);
1174 }
1175
d044b7a7 1176 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1177 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1178 {
1179 return is_strict_utf8_string_loclen(s, len, ep, el);
1180 }
1181
d044b7a7 1182 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1183 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1184 {
1185 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1186 }
1187
33756530
KW
1188 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1189 if (el)
1190 *el = len;
1191
1192 if (ep) {
1193 *ep = s + len;
1194 }
1195
1196 return TRUE;
1197 }
1198
1199 {
1200 const U8* send = s + len;
1201 const U8* x = first_variant;
1202 STRLEN outlen = first_variant - s;
1203
a0d7f935
KW
1204 while (x < send) {
1205 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1206 if (UNLIKELY(! cur_len)) {
1207 break;
1208 }
1209 x += cur_len;
1210 outlen++;
9f2abfde 1211 }
9f2abfde 1212
a0d7f935
KW
1213 if (el)
1214 *el = outlen;
9f2abfde 1215
a0d7f935
KW
1216 if (ep) {
1217 *ep = x;
1218 }
9f2abfde 1219
a0d7f935 1220 return (x == send);
33756530 1221 }
9f2abfde
KW
1222}
1223
1224/*
7c93d8f0
KW
1225=for apidoc utf8_distance
1226
1227Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1228and C<b>.
1229
1230WARNING: use only if you *know* that the pointers point inside the
1231same UTF-8 buffer.
1232
1233=cut
1234*/
1235
1236PERL_STATIC_INLINE IV
1237Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1238{
1239 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1240
1241 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1242}
1243
1244/*
1245=for apidoc utf8_hop
1246
1247Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1248forward or backward.
1249
1250WARNING: do not use the following unless you *know* C<off> is within
1251the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1252on the first byte of character or just after the last byte of a character.
1253
1254=cut
1255*/
1256
1257PERL_STATIC_INLINE U8 *
1258Perl_utf8_hop(const U8 *s, SSize_t off)
1259{
1260 PERL_ARGS_ASSERT_UTF8_HOP;
1261
1262 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1263 * the bitops (especially ~) can create illegal UTF-8.
1264 * In other words: in Perl UTF-8 is not just for Unicode. */
1265
1266 if (off >= 0) {
1267 while (off--)
1268 s += UTF8SKIP(s);
1269 }
1270 else {
1271 while (off++) {
1272 s--;
1273 while (UTF8_IS_CONTINUATION(*s))
1274 s--;
1275 }
1276 }
7347ee54 1277 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1278 return (U8 *)s;
7347ee54 1279 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1280}
1281
4dab108f 1282/*
65df57a8
TC
1283=for apidoc utf8_hop_forward
1284
1285Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1286forward.
1287
1288C<off> must be non-negative.
1289
1290C<s> must be before or equal to C<end>.
1291
1292When moving forward it will not move beyond C<end>.
1293
1294Will not exceed this limit even if the string is not valid "UTF-8".
1295
1296=cut
1297*/
1298
1299PERL_STATIC_INLINE U8 *
1300Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1301{
1302 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1303
1304 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1305 * the bitops (especially ~) can create illegal UTF-8.
1306 * In other words: in Perl UTF-8 is not just for Unicode. */
1307
1308 assert(s <= end);
1309 assert(off >= 0);
1310
1311 while (off--) {
1312 STRLEN skip = UTF8SKIP(s);
de979548 1313 if ((STRLEN)(end - s) <= skip) {
7347ee54 1314 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1315 return (U8 *)end;
7347ee54 1316 GCC_DIAG_RESTORE_STMT;
de979548 1317 }
65df57a8
TC
1318 s += skip;
1319 }
1320
7347ee54 1321 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1322 return (U8 *)s;
7347ee54 1323 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1324}
1325
1326/*
1327=for apidoc utf8_hop_back
1328
1329Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1330backward.
1331
1332C<off> must be non-positive.
1333
1334C<s> must be after or equal to C<start>.
1335
1336When moving backward it will not move before C<start>.
1337
1338Will not exceed this limit even if the string is not valid "UTF-8".
1339
1340=cut
1341*/
1342
1343PERL_STATIC_INLINE U8 *
1344Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1345{
1346 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1347
1348 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1349 * the bitops (especially ~) can create illegal UTF-8.
1350 * In other words: in Perl UTF-8 is not just for Unicode. */
1351
1352 assert(start <= s);
1353 assert(off <= 0);
1354
1355 while (off++ && s > start) {
1356 s--;
1357 while (UTF8_IS_CONTINUATION(*s) && s > start)
1358 s--;
1359 }
1360
7347ee54 1361 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1362 return (U8 *)s;
7347ee54 1363 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1364}
1365
1366/*
1367=for apidoc utf8_hop_safe
1368
1369Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1370either forward or backward.
1371
1372When moving backward it will not move before C<start>.
1373
1374When moving forward it will not move beyond C<end>.
1375
1376Will not exceed those limits even if the string is not valid "UTF-8".
1377
1378=cut
1379*/
1380
1381PERL_STATIC_INLINE U8 *
1382Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1383{
1384 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1385
1386 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1387 * the bitops (especially ~) can create illegal UTF-8.
1388 * In other words: in Perl UTF-8 is not just for Unicode. */
1389
1390 assert(start <= s && s <= end);
1391
1392 if (off >= 0) {
1393 return utf8_hop_forward(s, off, end);
1394 }
1395 else {
1396 return utf8_hop_back(s, off, start);
1397 }
1398}
1399
1400/*
4dab108f
KW
1401
1402=for apidoc is_utf8_valid_partial_char
1403
6cbb9248
KW
1404Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1405S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1406points. Otherwise, it returns 1 if there exists at least one non-empty
1407sequence of bytes that when appended to sequence C<s>, starting at position
1408C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1409otherwise returns 0.
1410
1411In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1412point.
1413
1414This is useful when a fixed-length buffer is being tested for being well-formed
1415UTF-8, but the final few bytes in it don't comprise a full character; that is,
1416it is split somewhere in the middle of the final code point's UTF-8
1417representation. (Presumably when the buffer is refreshed with the next chunk
1418of data, the new first bytes will complete the partial code point.) This
1419function is used to verify that the final bytes in the current buffer are in
1420fact the legal beginning of some code point, so that if they aren't, the
1421failure can be signalled without having to wait for the next read.
4dab108f
KW
1422
1423=cut
1424*/
2717076a
KW
1425#define is_utf8_valid_partial_char(s, e) \
1426 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1427
1428/*
1429
1430=for apidoc is_utf8_valid_partial_char_flags
1431
1432Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1433or not the input is a valid UTF-8 encoded partial character, but it takes an
1434extra parameter, C<flags>, which can further restrict which code points are
1435considered valid.
1436
1437If C<flags> is 0, this behaves identically to
1438C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1439of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1440there is any sequence of bytes that can complete the input partial character in
1441such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1442TRUE; otherwise FALSE. Non character code points cannot be determined based on
1443partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1444determined from just the first one or two bytes.
1445
1446=cut
1447 */
1448
56e4cf64 1449PERL_STATIC_INLINE bool
f1c999a7 1450S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1451{
f1c999a7 1452 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1453
f1c999a7 1454 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1455 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1456
8875bd48 1457 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1458 return FALSE;
1459 }
1460
f1c999a7 1461 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1462}
1463
8bc127bf
KW
1464/*
1465
1466=for apidoc is_utf8_fixed_width_buf_flags
1467
1468Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1469is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1470otherwise it returns FALSE.
1471
1472If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1473without restriction. If the final few bytes of the buffer do not form a
1474complete code point, this will return TRUE anyway, provided that
1475C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1476
1477If C<flags> in non-zero, it can be any combination of the
1478C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1479same meanings.
1480
1481This function differs from C<L</is_utf8_string_flags>> only in that the latter
1482returns FALSE if the final few bytes of the string don't form a complete code
1483point.
1484
1485=cut
1486 */
1487#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1488 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1489
1490/*
1491
1492=for apidoc is_utf8_fixed_width_buf_loc_flags
1493
1494Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1495failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1496to the beginning of any partial character at the end of the buffer; if there is
1497no partial character C<*ep> will contain C<s>+C<len>.
1498
1499See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1500
1501=cut
1502*/
1503
1504#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1505 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1506
1507/*
1508
1509=for apidoc is_utf8_fixed_width_buf_loclen_flags
1510
1511Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1512complete, valid characters found in the C<el> pointer.
1513
1514=cut
1515*/
1516
1517PERL_STATIC_INLINE bool
1518S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1519 STRLEN len,
8bc127bf
KW
1520 const U8 **ep,
1521 STRLEN *el,
1522 const U32 flags)
1523{
1524 const U8 * maybe_partial;
1525
1526 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1527
1528 if (! ep) {
1529 ep = &maybe_partial;
1530 }
1531
1532 /* If it's entirely valid, return that; otherwise see if the only error is
1533 * that the final few bytes are for a partial character */
1534 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1535 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1536}
1537
c8028aa6
TC
1538/* ------------------------------- perl.h ----------------------------- */
1539
1540/*
dcccc8ff
KW
1541=head1 Miscellaneous Functions
1542
41188aa0 1543=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1544
6602b933 1545Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1546If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1547
1548Return TRUE if the name is safe.
1549
796b6530 1550Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1551
1552=cut
1553*/
1554
1555PERL_STATIC_INLINE bool
41188aa0 1556S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1557 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1558 * perl itself uses xce*() functions which accept 8-bit strings.
1559 */
1560
1561 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1562
6c4650b3 1563 if (len > 1) {
c8028aa6 1564 char *null_at;
41188aa0 1565 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1566 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1567 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1568 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1569 what, op_name, pv, null_at+1);
c8028aa6
TC
1570 return FALSE;
1571 }
1572 }
1573
1574 return TRUE;
1575}
1576
1577/*
7cb3f959
TC
1578
1579Return true if the supplied filename has a newline character
fa6c7d00 1580immediately before the first (hopefully only) NUL.
7cb3f959
TC
1581
1582My original look at this incorrectly used the len from SvPV(), but
1583that's incorrect, since we allow for a NUL in pv[len-1].
1584
1585So instead, strlen() and work from there.
1586
1587This allow for the user reading a filename, forgetting to chomp it,
1588then calling:
1589
1590 open my $foo, "$file\0";
1591
1592*/
1593
1594#ifdef PERL_CORE
1595
1596PERL_STATIC_INLINE bool
1597S_should_warn_nl(const char *pv) {
1598 STRLEN len;
1599
1600 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1601
1602 len = strlen(pv);
1603
1604 return len > 0 && pv[len-1] == '\n';
1605}
1606
1607#endif
1608
81d52ecd
JH
1609/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1610
1611#define MAX_CHARSET_NAME_LENGTH 2
1612
1613PERL_STATIC_INLINE const char *
1614get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1615{
1616 /* Returns a string that corresponds to the name of the regex character set
1617 * given by 'flags', and *lenp is set the length of that string, which
1618 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1619
1620 *lenp = 1;
1621 switch (get_regex_charset(flags)) {
1622 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1623 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1624 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1625 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1626 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1627 *lenp = 2;
1628 return ASCII_MORE_RESTRICT_PAT_MODS;
1629 }
1630 /* The NOT_REACHED; hides an assert() which has a rather complex
1631 * definition in perl.h. */
1632 NOT_REACHED; /* NOTREACHED */
1633 return "?"; /* Unknown */
1634}
1635
7cb3f959 1636/*
ed382232
TC
1637
1638Return false if any get magic is on the SV other than taint magic.
1639
1640*/
1641
1642PERL_STATIC_INLINE bool
1643S_sv_only_taint_gmagic(SV *sv) {
1644 MAGIC *mg = SvMAGIC(sv);
1645
1646 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1647
1648 while (mg) {
1649 if (mg->mg_type != PERL_MAGIC_taint
1650 && !(mg->mg_flags & MGf_GSKIP)
1651 && mg->mg_virtual->svt_get) {
1652 return FALSE;
1653 }
1654 mg = mg->mg_moremagic;
1655 }
1656
1657 return TRUE;
1658}
1659
ed8ff0f3
DM
1660/* ------------------ cop.h ------------------------------------------- */
1661
1662
1663/* Enter a block. Push a new base context and return its address. */
1664
1665PERL_STATIC_INLINE PERL_CONTEXT *
1666S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1667{
1668 PERL_CONTEXT * cx;
1669
1670 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1671
1672 CXINC;
1673 cx = CX_CUR();
1674 cx->cx_type = type;
1675 cx->blk_gimme = gimme;
1676 cx->blk_oldsaveix = saveix;
4caf7d8c 1677 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1678 cx->blk_oldcop = PL_curcop;
4caf7d8c 1679 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1680 cx->blk_oldscopesp = PL_scopestack_ix;
1681 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1682 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1683
1684 PL_tmps_floor = PL_tmps_ix;
1685 CX_DEBUG(cx, "PUSH");
1686 return cx;
1687}
1688
1689
1690/* Exit a block (RETURN and LAST). */
1691
1692PERL_STATIC_INLINE void
1693S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1694{
1695 PERL_ARGS_ASSERT_CX_POPBLOCK;
1696
1697 CX_DEBUG(cx, "POP");
1698 /* these 3 are common to cx_popblock and cx_topblock */
1699 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1700 PL_scopestack_ix = cx->blk_oldscopesp;
1701 PL_curpm = cx->blk_oldpm;
1702
1703 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1704 * and leaves a CX entry lying around for repeated use, so
1705 * skip for multicall */ \
1706 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1707 || PL_savestack_ix == cx->blk_oldsaveix);
1708 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1709 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1710}
1711
1712/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1713 * Whereas cx_popblock() restores the state to the point just before
1714 * cx_pushblock() was called, cx_topblock() restores it to the point just
1715 * *after* cx_pushblock() was called. */
1716
1717PERL_STATIC_INLINE void
1718S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1719{
1720 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1721
1722 CX_DEBUG(cx, "TOP");
1723 /* these 3 are common to cx_popblock and cx_topblock */
1724 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1725 PL_scopestack_ix = cx->blk_oldscopesp;
1726 PL_curpm = cx->blk_oldpm;
1727
1728 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1729}
1730
1731
a73d8813
DM
1732PERL_STATIC_INLINE void
1733S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1734{
1735 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1736
1737 PERL_ARGS_ASSERT_CX_PUSHSUB;
1738
3f6bd23a 1739 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1740 cx->blk_sub.cv = cv;
1741 cx->blk_sub.olddepth = CvDEPTH(cv);
1742 cx->blk_sub.prevcomppad = PL_comppad;
1743 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1744 cx->blk_sub.retop = retop;
1745 SvREFCNT_inc_simple_void_NN(cv);
1746 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1747}
1748
1749
1750/* subsets of cx_popsub() */
1751
1752PERL_STATIC_INLINE void
1753S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1754{
1755 CV *cv;
1756
1757 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1758 assert(CxTYPE(cx) == CXt_SUB);
1759
1760 PL_comppad = cx->blk_sub.prevcomppad;
1761 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1762 cv = cx->blk_sub.cv;
1763 CvDEPTH(cv) = cx->blk_sub.olddepth;
1764 cx->blk_sub.cv = NULL;
1765 SvREFCNT_dec(cv);
1766}
1767
1768
1769/* handle the @_ part of leaving a sub */
1770
1771PERL_STATIC_INLINE void
1772S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1773{
1774 AV *av;
1775
1776 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1777 assert(CxTYPE(cx) == CXt_SUB);
1778 assert(AvARRAY(MUTABLE_AV(
1779 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1780 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1781
1782 CX_POP_SAVEARRAY(cx);
1783 av = MUTABLE_AV(PAD_SVl(0));
1784 if (UNLIKELY(AvREAL(av)))
1785 /* abandon @_ if it got reified */
1786 clear_defarray(av, 0);
1787 else {
1788 CLEAR_ARGARRAY(av);
1789 }
1790}
1791
1792
1793PERL_STATIC_INLINE void
1794S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1795{
1796 PERL_ARGS_ASSERT_CX_POPSUB;
1797 assert(CxTYPE(cx) == CXt_SUB);
1798
3f6bd23a 1799 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1800
1801 if (CxHASARGS(cx))
1802 cx_popsub_args(cx);
1803 cx_popsub_common(cx);
1804}
1805
1806
6a7d52cc
DM
1807PERL_STATIC_INLINE void
1808S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1809{
1810 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1811
1812 cx->blk_format.cv = cv;
1813 cx->blk_format.retop = retop;
1814 cx->blk_format.gv = gv;
1815 cx->blk_format.dfoutgv = PL_defoutgv;
1816 cx->blk_format.prevcomppad = PL_comppad;
1817 cx->blk_u16 = 0;
1818
1819 SvREFCNT_inc_simple_void_NN(cv);
1820 CvDEPTH(cv)++;
1821 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1822}
1823
1824
1825PERL_STATIC_INLINE void
1826S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1827{
1828 CV *cv;
1829 GV *dfout;
1830
1831 PERL_ARGS_ASSERT_CX_POPFORMAT;
1832 assert(CxTYPE(cx) == CXt_FORMAT);
1833
1834 dfout = cx->blk_format.dfoutgv;
1835 setdefout(dfout);
1836 cx->blk_format.dfoutgv = NULL;
1837 SvREFCNT_dec_NN(dfout);
1838
1839 PL_comppad = cx->blk_format.prevcomppad;
1840 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1841 cv = cx->blk_format.cv;
1842 cx->blk_format.cv = NULL;
1843 --CvDEPTH(cv);
1844 SvREFCNT_dec_NN(cv);
1845}
1846
1847
13febba5
DM
1848PERL_STATIC_INLINE void
1849S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1850{
1851 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1852
1853 cx->blk_eval.retop = retop;
1854 cx->blk_eval.old_namesv = namesv;
1855 cx->blk_eval.old_eval_root = PL_eval_root;
1856 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1857 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1858 cx->blk_eval.cur_top_env = PL_top_env;
1859
4c57ced5 1860 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1861 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1862 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1863}
1864
1865
1866PERL_STATIC_INLINE void
1867S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1868{
1869 SV *sv;
1870
1871 PERL_ARGS_ASSERT_CX_POPEVAL;
1872 assert(CxTYPE(cx) == CXt_EVAL);
1873
1874 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1875 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1876 PL_eval_root = cx->blk_eval.old_eval_root;
1877 sv = cx->blk_eval.cur_text;
4c57ced5 1878 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1879 cx->blk_eval.cur_text = NULL;
1880 SvREFCNT_dec_NN(sv);
1881 }
1882
1883 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1884 if (sv) {
1885 cx->blk_eval.old_namesv = NULL;
1886 SvREFCNT_dec_NN(sv);
1887 }
13febba5 1888}
6a7d52cc 1889
a73d8813 1890
d1b6bf72
DM
1891/* push a plain loop, i.e.
1892 * { block }
1893 * while (cond) { block }
1894 * for (init;cond;continue) { block }
1895 * This loop can be last/redo'ed etc.
1896 */
1897
1898PERL_STATIC_INLINE void
1899S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1900{
1901 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1902 cx->blk_loop.my_op = cLOOP;
1903}
1904
1905
1906/* push a true for loop, i.e.
1907 * for var (list) { block }
1908 */
1909
1910PERL_STATIC_INLINE void
1911S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1912{
1913 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1914
1915 /* this one line is common with cx_pushloop_plain */
1916 cx->blk_loop.my_op = cLOOP;
1917
1918 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1919 cx->blk_loop.itersave = itersave;
1920#ifdef USE_ITHREADS
1921 cx->blk_loop.oldcomppad = PL_comppad;
1922#endif
1923}
1924
1925
1926/* pop all loop types, including plain */
1927
1928PERL_STATIC_INLINE void
1929S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1930{
1931 PERL_ARGS_ASSERT_CX_POPLOOP;
1932
1933 assert(CxTYPE_is_LOOP(cx));
1934 if ( CxTYPE(cx) == CXt_LOOP_ARY
1935 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1936 {
1937 /* Free ary or cur. This assumes that state_u.ary.ary
1938 * aligns with state_u.lazysv.cur. See cx_dup() */
1939 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1940 cx->blk_loop.state_u.lazysv.cur = NULL;
1941 SvREFCNT_dec_NN(sv);
1942 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1943 sv = cx->blk_loop.state_u.lazysv.end;
1944 cx->blk_loop.state_u.lazysv.end = NULL;
1945 SvREFCNT_dec_NN(sv);
1946 }
1947 }
1948 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1949 SV *cursv;
1950 SV **svp = (cx)->blk_loop.itervar_u.svp;
1951 if ((cx->cx_type & CXp_FOR_GV))
1952 svp = &GvSV((GV*)svp);
1953 cursv = *svp;
1954 *svp = cx->blk_loop.itersave;
1955 cx->blk_loop.itersave = NULL;
1956 SvREFCNT_dec(cursv);
1957 }
1958}
1959
2a7b7c61
DM
1960
1961PERL_STATIC_INLINE void
7896dde7 1962S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1963{
7896dde7 1964 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1965
7896dde7 1966 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1967}
1968
1969
1970PERL_STATIC_INLINE void
7896dde7 1971S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1972{
7896dde7
Z
1973 PERL_ARGS_ASSERT_CX_POPWHEN;
1974 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1975
1976 PERL_UNUSED_ARG(cx);
59a14f30 1977 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1978 /* currently NOOP */
1979}
1980
1981
7896dde7
Z
1982PERL_STATIC_INLINE void
1983S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1984{
1985 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1986
1987 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1988 cx->blk_givwhen.defsv_save = orig_defsv;
1989}
1990
1991
1992PERL_STATIC_INLINE void
1993S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1994{
1995 SV *sv;
1996
1997 PERL_ARGS_ASSERT_CX_POPGIVEN;
1998 assert(CxTYPE(cx) == CXt_GIVEN);
1999
2000 sv = GvSV(PL_defgv);
2001 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2002 cx->blk_givwhen.defsv_save = NULL;
2003 SvREFCNT_dec(sv);
2004}
2005
ec2c235b
KW
2006/* ------------------ util.h ------------------------------------------- */
2007
2008/*
2009=head1 Miscellaneous Functions
2010
2011=for apidoc foldEQ
2012
2013Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2014same
2015case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2016match themselves and their opposite case counterparts. Non-cased and non-ASCII
2017range bytes match only themselves.
2018
2019=cut
2020*/
2021
2022PERL_STATIC_INLINE I32
2023Perl_foldEQ(const char *s1, const char *s2, I32 len)
2024{
2025 const U8 *a = (const U8 *)s1;
2026 const U8 *b = (const U8 *)s2;
2027
2028 PERL_ARGS_ASSERT_FOLDEQ;
2029
2030 assert(len >= 0);
2031
2032 while (len--) {
2033 if (*a != *b && *a != PL_fold[*b])
2034 return 0;
2035 a++,b++;
2036 }
2037 return 1;
2038}
2039
0f9cb40c 2040PERL_STATIC_INLINE I32
ec2c235b
KW
2041Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2042{
2043 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2044 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2045 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2046 * does it check that the strings each have at least 'len' characters */
2047
2048 const U8 *a = (const U8 *)s1;
2049 const U8 *b = (const U8 *)s2;
2050
2051 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2052
2053 assert(len >= 0);
2054
2055 while (len--) {
2056 if (*a != *b && *a != PL_fold_latin1[*b]) {
2057 return 0;
2058 }
2059 a++, b++;
2060 }
2061 return 1;
2062}
2063
2064/*
2065=for apidoc foldEQ_locale
2066
2067Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2068same case-insensitively in the current locale; false otherwise.
2069
2070=cut
2071*/
2072
0f9cb40c 2073PERL_STATIC_INLINE I32
ec2c235b
KW
2074Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2075{
2076 dVAR;
2077 const U8 *a = (const U8 *)s1;
2078 const U8 *b = (const U8 *)s2;
2079
2080 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2081
2082 assert(len >= 0);
2083
2084 while (len--) {
2085 if (*a != *b && *a != PL_fold_locale[*b])
2086 return 0;
2087 a++,b++;
2088 }
2089 return 1;
2090}
2091
6dba01e2
KW
2092#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2093
2094PERL_STATIC_INLINE void *
2095S_my_memrchr(const char * s, const char c, const STRLEN len)
2096{
2097 /* memrchr(), since many platforms lack it */
2098
2099 const char * t = s + len - 1;
2100
2101 PERL_ARGS_ASSERT_MY_MEMRCHR;
2102
2103 while (t >= s) {
2104 if (*t == c) {
2105 return (void *) t;
2106 }
2107 t--;
2108 }
2109
2110 return NULL;
2111}
2112
2113#endif
2114
ed382232 2115/*
c8028aa6
TC
2116 * ex: set ts=8 sts=4 sw=4 et:
2117 */