This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/APItest.xs: Simplify mappings
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
5eabe374
KW
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4 403
0b08cab0
KW
404# ifndef USING_MSVC6
405
099e59a4
KW
406 /* Do the word-at-a-time iff there is at least one usable full word. That
407 * means that after advancing to a word boundary, there still is at least a
408 * full word left. The number of bytes needed to advance is 'wordsize -
409 * offset' unless offset is 0. */
410 if ((STRLEN) (send - x) >= PERL_WORDSIZE
411
412 /* This term is wordsize if subword; 0 if not */
413 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
414
415 /* 'offset' */
416 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
417 {
b40579ff 418
46bb68f6
KW
419 /* Process per-byte until reach word boundary. XXX This loop could be
420 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 421 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
422 if (! UTF8_IS_INVARIANT(*x)) {
423 if (ep) {
424 *ep = x;
425 }
e17544a6 426
46bb68f6
KW
427 return FALSE;
428 }
429 x++;
e17544a6 430 }
e17544a6 431
099e59a4
KW
432 /* Here, we know we have at least one full word to process. Process
433 * per-word as long as we have at least a full word left */
434 do {
4ab2fd9b 435 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 436
46bb68f6
KW
437 /* Found a variant. Just return if caller doesn't want its
438 * exact position */
439 if (! ep) {
440 return FALSE;
441 }
e17544a6 442
1d2af574
KW
443#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
444 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
445
446 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
447 assert(*ep >= s && *ep < send);
448
449 return FALSE;
450
451#else /* If weird byte order, drop into next loop to do byte-at-a-time
452 checks. */
453
46bb68f6 454 break;
1d2af574 455#endif
46bb68f6 456 }
1d2af574 457
46bb68f6 458 x += PERL_WORDSIZE;
1d2af574 459
099e59a4 460 } while (x + PERL_WORDSIZE <= send);
b40579ff 461 }
e17544a6 462
0b08cab0
KW
463# endif /* End of ! MSVC6 */
464#endif /* End of ! EBCDIC */
e17544a6
KW
465
466 /* Process per-byte */
467 while (x < send) {
468 if (! UTF8_IS_INVARIANT(*x)) {
469 if (ep) {
470 *ep = x;
471 }
0cbf5865 472
e17544a6 473 return FALSE;
0cbf5865 474 }
1e599354 475
e17544a6 476 x++;
1e599354
KW
477 }
478
479 return TRUE;
480}
481
0b08cab0
KW
482#if ! defined(EBCDIC) && ! defined(USING_MSVC6)
483
484/* Apparent compiler error with MSVC6, so can't use this function. All callers
485 * to it must be compiled to use the EBCDIC fallback on MSVC6 */
1d2af574
KW
486
487PERL_STATIC_INLINE unsigned int
488S__variant_byte_number(PERL_UINTMAX_T word)
489{
490
491 /* This returns the position in a word (0..7) of the first variant byte in
492 * it. This is a helper function. Note that there are no branches */
493
494 assert(word);
495
496 /* Get just the msb bits of each byte */
497 word &= PERL_VARIANTS_WORD_MASK;
498
499# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
500
501 /* Bytes are stored like
502 * Byte8 ... Byte2 Byte1
503 * 63..56...15...8 7...0
504 *
505 * Isolate the lsb;
506 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
507 *
508 * The word will look this this, with a rightmost set bit in position 's':
509 * ('x's are don't cares)
510 * s
511 * x..x100..0
512 * x..xx10..0 Right shift (rightmost 0 is shifted off)
513 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
514 * the 1 just to their left into a 0; the remainder is
515 * untouched
516 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
517 * bottom to all 1
518 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
519 *
520 * Another method is to do 'word &= -word'; but it generates a compiler
521 * message on some platforms about taking the negative of an unsigned */
522
523 word >>= 1;
524 word = 1 + (word ^ (word - 1));
525
526# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
527
528 /* Bytes are stored like
529 * Byte1 Byte2 ... Byte8
530 * 63..56 55..47 ... 7...0
531 *
532 * Isolate the msb; http://codeforces.com/blog/entry/10330
533 *
534 * Only the most significant set bit matters. Or'ing word with its right
535 * shift of 1 makes that bit and the next one to its right both 1. Then
536 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
537 * msb and all to the right being 1. */
538 word |= word >> 1;
539 word |= word >> 2;
540 word |= word >> 4;
541 word |= word >> 8;
542 word |= word >> 16;
543 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
544
545 /* Then subtracting the right shift by 1 clears all but the left-most of
546 * the 1 bits, which is our desired result */
547 word -= (word >> 1);
548
549# else
550# error Unexpected byte order
551# endif
552
7cf2d6c7
KW
553 /* Here 'word' has a single bit set: the msb of the first byte in which it
554 * is set. Calculate that position in the word. We can use this
1d2af574 555 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
556 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
557 * just get shifted off at compile time) */
558 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
559 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
560 | (39 << 24) | (47 << 16)
561 | (55 << 8) | (63 << 0));
1d2af574
KW
562 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
563
564 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
565 word = ((word + 1) >> 3) - 1;
566
567# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
568
569 /* And invert the result */
570 word = CHARBITS - word - 1;
571
572# endif
573
574 return (unsigned int) word;
575}
576
577#endif /* ! EBCDIC */
03c1e4ab
KW
578#if defined(PERL_CORE) || defined(PERL_EXT)
579
580/*
581=for apidoc variant_under_utf8_count
582
583This function looks at the sequence of bytes between C<s> and C<e>, which are
584assumed to be encoded in ASCII/Latin1, and returns how many of them would
585change should the string be translated into UTF-8. Due to the nature of UTF-8,
586each of these would occupy two bytes instead of the single one in the input
587string. Thus, this function returns the precise number of bytes the string
588would expand by when translated to UTF-8.
589
590Unlike most of the other functions that have C<utf8> in their name, the input
591to this function is NOT a UTF-8-encoded string. The function name is slightly
592I<odd> to emphasize this.
593
594This function is internal to Perl because khw thinks that any XS code that
595would want this is probably operating too close to the internals. Presenting a
596valid use case could change that.
597
598See also
599C<L<perlapi/is_utf8_invariant_string>>
600and
601C<L<perlapi/is_utf8_invariant_string_loc>>,
602
603=cut
604
605*/
606
607PERL_STATIC_INLINE Size_t
608S_variant_under_utf8_count(const U8* const s, const U8* const e)
609{
610 const U8* x = s;
611 Size_t count = 0;
612
613 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
614
615# ifndef EBCDIC
616
5d0379de
KW
617 /* Test if the string is long enough to use word-at-a-time. (Logic is the
618 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
619 if ((STRLEN) (e - x) >= PERL_WORDSIZE
620 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
621 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
622 {
623
624 /* Process per-byte until reach word boundary. XXX This loop could be
625 * eliminated if we knew that this platform had fast unaligned reads */
626 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
627 count += ! UTF8_IS_INVARIANT(*x++);
628 }
629
630 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
631 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
632 explanation of how this works */
03c1e4ab
KW
633 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
634 * PERL_COUNT_MULTIPLIER)
635 >> ((PERL_WORDSIZE - 1) * CHARBITS);
636 x += PERL_WORDSIZE;
637 } while (x + PERL_WORDSIZE <= e);
638 }
639
640# endif
641
642 /* Process per-byte */
643 while (x < e) {
644 if (! UTF8_IS_INVARIANT(*x)) {
645 count++;
646 }
647
648 x++;
649 }
650
651 return count;
652}
653
654#endif
655
aff4cafe
KW
656#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
657# undef PERL_WORDSIZE
658# undef PERL_COUNT_MULTIPLIER
659# undef PERL_WORD_BOUNDARY_MASK
660# undef PERL_VARIANTS_WORD_MASK
661#endif
03c1e4ab 662
7c93d8f0 663/*
5ff889fb
KW
664=for apidoc is_utf8_string
665
82c5d941
KW
666Returns TRUE if the first C<len> bytes of string C<s> form a valid
667Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
668be calculated using C<strlen(s)> (which means if you use this option, that C<s>
669can't have embedded C<NUL> characters and has to have a terminating C<NUL>
670byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
671
2717076a
KW
672This function considers Perl's extended UTF-8 to be valid. That means that
673code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
674considered valid by this function. Use C<L</is_strict_utf8_string>>,
675C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
676code points are considered valid.
5ff889fb 677
9f2abfde
KW
678See also
679C<L</is_utf8_invariant_string>>,
0cbf5865 680C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
681C<L</is_utf8_string_loc>>,
682C<L</is_utf8_string_loclen>>,
8bc127bf
KW
683C<L</is_utf8_fixed_width_buf_flags>>,
684C<L</is_utf8_fixed_width_buf_loc_flags>>,
685C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
686
687=cut
688*/
689
dd237e82 690#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 691
c9cd936b
KW
692#if defined(PERL_CORE) || defined (PERL_EXT)
693
694/*
695=for apidoc is_utf8_non_invariant_string
696
697Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
698C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
699UTF-8; otherwise returns FALSE.
700
701A TRUE return means that at least one code point represented by the sequence
702either is a wide character not representable as a single byte, or the
703representation differs depending on whether the sequence is encoded in UTF-8 or
704not.
705
706See also
707C<L<perlapi/is_utf8_invariant_string>>,
708C<L<perlapi/is_utf8_string>>
709
710=cut
711
712This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
713It generally needn't be if its string is entirely UTF-8 invariant, and it
714shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
715
716It is an internal function because khw thinks that XS code shouldn't be working
717at this low a level. A valid use case could change that.
718
719*/
720
721PERL_STATIC_INLINE bool
722S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
723{
724 const U8 * first_variant;
725
726 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
727
728 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
729 return FALSE;
730 }
731
732 return is_utf8_string(first_variant, len - (first_variant - s));
733}
734
735#endif
736
5ff889fb 737/*
9f2abfde
KW
738=for apidoc is_strict_utf8_string
739
740Returns TRUE if the first C<len> bytes of string C<s> form a valid
741UTF-8-encoded string that is fully interchangeable by any application using
742Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
743calculated using C<strlen(s)> (which means if you use this option, that C<s>
744can't have embedded C<NUL> characters and has to have a terminating C<NUL>
745byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
746
747This function returns FALSE for strings containing any
748code points above the Unicode max of 0x10FFFF, surrogate code points, or
749non-character code points.
750
751See also
752C<L</is_utf8_invariant_string>>,
0cbf5865 753C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
754C<L</is_utf8_string>>,
755C<L</is_utf8_string_flags>>,
756C<L</is_utf8_string_loc>>,
757C<L</is_utf8_string_loc_flags>>,
758C<L</is_utf8_string_loclen>>,
759C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
760C<L</is_utf8_fixed_width_buf_flags>>,
761C<L</is_utf8_fixed_width_buf_loc_flags>>,
762C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
763C<L</is_strict_utf8_string_loc>>,
764C<L</is_strict_utf8_string_loclen>>,
765C<L</is_c9strict_utf8_string>>,
766C<L</is_c9strict_utf8_string_loc>>,
767and
768C<L</is_c9strict_utf8_string_loclen>>.
769
770=cut
771*/
772
dd237e82 773#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
774
775/*
776=for apidoc is_c9strict_utf8_string
777
778Returns TRUE if the first C<len> bytes of string C<s> form a valid
779UTF-8-encoded string that conforms to
780L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
781otherwise it returns FALSE. If C<len> is 0, it will be calculated using
782C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
783C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
784characters being ASCII constitute 'a valid UTF-8 string'.
785
786This function returns FALSE for strings containing any code points above the
787Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
788code points per
789L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
790
791See also
792C<L</is_utf8_invariant_string>>,
0cbf5865 793C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
794C<L</is_utf8_string>>,
795C<L</is_utf8_string_flags>>,
796C<L</is_utf8_string_loc>>,
797C<L</is_utf8_string_loc_flags>>,
798C<L</is_utf8_string_loclen>>,
799C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
800C<L</is_utf8_fixed_width_buf_flags>>,
801C<L</is_utf8_fixed_width_buf_loc_flags>>,
802C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
803C<L</is_strict_utf8_string>>,
804C<L</is_strict_utf8_string_loc>>,
805C<L</is_strict_utf8_string_loclen>>,
806C<L</is_c9strict_utf8_string_loc>>,
807and
808C<L</is_c9strict_utf8_string_loclen>>.
809
810=cut
811*/
812
dd237e82 813#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
814
815/*
816=for apidoc is_utf8_string_flags
817
818Returns TRUE if the first C<len> bytes of string C<s> form a valid
819UTF-8 string, subject to the restrictions imposed by C<flags>;
820returns FALSE otherwise. If C<len> is 0, it will be calculated
821using C<strlen(s)> (which means if you use this option, that C<s> can't have
822embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
823that all characters being ASCII constitute 'a valid UTF-8 string'.
824
825If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
826C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
827as C<L</is_strict_utf8_string>>; and if C<flags> is
828C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
829C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
830combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
831C<L</utf8n_to_uvchr>>, with the same meanings.
832
833See also
834C<L</is_utf8_invariant_string>>,
0cbf5865 835C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
836C<L</is_utf8_string>>,
837C<L</is_utf8_string_loc>>,
838C<L</is_utf8_string_loc_flags>>,
839C<L</is_utf8_string_loclen>>,
840C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
841C<L</is_utf8_fixed_width_buf_flags>>,
842C<L</is_utf8_fixed_width_buf_loc_flags>>,
843C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
844C<L</is_strict_utf8_string>>,
845C<L</is_strict_utf8_string_loc>>,
846C<L</is_strict_utf8_string_loclen>>,
847C<L</is_c9strict_utf8_string>>,
848C<L</is_c9strict_utf8_string_loc>>,
849and
850C<L</is_c9strict_utf8_string_loclen>>.
851
852=cut
853*/
854
855PERL_STATIC_INLINE bool
f60f61fd 856S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 857{
33756530 858 const U8 * first_variant;
9f2abfde
KW
859
860 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
861 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 862 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 863
f60f61fd
KW
864 if (len == 0) {
865 len = strlen((const char *)s);
866 }
867
9f2abfde
KW
868 if (flags == 0) {
869 return is_utf8_string(s, len);
870 }
871
d044b7a7 872 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
873 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
874 {
875 return is_strict_utf8_string(s, len);
876 }
877
d044b7a7 878 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
879 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
880 {
881 return is_c9strict_utf8_string(s, len);
882 }
883
33756530
KW
884 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
885 const U8* const send = s + len;
886 const U8* x = first_variant;
887
a0d7f935
KW
888 while (x < send) {
889 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
890 if (UNLIKELY(! cur_len)) {
891 return FALSE;
892 }
893 x += cur_len;
9f2abfde 894 }
33756530 895 }
9f2abfde
KW
896
897 return TRUE;
898}
899
900/*
5ff889fb
KW
901
902=for apidoc is_utf8_string_loc
903
2717076a 904Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 905case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 906"utf8ness success") in the C<ep> pointer.
5ff889fb 907
2717076a 908See also C<L</is_utf8_string_loclen>>.
5ff889fb 909
3964c812
KW
910=cut
911*/
912
913#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
914
915/*
916
5ff889fb
KW
917=for apidoc is_utf8_string_loclen
918
2717076a 919Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 920case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 921"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 922encoded characters in the C<el> pointer.
5ff889fb 923
2717076a 924See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
925
926=cut
927*/
928
56e4cf64 929PERL_STATIC_INLINE bool
33756530 930Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 931{
33756530 932 const U8 * first_variant;
5ff889fb
KW
933
934 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
935
33756530
KW
936 if (len == 0) {
937 len = strlen((const char *) s);
938 }
939
940 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
941 if (el)
942 *el = len;
943
944 if (ep) {
945 *ep = s + len;
946 }
947
948 return TRUE;
949 }
950
951 {
952 const U8* const send = s + len;
953 const U8* x = first_variant;
954 STRLEN outlen = first_variant - s;
955
a0d7f935
KW
956 while (x < send) {
957 const STRLEN cur_len = isUTF8_CHAR(x, send);
958 if (UNLIKELY(! cur_len)) {
959 break;
960 }
961 x += cur_len;
962 outlen++;
5ff889fb 963 }
5ff889fb 964
a0d7f935
KW
965 if (el)
966 *el = outlen;
5ff889fb 967
a0d7f935
KW
968 if (ep) {
969 *ep = x;
970 }
5ff889fb 971
a0d7f935 972 return (x == send);
33756530 973 }
5ff889fb
KW
974}
975
976/*
9f2abfde
KW
977
978=for apidoc is_strict_utf8_string_loc
979
980Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
981case of "utf8ness failure") or the location C<s>+C<len> (in the case of
982"utf8ness success") in the C<ep> pointer.
983
984See also C<L</is_strict_utf8_string_loclen>>.
985
986=cut
987*/
988
989#define is_strict_utf8_string_loc(s, len, ep) \
990 is_strict_utf8_string_loclen(s, len, ep, 0)
991
992/*
993
994=for apidoc is_strict_utf8_string_loclen
995
996Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
997case of "utf8ness failure") or the location C<s>+C<len> (in the case of
998"utf8ness success") in the C<ep> pointer, and the number of UTF-8
999encoded characters in the C<el> pointer.
1000
1001See also C<L</is_strict_utf8_string_loc>>.
1002
1003=cut
1004*/
1005
1006PERL_STATIC_INLINE bool
33756530 1007S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1008{
33756530 1009 const U8 * first_variant;
9f2abfde
KW
1010
1011 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1012
33756530
KW
1013 if (len == 0) {
1014 len = strlen((const char *) s);
1015 }
1016
1017 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1018 if (el)
1019 *el = len;
1020
1021 if (ep) {
1022 *ep = s + len;
1023 }
1024
1025 return TRUE;
1026 }
1027
1028 {
1029 const U8* const send = s + len;
1030 const U8* x = first_variant;
1031 STRLEN outlen = first_variant - s;
1032
a0d7f935
KW
1033 while (x < send) {
1034 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1035 if (UNLIKELY(! cur_len)) {
1036 break;
1037 }
1038 x += cur_len;
1039 outlen++;
9f2abfde 1040 }
9f2abfde 1041
a0d7f935
KW
1042 if (el)
1043 *el = outlen;
9f2abfde 1044
a0d7f935
KW
1045 if (ep) {
1046 *ep = x;
1047 }
9f2abfde 1048
a0d7f935 1049 return (x == send);
33756530 1050 }
9f2abfde
KW
1051}
1052
1053/*
1054
1055=for apidoc is_c9strict_utf8_string_loc
1056
1057Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1058the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1059"utf8ness success") in the C<ep> pointer.
1060
1061See also C<L</is_c9strict_utf8_string_loclen>>.
1062
1063=cut
1064*/
1065
1066#define is_c9strict_utf8_string_loc(s, len, ep) \
1067 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1068
1069/*
1070
1071=for apidoc is_c9strict_utf8_string_loclen
1072
1073Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1074the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1075"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1076characters in the C<el> pointer.
1077
1078See also C<L</is_c9strict_utf8_string_loc>>.
1079
1080=cut
1081*/
1082
1083PERL_STATIC_INLINE bool
33756530 1084S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1085{
33756530 1086 const U8 * first_variant;
9f2abfde
KW
1087
1088 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1089
33756530
KW
1090 if (len == 0) {
1091 len = strlen((const char *) s);
1092 }
1093
1094 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1095 if (el)
1096 *el = len;
1097
1098 if (ep) {
1099 *ep = s + len;
1100 }
1101
1102 return TRUE;
1103 }
1104
1105 {
1106 const U8* const send = s + len;
1107 const U8* x = first_variant;
1108 STRLEN outlen = first_variant - s;
1109
a0d7f935
KW
1110 while (x < send) {
1111 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1112 if (UNLIKELY(! cur_len)) {
1113 break;
1114 }
1115 x += cur_len;
1116 outlen++;
9f2abfde 1117 }
9f2abfde 1118
a0d7f935
KW
1119 if (el)
1120 *el = outlen;
9f2abfde 1121
a0d7f935
KW
1122 if (ep) {
1123 *ep = x;
1124 }
9f2abfde 1125
a0d7f935 1126 return (x == send);
33756530 1127 }
9f2abfde
KW
1128}
1129
1130/*
1131
1132=for apidoc is_utf8_string_loc_flags
1133
1134Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1135case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1136"utf8ness success") in the C<ep> pointer.
1137
1138See also C<L</is_utf8_string_loclen_flags>>.
1139
1140=cut
1141*/
1142
1143#define is_utf8_string_loc_flags(s, len, ep, flags) \
1144 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1145
1146
1147/* The above 3 actual functions could have been moved into the more general one
1148 * just below, and made #defines that call it with the right 'flags'. They are
1149 * currently kept separate to increase their chances of getting inlined */
1150
1151/*
1152
1153=for apidoc is_utf8_string_loclen_flags
1154
1155Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1156case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1157"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1158encoded characters in the C<el> pointer.
1159
1160See also C<L</is_utf8_string_loc_flags>>.
1161
1162=cut
1163*/
1164
1165PERL_STATIC_INLINE bool
f60f61fd 1166S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1167{
33756530 1168 const U8 * first_variant;
9f2abfde
KW
1169
1170 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1171 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1172 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1173
f60f61fd 1174 if (len == 0) {
a0d7f935 1175 len = strlen((const char *) s);
f60f61fd
KW
1176 }
1177
9f2abfde
KW
1178 if (flags == 0) {
1179 return is_utf8_string_loclen(s, len, ep, el);
1180 }
1181
d044b7a7 1182 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1183 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1184 {
1185 return is_strict_utf8_string_loclen(s, len, ep, el);
1186 }
1187
d044b7a7 1188 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1189 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1190 {
1191 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1192 }
1193
33756530
KW
1194 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1195 if (el)
1196 *el = len;
1197
1198 if (ep) {
1199 *ep = s + len;
1200 }
1201
1202 return TRUE;
1203 }
1204
1205 {
1206 const U8* send = s + len;
1207 const U8* x = first_variant;
1208 STRLEN outlen = first_variant - s;
1209
a0d7f935
KW
1210 while (x < send) {
1211 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1212 if (UNLIKELY(! cur_len)) {
1213 break;
1214 }
1215 x += cur_len;
1216 outlen++;
9f2abfde 1217 }
9f2abfde 1218
a0d7f935
KW
1219 if (el)
1220 *el = outlen;
9f2abfde 1221
a0d7f935
KW
1222 if (ep) {
1223 *ep = x;
1224 }
9f2abfde 1225
a0d7f935 1226 return (x == send);
33756530 1227 }
9f2abfde
KW
1228}
1229
1230/*
7c93d8f0
KW
1231=for apidoc utf8_distance
1232
1233Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1234and C<b>.
1235
1236WARNING: use only if you *know* that the pointers point inside the
1237same UTF-8 buffer.
1238
1239=cut
1240*/
1241
1242PERL_STATIC_INLINE IV
1243Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1244{
1245 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1246
1247 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1248}
1249
1250/*
1251=for apidoc utf8_hop
1252
1253Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1254forward or backward.
1255
1256WARNING: do not use the following unless you *know* C<off> is within
1257the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1258on the first byte of character or just after the last byte of a character.
1259
1260=cut
1261*/
1262
1263PERL_STATIC_INLINE U8 *
1264Perl_utf8_hop(const U8 *s, SSize_t off)
1265{
1266 PERL_ARGS_ASSERT_UTF8_HOP;
1267
1268 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1269 * the bitops (especially ~) can create illegal UTF-8.
1270 * In other words: in Perl UTF-8 is not just for Unicode. */
1271
1272 if (off >= 0) {
1273 while (off--)
1274 s += UTF8SKIP(s);
1275 }
1276 else {
1277 while (off++) {
1278 s--;
1279 while (UTF8_IS_CONTINUATION(*s))
1280 s--;
1281 }
1282 }
7347ee54 1283 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1284 return (U8 *)s;
7347ee54 1285 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1286}
1287
4dab108f 1288/*
65df57a8
TC
1289=for apidoc utf8_hop_forward
1290
1291Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1292forward.
1293
1294C<off> must be non-negative.
1295
1296C<s> must be before or equal to C<end>.
1297
1298When moving forward it will not move beyond C<end>.
1299
1300Will not exceed this limit even if the string is not valid "UTF-8".
1301
1302=cut
1303*/
1304
1305PERL_STATIC_INLINE U8 *
1306Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1307{
1308 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1309
1310 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1311 * the bitops (especially ~) can create illegal UTF-8.
1312 * In other words: in Perl UTF-8 is not just for Unicode. */
1313
1314 assert(s <= end);
1315 assert(off >= 0);
1316
1317 while (off--) {
1318 STRLEN skip = UTF8SKIP(s);
de979548 1319 if ((STRLEN)(end - s) <= skip) {
7347ee54 1320 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1321 return (U8 *)end;
7347ee54 1322 GCC_DIAG_RESTORE_STMT;
de979548 1323 }
65df57a8
TC
1324 s += skip;
1325 }
1326
7347ee54 1327 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1328 return (U8 *)s;
7347ee54 1329 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1330}
1331
1332/*
1333=for apidoc utf8_hop_back
1334
1335Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1336backward.
1337
1338C<off> must be non-positive.
1339
1340C<s> must be after or equal to C<start>.
1341
1342When moving backward it will not move before C<start>.
1343
1344Will not exceed this limit even if the string is not valid "UTF-8".
1345
1346=cut
1347*/
1348
1349PERL_STATIC_INLINE U8 *
1350Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1351{
1352 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1353
1354 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1355 * the bitops (especially ~) can create illegal UTF-8.
1356 * In other words: in Perl UTF-8 is not just for Unicode. */
1357
1358 assert(start <= s);
1359 assert(off <= 0);
1360
1361 while (off++ && s > start) {
1362 s--;
1363 while (UTF8_IS_CONTINUATION(*s) && s > start)
1364 s--;
1365 }
1366
7347ee54 1367 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1368 return (U8 *)s;
7347ee54 1369 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1370}
1371
1372/*
1373=for apidoc utf8_hop_safe
1374
1375Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1376either forward or backward.
1377
1378When moving backward it will not move before C<start>.
1379
1380When moving forward it will not move beyond C<end>.
1381
1382Will not exceed those limits even if the string is not valid "UTF-8".
1383
1384=cut
1385*/
1386
1387PERL_STATIC_INLINE U8 *
1388Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1389{
1390 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1391
1392 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1393 * the bitops (especially ~) can create illegal UTF-8.
1394 * In other words: in Perl UTF-8 is not just for Unicode. */
1395
1396 assert(start <= s && s <= end);
1397
1398 if (off >= 0) {
1399 return utf8_hop_forward(s, off, end);
1400 }
1401 else {
1402 return utf8_hop_back(s, off, start);
1403 }
1404}
1405
1406/*
4dab108f
KW
1407
1408=for apidoc is_utf8_valid_partial_char
1409
6cbb9248
KW
1410Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1411S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1412points. Otherwise, it returns 1 if there exists at least one non-empty
1413sequence of bytes that when appended to sequence C<s>, starting at position
1414C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1415otherwise returns 0.
1416
1417In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1418point.
1419
1420This is useful when a fixed-length buffer is being tested for being well-formed
1421UTF-8, but the final few bytes in it don't comprise a full character; that is,
1422it is split somewhere in the middle of the final code point's UTF-8
1423representation. (Presumably when the buffer is refreshed with the next chunk
1424of data, the new first bytes will complete the partial code point.) This
1425function is used to verify that the final bytes in the current buffer are in
1426fact the legal beginning of some code point, so that if they aren't, the
1427failure can be signalled without having to wait for the next read.
4dab108f
KW
1428
1429=cut
1430*/
2717076a
KW
1431#define is_utf8_valid_partial_char(s, e) \
1432 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1433
1434/*
1435
1436=for apidoc is_utf8_valid_partial_char_flags
1437
1438Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1439or not the input is a valid UTF-8 encoded partial character, but it takes an
1440extra parameter, C<flags>, which can further restrict which code points are
1441considered valid.
1442
1443If C<flags> is 0, this behaves identically to
1444C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1445of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1446there is any sequence of bytes that can complete the input partial character in
1447such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1448TRUE; otherwise FALSE. Non character code points cannot be determined based on
1449partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1450determined from just the first one or two bytes.
1451
1452=cut
1453 */
1454
56e4cf64 1455PERL_STATIC_INLINE bool
f1c999a7 1456S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1457{
f1c999a7 1458 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1459
f1c999a7 1460 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1461 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1462
8875bd48 1463 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1464 return FALSE;
1465 }
1466
f1c999a7 1467 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1468}
1469
8bc127bf
KW
1470/*
1471
1472=for apidoc is_utf8_fixed_width_buf_flags
1473
1474Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1475is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1476otherwise it returns FALSE.
1477
1478If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1479without restriction. If the final few bytes of the buffer do not form a
1480complete code point, this will return TRUE anyway, provided that
1481C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1482
1483If C<flags> in non-zero, it can be any combination of the
1484C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1485same meanings.
1486
1487This function differs from C<L</is_utf8_string_flags>> only in that the latter
1488returns FALSE if the final few bytes of the string don't form a complete code
1489point.
1490
1491=cut
1492 */
1493#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1494 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1495
1496/*
1497
1498=for apidoc is_utf8_fixed_width_buf_loc_flags
1499
1500Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1501failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1502to the beginning of any partial character at the end of the buffer; if there is
1503no partial character C<*ep> will contain C<s>+C<len>.
1504
1505See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1506
1507=cut
1508*/
1509
1510#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1511 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1512
1513/*
1514
1515=for apidoc is_utf8_fixed_width_buf_loclen_flags
1516
1517Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1518complete, valid characters found in the C<el> pointer.
1519
1520=cut
1521*/
1522
1523PERL_STATIC_INLINE bool
1524S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1525 STRLEN len,
8bc127bf
KW
1526 const U8 **ep,
1527 STRLEN *el,
1528 const U32 flags)
1529{
1530 const U8 * maybe_partial;
1531
1532 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1533
1534 if (! ep) {
1535 ep = &maybe_partial;
1536 }
1537
1538 /* If it's entirely valid, return that; otherwise see if the only error is
1539 * that the final few bytes are for a partial character */
1540 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1541 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1542}
1543
c8028aa6
TC
1544/* ------------------------------- perl.h ----------------------------- */
1545
1546/*
dcccc8ff
KW
1547=head1 Miscellaneous Functions
1548
41188aa0 1549=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1550
6602b933 1551Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1552If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1553
1554Return TRUE if the name is safe.
1555
796b6530 1556Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1557
1558=cut
1559*/
1560
1561PERL_STATIC_INLINE bool
41188aa0 1562S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1563 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1564 * perl itself uses xce*() functions which accept 8-bit strings.
1565 */
1566
1567 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1568
6c4650b3 1569 if (len > 1) {
c8028aa6 1570 char *null_at;
41188aa0 1571 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1572 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1573 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1574 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1575 what, op_name, pv, null_at+1);
c8028aa6
TC
1576 return FALSE;
1577 }
1578 }
1579
1580 return TRUE;
1581}
1582
1583/*
7cb3f959
TC
1584
1585Return true if the supplied filename has a newline character
fa6c7d00 1586immediately before the first (hopefully only) NUL.
7cb3f959
TC
1587
1588My original look at this incorrectly used the len from SvPV(), but
1589that's incorrect, since we allow for a NUL in pv[len-1].
1590
1591So instead, strlen() and work from there.
1592
1593This allow for the user reading a filename, forgetting to chomp it,
1594then calling:
1595
1596 open my $foo, "$file\0";
1597
1598*/
1599
1600#ifdef PERL_CORE
1601
1602PERL_STATIC_INLINE bool
1603S_should_warn_nl(const char *pv) {
1604 STRLEN len;
1605
1606 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1607
1608 len = strlen(pv);
1609
1610 return len > 0 && pv[len-1] == '\n';
1611}
1612
1613#endif
1614
81d52ecd
JH
1615/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1616
1617#define MAX_CHARSET_NAME_LENGTH 2
1618
1619PERL_STATIC_INLINE const char *
1620get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1621{
1622 /* Returns a string that corresponds to the name of the regex character set
1623 * given by 'flags', and *lenp is set the length of that string, which
1624 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1625
1626 *lenp = 1;
1627 switch (get_regex_charset(flags)) {
1628 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1629 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1630 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1631 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1632 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1633 *lenp = 2;
1634 return ASCII_MORE_RESTRICT_PAT_MODS;
1635 }
1636 /* The NOT_REACHED; hides an assert() which has a rather complex
1637 * definition in perl.h. */
1638 NOT_REACHED; /* NOTREACHED */
1639 return "?"; /* Unknown */
1640}
1641
7cb3f959 1642/*
ed382232
TC
1643
1644Return false if any get magic is on the SV other than taint magic.
1645
1646*/
1647
1648PERL_STATIC_INLINE bool
1649S_sv_only_taint_gmagic(SV *sv) {
1650 MAGIC *mg = SvMAGIC(sv);
1651
1652 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1653
1654 while (mg) {
1655 if (mg->mg_type != PERL_MAGIC_taint
1656 && !(mg->mg_flags & MGf_GSKIP)
1657 && mg->mg_virtual->svt_get) {
1658 return FALSE;
1659 }
1660 mg = mg->mg_moremagic;
1661 }
1662
1663 return TRUE;
1664}
1665
ed8ff0f3
DM
1666/* ------------------ cop.h ------------------------------------------- */
1667
1668
1669/* Enter a block. Push a new base context and return its address. */
1670
1671PERL_STATIC_INLINE PERL_CONTEXT *
1672S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1673{
1674 PERL_CONTEXT * cx;
1675
1676 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1677
1678 CXINC;
1679 cx = CX_CUR();
1680 cx->cx_type = type;
1681 cx->blk_gimme = gimme;
1682 cx->blk_oldsaveix = saveix;
4caf7d8c 1683 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1684 cx->blk_oldcop = PL_curcop;
4caf7d8c 1685 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1686 cx->blk_oldscopesp = PL_scopestack_ix;
1687 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1688 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1689
1690 PL_tmps_floor = PL_tmps_ix;
1691 CX_DEBUG(cx, "PUSH");
1692 return cx;
1693}
1694
1695
1696/* Exit a block (RETURN and LAST). */
1697
1698PERL_STATIC_INLINE void
1699S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1700{
1701 PERL_ARGS_ASSERT_CX_POPBLOCK;
1702
1703 CX_DEBUG(cx, "POP");
1704 /* these 3 are common to cx_popblock and cx_topblock */
1705 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1706 PL_scopestack_ix = cx->blk_oldscopesp;
1707 PL_curpm = cx->blk_oldpm;
1708
1709 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1710 * and leaves a CX entry lying around for repeated use, so
1711 * skip for multicall */ \
1712 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1713 || PL_savestack_ix == cx->blk_oldsaveix);
1714 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1715 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1716}
1717
1718/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1719 * Whereas cx_popblock() restores the state to the point just before
1720 * cx_pushblock() was called, cx_topblock() restores it to the point just
1721 * *after* cx_pushblock() was called. */
1722
1723PERL_STATIC_INLINE void
1724S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1725{
1726 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1727
1728 CX_DEBUG(cx, "TOP");
1729 /* these 3 are common to cx_popblock and cx_topblock */
1730 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1731 PL_scopestack_ix = cx->blk_oldscopesp;
1732 PL_curpm = cx->blk_oldpm;
1733
1734 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1735}
1736
1737
a73d8813
DM
1738PERL_STATIC_INLINE void
1739S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1740{
1741 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1742
1743 PERL_ARGS_ASSERT_CX_PUSHSUB;
1744
3f6bd23a 1745 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1746 cx->blk_sub.cv = cv;
1747 cx->blk_sub.olddepth = CvDEPTH(cv);
1748 cx->blk_sub.prevcomppad = PL_comppad;
1749 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1750 cx->blk_sub.retop = retop;
1751 SvREFCNT_inc_simple_void_NN(cv);
1752 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1753}
1754
1755
1756/* subsets of cx_popsub() */
1757
1758PERL_STATIC_INLINE void
1759S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1760{
1761 CV *cv;
1762
1763 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1764 assert(CxTYPE(cx) == CXt_SUB);
1765
1766 PL_comppad = cx->blk_sub.prevcomppad;
1767 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1768 cv = cx->blk_sub.cv;
1769 CvDEPTH(cv) = cx->blk_sub.olddepth;
1770 cx->blk_sub.cv = NULL;
1771 SvREFCNT_dec(cv);
1772}
1773
1774
1775/* handle the @_ part of leaving a sub */
1776
1777PERL_STATIC_INLINE void
1778S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1779{
1780 AV *av;
1781
1782 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1783 assert(CxTYPE(cx) == CXt_SUB);
1784 assert(AvARRAY(MUTABLE_AV(
1785 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1786 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1787
1788 CX_POP_SAVEARRAY(cx);
1789 av = MUTABLE_AV(PAD_SVl(0));
1790 if (UNLIKELY(AvREAL(av)))
1791 /* abandon @_ if it got reified */
1792 clear_defarray(av, 0);
1793 else {
1794 CLEAR_ARGARRAY(av);
1795 }
1796}
1797
1798
1799PERL_STATIC_INLINE void
1800S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1801{
1802 PERL_ARGS_ASSERT_CX_POPSUB;
1803 assert(CxTYPE(cx) == CXt_SUB);
1804
3f6bd23a 1805 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1806
1807 if (CxHASARGS(cx))
1808 cx_popsub_args(cx);
1809 cx_popsub_common(cx);
1810}
1811
1812
6a7d52cc
DM
1813PERL_STATIC_INLINE void
1814S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1815{
1816 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1817
1818 cx->blk_format.cv = cv;
1819 cx->blk_format.retop = retop;
1820 cx->blk_format.gv = gv;
1821 cx->blk_format.dfoutgv = PL_defoutgv;
1822 cx->blk_format.prevcomppad = PL_comppad;
1823 cx->blk_u16 = 0;
1824
1825 SvREFCNT_inc_simple_void_NN(cv);
1826 CvDEPTH(cv)++;
1827 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1828}
1829
1830
1831PERL_STATIC_INLINE void
1832S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1833{
1834 CV *cv;
1835 GV *dfout;
1836
1837 PERL_ARGS_ASSERT_CX_POPFORMAT;
1838 assert(CxTYPE(cx) == CXt_FORMAT);
1839
1840 dfout = cx->blk_format.dfoutgv;
1841 setdefout(dfout);
1842 cx->blk_format.dfoutgv = NULL;
1843 SvREFCNT_dec_NN(dfout);
1844
1845 PL_comppad = cx->blk_format.prevcomppad;
1846 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1847 cv = cx->blk_format.cv;
1848 cx->blk_format.cv = NULL;
1849 --CvDEPTH(cv);
1850 SvREFCNT_dec_NN(cv);
1851}
1852
1853
13febba5
DM
1854PERL_STATIC_INLINE void
1855S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1856{
1857 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1858
1859 cx->blk_eval.retop = retop;
1860 cx->blk_eval.old_namesv = namesv;
1861 cx->blk_eval.old_eval_root = PL_eval_root;
1862 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1863 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1864 cx->blk_eval.cur_top_env = PL_top_env;
1865
4c57ced5 1866 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1867 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1868 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1869}
1870
1871
1872PERL_STATIC_INLINE void
1873S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1874{
1875 SV *sv;
1876
1877 PERL_ARGS_ASSERT_CX_POPEVAL;
1878 assert(CxTYPE(cx) == CXt_EVAL);
1879
1880 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1881 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1882 PL_eval_root = cx->blk_eval.old_eval_root;
1883 sv = cx->blk_eval.cur_text;
4c57ced5 1884 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1885 cx->blk_eval.cur_text = NULL;
1886 SvREFCNT_dec_NN(sv);
1887 }
1888
1889 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1890 if (sv) {
1891 cx->blk_eval.old_namesv = NULL;
1892 SvREFCNT_dec_NN(sv);
1893 }
13febba5 1894}
6a7d52cc 1895
a73d8813 1896
d1b6bf72
DM
1897/* push a plain loop, i.e.
1898 * { block }
1899 * while (cond) { block }
1900 * for (init;cond;continue) { block }
1901 * This loop can be last/redo'ed etc.
1902 */
1903
1904PERL_STATIC_INLINE void
1905S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1906{
1907 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1908 cx->blk_loop.my_op = cLOOP;
1909}
1910
1911
1912/* push a true for loop, i.e.
1913 * for var (list) { block }
1914 */
1915
1916PERL_STATIC_INLINE void
1917S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1918{
1919 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1920
1921 /* this one line is common with cx_pushloop_plain */
1922 cx->blk_loop.my_op = cLOOP;
1923
1924 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1925 cx->blk_loop.itersave = itersave;
1926#ifdef USE_ITHREADS
1927 cx->blk_loop.oldcomppad = PL_comppad;
1928#endif
1929}
1930
1931
1932/* pop all loop types, including plain */
1933
1934PERL_STATIC_INLINE void
1935S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1936{
1937 PERL_ARGS_ASSERT_CX_POPLOOP;
1938
1939 assert(CxTYPE_is_LOOP(cx));
1940 if ( CxTYPE(cx) == CXt_LOOP_ARY
1941 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1942 {
1943 /* Free ary or cur. This assumes that state_u.ary.ary
1944 * aligns with state_u.lazysv.cur. See cx_dup() */
1945 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1946 cx->blk_loop.state_u.lazysv.cur = NULL;
1947 SvREFCNT_dec_NN(sv);
1948 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1949 sv = cx->blk_loop.state_u.lazysv.end;
1950 cx->blk_loop.state_u.lazysv.end = NULL;
1951 SvREFCNT_dec_NN(sv);
1952 }
1953 }
1954 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1955 SV *cursv;
1956 SV **svp = (cx)->blk_loop.itervar_u.svp;
1957 if ((cx->cx_type & CXp_FOR_GV))
1958 svp = &GvSV((GV*)svp);
1959 cursv = *svp;
1960 *svp = cx->blk_loop.itersave;
1961 cx->blk_loop.itersave = NULL;
1962 SvREFCNT_dec(cursv);
1963 }
1964}
1965
2a7b7c61
DM
1966
1967PERL_STATIC_INLINE void
7896dde7 1968S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1969{
7896dde7 1970 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1971
7896dde7 1972 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1973}
1974
1975
1976PERL_STATIC_INLINE void
7896dde7 1977S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1978{
7896dde7
Z
1979 PERL_ARGS_ASSERT_CX_POPWHEN;
1980 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1981
1982 PERL_UNUSED_ARG(cx);
59a14f30 1983 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1984 /* currently NOOP */
1985}
1986
1987
7896dde7
Z
1988PERL_STATIC_INLINE void
1989S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1990{
1991 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1992
1993 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1994 cx->blk_givwhen.defsv_save = orig_defsv;
1995}
1996
1997
1998PERL_STATIC_INLINE void
1999S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2000{
2001 SV *sv;
2002
2003 PERL_ARGS_ASSERT_CX_POPGIVEN;
2004 assert(CxTYPE(cx) == CXt_GIVEN);
2005
2006 sv = GvSV(PL_defgv);
2007 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2008 cx->blk_givwhen.defsv_save = NULL;
2009 SvREFCNT_dec(sv);
2010}
2011
ec2c235b
KW
2012/* ------------------ util.h ------------------------------------------- */
2013
2014/*
2015=head1 Miscellaneous Functions
2016
2017=for apidoc foldEQ
2018
2019Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2020same
2021case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2022match themselves and their opposite case counterparts. Non-cased and non-ASCII
2023range bytes match only themselves.
2024
2025=cut
2026*/
2027
2028PERL_STATIC_INLINE I32
2029Perl_foldEQ(const char *s1, const char *s2, I32 len)
2030{
2031 const U8 *a = (const U8 *)s1;
2032 const U8 *b = (const U8 *)s2;
2033
2034 PERL_ARGS_ASSERT_FOLDEQ;
2035
2036 assert(len >= 0);
2037
2038 while (len--) {
2039 if (*a != *b && *a != PL_fold[*b])
2040 return 0;
2041 a++,b++;
2042 }
2043 return 1;
2044}
2045
0f9cb40c 2046PERL_STATIC_INLINE I32
ec2c235b
KW
2047Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2048{
2049 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2050 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2051 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2052 * does it check that the strings each have at least 'len' characters */
2053
2054 const U8 *a = (const U8 *)s1;
2055 const U8 *b = (const U8 *)s2;
2056
2057 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2058
2059 assert(len >= 0);
2060
2061 while (len--) {
2062 if (*a != *b && *a != PL_fold_latin1[*b]) {
2063 return 0;
2064 }
2065 a++, b++;
2066 }
2067 return 1;
2068}
2069
2070/*
2071=for apidoc foldEQ_locale
2072
2073Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2074same case-insensitively in the current locale; false otherwise.
2075
2076=cut
2077*/
2078
0f9cb40c 2079PERL_STATIC_INLINE I32
ec2c235b
KW
2080Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2081{
2082 dVAR;
2083 const U8 *a = (const U8 *)s1;
2084 const U8 *b = (const U8 *)s2;
2085
2086 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2087
2088 assert(len >= 0);
2089
2090 while (len--) {
2091 if (*a != *b && *a != PL_fold_locale[*b])
2092 return 0;
2093 a++,b++;
2094 }
2095 return 1;
2096}
2097
6dba01e2
KW
2098#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2099
2100PERL_STATIC_INLINE void *
2101S_my_memrchr(const char * s, const char c, const STRLEN len)
2102{
2103 /* memrchr(), since many platforms lack it */
2104
2105 const char * t = s + len - 1;
2106
2107 PERL_ARGS_ASSERT_MY_MEMRCHR;
2108
2109 while (t >= s) {
2110 if (*t == c) {
2111 return (void *) t;
2112 }
2113 t--;
2114 }
2115
2116 return NULL;
2117}
2118
2119#endif
2120
ed382232 2121/*
c8028aa6
TC
2122 * ex: set ts=8 sts=4 sw=4 et:
2123 */