This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
t/loc_tools.pl: Special case openbsd
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
4ab2fd9b 385/* This looks like 0x010101... */
2c5c8af5 386# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
4ab2fd9b
KW
387
388/* This looks like 0x808080... */
2c5c8af5
KW
389# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
390# define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
391# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 392
099e59a4
KW
393/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
394 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
395 * optimized out completely on a 32-bit system, and its mask gets optimized out
396 * on a 64-bit system */
2c5c8af5 397# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
5eabe374
KW
398 | ( PTR2nat(x) >> 1) \
399 | ( ( (PTR2nat(x) \
400 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4 401
3f515a2e
KW
402#ifndef EBCDIC
403
099e59a4
KW
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
2c5c8af5
KW
441# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1d2af574
KW
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
2c5c8af5 449# else /* If weird byte order, drop into next loop to do byte-at-a-time
1d2af574
KW
450 checks. */
451
46bb68f6 452 break;
2c5c8af5 453# endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
0b08cab0 461#endif /* End of ! EBCDIC */
e17544a6
KW
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
23a7ee81
KW
479#ifndef EBCDIC
480
1d2af574
KW
481PERL_STATIC_INLINE unsigned int
482S__variant_byte_number(PERL_UINTMAX_T word)
483{
484
485 /* This returns the position in a word (0..7) of the first variant byte in
486 * it. This is a helper function. Note that there are no branches */
487
488 assert(word);
489
490 /* Get just the msb bits of each byte */
491 word &= PERL_VARIANTS_WORD_MASK;
492
597ee3f4
KW
493# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
494 easiest thing is to hide that from the callers */
495 {
496 unsigned int i;
497 const U8 * s = (U8 *) &word;
498 dTHX;
499
500 for (i = 0; i < sizeof(word); i++ ) {
501 if (s[i]) {
502 return i;
503 }
504 }
505
506 Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
507 __FILE__, __LINE__);
508 }
509
510# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
1d2af574
KW
511
512 /* Bytes are stored like
513 * Byte8 ... Byte2 Byte1
514 * 63..56...15...8 7...0
515 *
516 * Isolate the lsb;
517 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
518 *
519 * The word will look this this, with a rightmost set bit in position 's':
520 * ('x's are don't cares)
521 * s
522 * x..x100..0
523 * x..xx10..0 Right shift (rightmost 0 is shifted off)
524 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
525 * the 1 just to their left into a 0; the remainder is
526 * untouched
527 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
528 * bottom to all 1
529 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
530 *
531 * Another method is to do 'word &= -word'; but it generates a compiler
532 * message on some platforms about taking the negative of an unsigned */
533
534 word >>= 1;
535 word = 1 + (word ^ (word - 1));
536
537# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
538
539 /* Bytes are stored like
540 * Byte1 Byte2 ... Byte8
541 * 63..56 55..47 ... 7...0
542 *
543 * Isolate the msb; http://codeforces.com/blog/entry/10330
544 *
545 * Only the most significant set bit matters. Or'ing word with its right
546 * shift of 1 makes that bit and the next one to its right both 1. Then
547 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
548 * msb and all to the right being 1. */
549 word |= word >> 1;
550 word |= word >> 2;
551 word |= word >> 4;
552 word |= word >> 8;
553 word |= word >> 16;
554 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
555
556 /* Then subtracting the right shift by 1 clears all but the left-most of
557 * the 1 bits, which is our desired result */
558 word -= (word >> 1);
559
560# else
561# error Unexpected byte order
562# endif
563
7cf2d6c7
KW
564 /* Here 'word' has a single bit set: the msb of the first byte in which it
565 * is set. Calculate that position in the word. We can use this
1d2af574 566 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
567 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
568 * just get shifted off at compile time) */
569 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
570 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
571 | (39 << 24) | (47 << 16)
572 | (55 << 8) | (63 << 0));
1d2af574
KW
573 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
574
575 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
576 word = ((word + 1) >> 3) - 1;
577
578# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
579
580 /* And invert the result */
581 word = CHARBITS - word - 1;
582
583# endif
584
585 return (unsigned int) word;
586}
587
23a7ee81 588#endif
03c1e4ab
KW
589#if defined(PERL_CORE) || defined(PERL_EXT)
590
591/*
592=for apidoc variant_under_utf8_count
593
594This function looks at the sequence of bytes between C<s> and C<e>, which are
595assumed to be encoded in ASCII/Latin1, and returns how many of them would
596change should the string be translated into UTF-8. Due to the nature of UTF-8,
597each of these would occupy two bytes instead of the single one in the input
598string. Thus, this function returns the precise number of bytes the string
599would expand by when translated to UTF-8.
600
601Unlike most of the other functions that have C<utf8> in their name, the input
602to this function is NOT a UTF-8-encoded string. The function name is slightly
603I<odd> to emphasize this.
604
605This function is internal to Perl because khw thinks that any XS code that
606would want this is probably operating too close to the internals. Presenting a
607valid use case could change that.
608
609See also
610C<L<perlapi/is_utf8_invariant_string>>
611and
612C<L<perlapi/is_utf8_invariant_string_loc>>,
613
614=cut
615
616*/
617
618PERL_STATIC_INLINE Size_t
619S_variant_under_utf8_count(const U8* const s, const U8* const e)
620{
621 const U8* x = s;
622 Size_t count = 0;
623
624 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
625
626# ifndef EBCDIC
627
5d0379de
KW
628 /* Test if the string is long enough to use word-at-a-time. (Logic is the
629 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
630 if ((STRLEN) (e - x) >= PERL_WORDSIZE
631 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
632 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
633 {
634
635 /* Process per-byte until reach word boundary. XXX This loop could be
636 * eliminated if we knew that this platform had fast unaligned reads */
637 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
638 count += ! UTF8_IS_INVARIANT(*x++);
639 }
640
641 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
642 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
643 explanation of how this works */
e5863284
KW
644 PERL_UINTMAX_T increment
645 = ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
03c1e4ab
KW
646 * PERL_COUNT_MULTIPLIER)
647 >> ((PERL_WORDSIZE - 1) * CHARBITS);
e5863284 648 count += (Size_t) increment;
03c1e4ab
KW
649 x += PERL_WORDSIZE;
650 } while (x + PERL_WORDSIZE <= e);
651 }
652
653# endif
654
655 /* Process per-byte */
656 while (x < e) {
657 if (! UTF8_IS_INVARIANT(*x)) {
658 count++;
659 }
660
661 x++;
662 }
663
664 return count;
665}
666
667#endif
668
aff4cafe
KW
669#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
670# undef PERL_WORDSIZE
671# undef PERL_COUNT_MULTIPLIER
672# undef PERL_WORD_BOUNDARY_MASK
673# undef PERL_VARIANTS_WORD_MASK
674#endif
03c1e4ab 675
7c93d8f0 676/*
5ff889fb
KW
677=for apidoc is_utf8_string
678
82c5d941
KW
679Returns TRUE if the first C<len> bytes of string C<s> form a valid
680Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
681be calculated using C<strlen(s)> (which means if you use this option, that C<s>
682can't have embedded C<NUL> characters and has to have a terminating C<NUL>
683byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
684
2717076a
KW
685This function considers Perl's extended UTF-8 to be valid. That means that
686code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
687considered valid by this function. Use C<L</is_strict_utf8_string>>,
688C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
689code points are considered valid.
5ff889fb 690
9f2abfde
KW
691See also
692C<L</is_utf8_invariant_string>>,
0cbf5865 693C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
694C<L</is_utf8_string_loc>>,
695C<L</is_utf8_string_loclen>>,
8bc127bf
KW
696C<L</is_utf8_fixed_width_buf_flags>>,
697C<L</is_utf8_fixed_width_buf_loc_flags>>,
698C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
699
700=cut
701*/
702
dd237e82 703#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 704
c9cd936b
KW
705#if defined(PERL_CORE) || defined (PERL_EXT)
706
707/*
708=for apidoc is_utf8_non_invariant_string
709
710Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
711C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
712UTF-8; otherwise returns FALSE.
713
714A TRUE return means that at least one code point represented by the sequence
715either is a wide character not representable as a single byte, or the
716representation differs depending on whether the sequence is encoded in UTF-8 or
717not.
718
719See also
720C<L<perlapi/is_utf8_invariant_string>>,
721C<L<perlapi/is_utf8_string>>
722
723=cut
724
725This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
726It generally needn't be if its string is entirely UTF-8 invariant, and it
727shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
728
729It is an internal function because khw thinks that XS code shouldn't be working
730at this low a level. A valid use case could change that.
731
732*/
733
734PERL_STATIC_INLINE bool
735S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
736{
737 const U8 * first_variant;
738
739 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
740
741 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
742 return FALSE;
743 }
744
745 return is_utf8_string(first_variant, len - (first_variant - s));
746}
747
748#endif
749
5ff889fb 750/*
9f2abfde
KW
751=for apidoc is_strict_utf8_string
752
753Returns TRUE if the first C<len> bytes of string C<s> form a valid
754UTF-8-encoded string that is fully interchangeable by any application using
755Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
756calculated using C<strlen(s)> (which means if you use this option, that C<s>
757can't have embedded C<NUL> characters and has to have a terminating C<NUL>
758byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
759
760This function returns FALSE for strings containing any
761code points above the Unicode max of 0x10FFFF, surrogate code points, or
762non-character code points.
763
764See also
765C<L</is_utf8_invariant_string>>,
0cbf5865 766C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
767C<L</is_utf8_string>>,
768C<L</is_utf8_string_flags>>,
769C<L</is_utf8_string_loc>>,
770C<L</is_utf8_string_loc_flags>>,
771C<L</is_utf8_string_loclen>>,
772C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
773C<L</is_utf8_fixed_width_buf_flags>>,
774C<L</is_utf8_fixed_width_buf_loc_flags>>,
775C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
776C<L</is_strict_utf8_string_loc>>,
777C<L</is_strict_utf8_string_loclen>>,
778C<L</is_c9strict_utf8_string>>,
779C<L</is_c9strict_utf8_string_loc>>,
780and
781C<L</is_c9strict_utf8_string_loclen>>.
782
783=cut
784*/
785
dd237e82 786#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
787
788/*
789=for apidoc is_c9strict_utf8_string
790
791Returns TRUE if the first C<len> bytes of string C<s> form a valid
792UTF-8-encoded string that conforms to
793L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
794otherwise it returns FALSE. If C<len> is 0, it will be calculated using
795C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
796C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
797characters being ASCII constitute 'a valid UTF-8 string'.
798
799This function returns FALSE for strings containing any code points above the
800Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
801code points per
802L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
803
804See also
805C<L</is_utf8_invariant_string>>,
0cbf5865 806C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
807C<L</is_utf8_string>>,
808C<L</is_utf8_string_flags>>,
809C<L</is_utf8_string_loc>>,
810C<L</is_utf8_string_loc_flags>>,
811C<L</is_utf8_string_loclen>>,
812C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
813C<L</is_utf8_fixed_width_buf_flags>>,
814C<L</is_utf8_fixed_width_buf_loc_flags>>,
815C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
816C<L</is_strict_utf8_string>>,
817C<L</is_strict_utf8_string_loc>>,
818C<L</is_strict_utf8_string_loclen>>,
819C<L</is_c9strict_utf8_string_loc>>,
820and
821C<L</is_c9strict_utf8_string_loclen>>.
822
823=cut
824*/
825
dd237e82 826#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
827
828/*
829=for apidoc is_utf8_string_flags
830
831Returns TRUE if the first C<len> bytes of string C<s> form a valid
832UTF-8 string, subject to the restrictions imposed by C<flags>;
833returns FALSE otherwise. If C<len> is 0, it will be calculated
834using C<strlen(s)> (which means if you use this option, that C<s> can't have
835embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
836that all characters being ASCII constitute 'a valid UTF-8 string'.
837
838If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
839C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
840as C<L</is_strict_utf8_string>>; and if C<flags> is
841C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
842C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
843combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
844C<L</utf8n_to_uvchr>>, with the same meanings.
845
846See also
847C<L</is_utf8_invariant_string>>,
0cbf5865 848C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
849C<L</is_utf8_string>>,
850C<L</is_utf8_string_loc>>,
851C<L</is_utf8_string_loc_flags>>,
852C<L</is_utf8_string_loclen>>,
853C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
854C<L</is_utf8_fixed_width_buf_flags>>,
855C<L</is_utf8_fixed_width_buf_loc_flags>>,
856C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
857C<L</is_strict_utf8_string>>,
858C<L</is_strict_utf8_string_loc>>,
859C<L</is_strict_utf8_string_loclen>>,
860C<L</is_c9strict_utf8_string>>,
861C<L</is_c9strict_utf8_string_loc>>,
862and
863C<L</is_c9strict_utf8_string_loclen>>.
864
865=cut
866*/
867
868PERL_STATIC_INLINE bool
f60f61fd 869S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 870{
33756530 871 const U8 * first_variant;
9f2abfde
KW
872
873 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
874 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 875 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 876
f60f61fd
KW
877 if (len == 0) {
878 len = strlen((const char *)s);
879 }
880
9f2abfde
KW
881 if (flags == 0) {
882 return is_utf8_string(s, len);
883 }
884
d044b7a7 885 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
886 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
887 {
888 return is_strict_utf8_string(s, len);
889 }
890
d044b7a7 891 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
892 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
893 {
894 return is_c9strict_utf8_string(s, len);
895 }
896
33756530
KW
897 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
898 const U8* const send = s + len;
899 const U8* x = first_variant;
900
a0d7f935
KW
901 while (x < send) {
902 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
903 if (UNLIKELY(! cur_len)) {
904 return FALSE;
905 }
906 x += cur_len;
9f2abfde 907 }
33756530 908 }
9f2abfde
KW
909
910 return TRUE;
911}
912
913/*
5ff889fb
KW
914
915=for apidoc is_utf8_string_loc
916
2717076a 917Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 918case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 919"utf8ness success") in the C<ep> pointer.
5ff889fb 920
2717076a 921See also C<L</is_utf8_string_loclen>>.
5ff889fb 922
3964c812
KW
923=cut
924*/
925
926#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
927
928/*
929
5ff889fb
KW
930=for apidoc is_utf8_string_loclen
931
2717076a 932Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 933case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 934"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 935encoded characters in the C<el> pointer.
5ff889fb 936
2717076a 937See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
938
939=cut
940*/
941
56e4cf64 942PERL_STATIC_INLINE bool
33756530 943Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 944{
33756530 945 const U8 * first_variant;
5ff889fb
KW
946
947 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
948
33756530
KW
949 if (len == 0) {
950 len = strlen((const char *) s);
951 }
952
953 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
954 if (el)
955 *el = len;
956
957 if (ep) {
958 *ep = s + len;
959 }
960
961 return TRUE;
962 }
963
964 {
965 const U8* const send = s + len;
966 const U8* x = first_variant;
967 STRLEN outlen = first_variant - s;
968
a0d7f935
KW
969 while (x < send) {
970 const STRLEN cur_len = isUTF8_CHAR(x, send);
971 if (UNLIKELY(! cur_len)) {
972 break;
973 }
974 x += cur_len;
975 outlen++;
5ff889fb 976 }
5ff889fb 977
a0d7f935
KW
978 if (el)
979 *el = outlen;
5ff889fb 980
a0d7f935
KW
981 if (ep) {
982 *ep = x;
983 }
5ff889fb 984
a0d7f935 985 return (x == send);
33756530 986 }
5ff889fb
KW
987}
988
989/*
9f2abfde
KW
990
991=for apidoc is_strict_utf8_string_loc
992
993Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
994case of "utf8ness failure") or the location C<s>+C<len> (in the case of
995"utf8ness success") in the C<ep> pointer.
996
997See also C<L</is_strict_utf8_string_loclen>>.
998
999=cut
1000*/
1001
1002#define is_strict_utf8_string_loc(s, len, ep) \
1003 is_strict_utf8_string_loclen(s, len, ep, 0)
1004
1005/*
1006
1007=for apidoc is_strict_utf8_string_loclen
1008
1009Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1010case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1011"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1012encoded characters in the C<el> pointer.
1013
1014See also C<L</is_strict_utf8_string_loc>>.
1015
1016=cut
1017*/
1018
1019PERL_STATIC_INLINE bool
33756530 1020S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1021{
33756530 1022 const U8 * first_variant;
9f2abfde
KW
1023
1024 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1025
33756530
KW
1026 if (len == 0) {
1027 len = strlen((const char *) s);
1028 }
1029
1030 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1031 if (el)
1032 *el = len;
1033
1034 if (ep) {
1035 *ep = s + len;
1036 }
1037
1038 return TRUE;
1039 }
1040
1041 {
1042 const U8* const send = s + len;
1043 const U8* x = first_variant;
1044 STRLEN outlen = first_variant - s;
1045
a0d7f935
KW
1046 while (x < send) {
1047 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1048 if (UNLIKELY(! cur_len)) {
1049 break;
1050 }
1051 x += cur_len;
1052 outlen++;
9f2abfde 1053 }
9f2abfde 1054
a0d7f935
KW
1055 if (el)
1056 *el = outlen;
9f2abfde 1057
a0d7f935
KW
1058 if (ep) {
1059 *ep = x;
1060 }
9f2abfde 1061
a0d7f935 1062 return (x == send);
33756530 1063 }
9f2abfde
KW
1064}
1065
1066/*
1067
1068=for apidoc is_c9strict_utf8_string_loc
1069
1070Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1071the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1072"utf8ness success") in the C<ep> pointer.
1073
1074See also C<L</is_c9strict_utf8_string_loclen>>.
1075
1076=cut
1077*/
1078
1079#define is_c9strict_utf8_string_loc(s, len, ep) \
1080 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1081
1082/*
1083
1084=for apidoc is_c9strict_utf8_string_loclen
1085
1086Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1087the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1088"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1089characters in the C<el> pointer.
1090
1091See also C<L</is_c9strict_utf8_string_loc>>.
1092
1093=cut
1094*/
1095
1096PERL_STATIC_INLINE bool
33756530 1097S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1098{
33756530 1099 const U8 * first_variant;
9f2abfde
KW
1100
1101 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1102
33756530
KW
1103 if (len == 0) {
1104 len = strlen((const char *) s);
1105 }
1106
1107 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1108 if (el)
1109 *el = len;
1110
1111 if (ep) {
1112 *ep = s + len;
1113 }
1114
1115 return TRUE;
1116 }
1117
1118 {
1119 const U8* const send = s + len;
1120 const U8* x = first_variant;
1121 STRLEN outlen = first_variant - s;
1122
a0d7f935
KW
1123 while (x < send) {
1124 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1125 if (UNLIKELY(! cur_len)) {
1126 break;
1127 }
1128 x += cur_len;
1129 outlen++;
9f2abfde 1130 }
9f2abfde 1131
a0d7f935
KW
1132 if (el)
1133 *el = outlen;
9f2abfde 1134
a0d7f935
KW
1135 if (ep) {
1136 *ep = x;
1137 }
9f2abfde 1138
a0d7f935 1139 return (x == send);
33756530 1140 }
9f2abfde
KW
1141}
1142
1143/*
1144
1145=for apidoc is_utf8_string_loc_flags
1146
1147Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1148case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1149"utf8ness success") in the C<ep> pointer.
1150
1151See also C<L</is_utf8_string_loclen_flags>>.
1152
1153=cut
1154*/
1155
1156#define is_utf8_string_loc_flags(s, len, ep, flags) \
1157 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1158
1159
1160/* The above 3 actual functions could have been moved into the more general one
1161 * just below, and made #defines that call it with the right 'flags'. They are
1162 * currently kept separate to increase their chances of getting inlined */
1163
1164/*
1165
1166=for apidoc is_utf8_string_loclen_flags
1167
1168Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1169case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1170"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1171encoded characters in the C<el> pointer.
1172
1173See also C<L</is_utf8_string_loc_flags>>.
1174
1175=cut
1176*/
1177
1178PERL_STATIC_INLINE bool
f60f61fd 1179S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1180{
33756530 1181 const U8 * first_variant;
9f2abfde
KW
1182
1183 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1184 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1185 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1186
f60f61fd 1187 if (len == 0) {
a0d7f935 1188 len = strlen((const char *) s);
f60f61fd
KW
1189 }
1190
9f2abfde
KW
1191 if (flags == 0) {
1192 return is_utf8_string_loclen(s, len, ep, el);
1193 }
1194
d044b7a7 1195 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1196 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1197 {
1198 return is_strict_utf8_string_loclen(s, len, ep, el);
1199 }
1200
d044b7a7 1201 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1202 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1203 {
1204 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1205 }
1206
33756530
KW
1207 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1208 if (el)
1209 *el = len;
1210
1211 if (ep) {
1212 *ep = s + len;
1213 }
1214
1215 return TRUE;
1216 }
1217
1218 {
1219 const U8* send = s + len;
1220 const U8* x = first_variant;
1221 STRLEN outlen = first_variant - s;
1222
a0d7f935
KW
1223 while (x < send) {
1224 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1225 if (UNLIKELY(! cur_len)) {
1226 break;
1227 }
1228 x += cur_len;
1229 outlen++;
9f2abfde 1230 }
9f2abfde 1231
a0d7f935
KW
1232 if (el)
1233 *el = outlen;
9f2abfde 1234
a0d7f935
KW
1235 if (ep) {
1236 *ep = x;
1237 }
9f2abfde 1238
a0d7f935 1239 return (x == send);
33756530 1240 }
9f2abfde
KW
1241}
1242
1243/*
7c93d8f0
KW
1244=for apidoc utf8_distance
1245
1246Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1247and C<b>.
1248
1249WARNING: use only if you *know* that the pointers point inside the
1250same UTF-8 buffer.
1251
1252=cut
1253*/
1254
1255PERL_STATIC_INLINE IV
1256Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1257{
1258 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1259
1260 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1261}
1262
1263/*
1264=for apidoc utf8_hop
1265
1266Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1267forward or backward.
1268
1269WARNING: do not use the following unless you *know* C<off> is within
1270the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1271on the first byte of character or just after the last byte of a character.
1272
1273=cut
1274*/
1275
1276PERL_STATIC_INLINE U8 *
1277Perl_utf8_hop(const U8 *s, SSize_t off)
1278{
1279 PERL_ARGS_ASSERT_UTF8_HOP;
1280
1281 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1282 * the bitops (especially ~) can create illegal UTF-8.
1283 * In other words: in Perl UTF-8 is not just for Unicode. */
1284
1285 if (off >= 0) {
1286 while (off--)
1287 s += UTF8SKIP(s);
1288 }
1289 else {
1290 while (off++) {
1291 s--;
1292 while (UTF8_IS_CONTINUATION(*s))
1293 s--;
1294 }
1295 }
7347ee54 1296 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1297 return (U8 *)s;
7347ee54 1298 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1299}
1300
4dab108f 1301/*
65df57a8
TC
1302=for apidoc utf8_hop_forward
1303
1304Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1305forward.
1306
1307C<off> must be non-negative.
1308
1309C<s> must be before or equal to C<end>.
1310
1311When moving forward it will not move beyond C<end>.
1312
1313Will not exceed this limit even if the string is not valid "UTF-8".
1314
1315=cut
1316*/
1317
1318PERL_STATIC_INLINE U8 *
1319Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1320{
1321 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1322
1323 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1324 * the bitops (especially ~) can create illegal UTF-8.
1325 * In other words: in Perl UTF-8 is not just for Unicode. */
1326
1327 assert(s <= end);
1328 assert(off >= 0);
1329
1330 while (off--) {
1331 STRLEN skip = UTF8SKIP(s);
de979548 1332 if ((STRLEN)(end - s) <= skip) {
7347ee54 1333 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1334 return (U8 *)end;
7347ee54 1335 GCC_DIAG_RESTORE_STMT;
de979548 1336 }
65df57a8
TC
1337 s += skip;
1338 }
1339
7347ee54 1340 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1341 return (U8 *)s;
7347ee54 1342 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1343}
1344
1345/*
1346=for apidoc utf8_hop_back
1347
1348Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1349backward.
1350
1351C<off> must be non-positive.
1352
1353C<s> must be after or equal to C<start>.
1354
1355When moving backward it will not move before C<start>.
1356
1357Will not exceed this limit even if the string is not valid "UTF-8".
1358
1359=cut
1360*/
1361
1362PERL_STATIC_INLINE U8 *
1363Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1364{
1365 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1366
1367 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1368 * the bitops (especially ~) can create illegal UTF-8.
1369 * In other words: in Perl UTF-8 is not just for Unicode. */
1370
1371 assert(start <= s);
1372 assert(off <= 0);
1373
1374 while (off++ && s > start) {
1375 s--;
1376 while (UTF8_IS_CONTINUATION(*s) && s > start)
1377 s--;
1378 }
1379
7347ee54 1380 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1381 return (U8 *)s;
7347ee54 1382 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1383}
1384
1385/*
1386=for apidoc utf8_hop_safe
1387
1388Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1389either forward or backward.
1390
1391When moving backward it will not move before C<start>.
1392
1393When moving forward it will not move beyond C<end>.
1394
1395Will not exceed those limits even if the string is not valid "UTF-8".
1396
1397=cut
1398*/
1399
1400PERL_STATIC_INLINE U8 *
1401Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1402{
1403 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1404
1405 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1406 * the bitops (especially ~) can create illegal UTF-8.
1407 * In other words: in Perl UTF-8 is not just for Unicode. */
1408
1409 assert(start <= s && s <= end);
1410
1411 if (off >= 0) {
1412 return utf8_hop_forward(s, off, end);
1413 }
1414 else {
1415 return utf8_hop_back(s, off, start);
1416 }
1417}
1418
1419/*
4dab108f
KW
1420
1421=for apidoc is_utf8_valid_partial_char
1422
6cbb9248
KW
1423Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1424S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1425points. Otherwise, it returns 1 if there exists at least one non-empty
1426sequence of bytes that when appended to sequence C<s>, starting at position
1427C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1428otherwise returns 0.
1429
1430In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1431point.
1432
1433This is useful when a fixed-length buffer is being tested for being well-formed
1434UTF-8, but the final few bytes in it don't comprise a full character; that is,
1435it is split somewhere in the middle of the final code point's UTF-8
1436representation. (Presumably when the buffer is refreshed with the next chunk
1437of data, the new first bytes will complete the partial code point.) This
1438function is used to verify that the final bytes in the current buffer are in
1439fact the legal beginning of some code point, so that if they aren't, the
1440failure can be signalled without having to wait for the next read.
4dab108f
KW
1441
1442=cut
1443*/
2717076a
KW
1444#define is_utf8_valid_partial_char(s, e) \
1445 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1446
1447/*
1448
1449=for apidoc is_utf8_valid_partial_char_flags
1450
1451Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1452or not the input is a valid UTF-8 encoded partial character, but it takes an
1453extra parameter, C<flags>, which can further restrict which code points are
1454considered valid.
1455
1456If C<flags> is 0, this behaves identically to
1457C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1458of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1459there is any sequence of bytes that can complete the input partial character in
1460such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1461TRUE; otherwise FALSE. Non character code points cannot be determined based on
1462partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1463determined from just the first one or two bytes.
1464
1465=cut
1466 */
1467
56e4cf64 1468PERL_STATIC_INLINE bool
f1c999a7 1469S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1470{
f1c999a7 1471 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1472
f1c999a7 1473 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1474 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1475
8875bd48 1476 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1477 return FALSE;
1478 }
1479
f1c999a7 1480 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1481}
1482
8bc127bf
KW
1483/*
1484
1485=for apidoc is_utf8_fixed_width_buf_flags
1486
1487Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1488is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1489otherwise it returns FALSE.
1490
1491If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1492without restriction. If the final few bytes of the buffer do not form a
1493complete code point, this will return TRUE anyway, provided that
1494C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1495
1496If C<flags> in non-zero, it can be any combination of the
1497C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1498same meanings.
1499
1500This function differs from C<L</is_utf8_string_flags>> only in that the latter
1501returns FALSE if the final few bytes of the string don't form a complete code
1502point.
1503
1504=cut
1505 */
1506#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1507 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1508
1509/*
1510
1511=for apidoc is_utf8_fixed_width_buf_loc_flags
1512
1513Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1514failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1515to the beginning of any partial character at the end of the buffer; if there is
1516no partial character C<*ep> will contain C<s>+C<len>.
1517
1518See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1519
1520=cut
1521*/
1522
1523#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1524 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1525
1526/*
1527
1528=for apidoc is_utf8_fixed_width_buf_loclen_flags
1529
1530Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1531complete, valid characters found in the C<el> pointer.
1532
1533=cut
1534*/
1535
1536PERL_STATIC_INLINE bool
1537S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1538 STRLEN len,
8bc127bf
KW
1539 const U8 **ep,
1540 STRLEN *el,
1541 const U32 flags)
1542{
1543 const U8 * maybe_partial;
1544
1545 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1546
1547 if (! ep) {
1548 ep = &maybe_partial;
1549 }
1550
1551 /* If it's entirely valid, return that; otherwise see if the only error is
1552 * that the final few bytes are for a partial character */
1553 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1554 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1555}
1556
c8028aa6
TC
1557/* ------------------------------- perl.h ----------------------------- */
1558
1559/*
dcccc8ff
KW
1560=head1 Miscellaneous Functions
1561
41188aa0 1562=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1563
6602b933 1564Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1565If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1566
1567Return TRUE if the name is safe.
1568
796b6530 1569Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1570
1571=cut
1572*/
1573
1574PERL_STATIC_INLINE bool
41188aa0 1575S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1576 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1577 * perl itself uses xce*() functions which accept 8-bit strings.
1578 */
1579
1580 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1581
6c4650b3 1582 if (len > 1) {
c8028aa6 1583 char *null_at;
41188aa0 1584 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1585 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1586 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1587 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1588 what, op_name, pv, null_at+1);
c8028aa6
TC
1589 return FALSE;
1590 }
1591 }
1592
1593 return TRUE;
1594}
1595
1596/*
7cb3f959
TC
1597
1598Return true if the supplied filename has a newline character
fa6c7d00 1599immediately before the first (hopefully only) NUL.
7cb3f959
TC
1600
1601My original look at this incorrectly used the len from SvPV(), but
1602that's incorrect, since we allow for a NUL in pv[len-1].
1603
1604So instead, strlen() and work from there.
1605
1606This allow for the user reading a filename, forgetting to chomp it,
1607then calling:
1608
1609 open my $foo, "$file\0";
1610
1611*/
1612
1613#ifdef PERL_CORE
1614
1615PERL_STATIC_INLINE bool
1616S_should_warn_nl(const char *pv) {
1617 STRLEN len;
1618
1619 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1620
1621 len = strlen(pv);
1622
1623 return len > 0 && pv[len-1] == '\n';
1624}
1625
1626#endif
1627
81d52ecd
JH
1628/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1629
1630#define MAX_CHARSET_NAME_LENGTH 2
1631
1632PERL_STATIC_INLINE const char *
1633get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1634{
1635 /* Returns a string that corresponds to the name of the regex character set
1636 * given by 'flags', and *lenp is set the length of that string, which
1637 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1638
1639 *lenp = 1;
1640 switch (get_regex_charset(flags)) {
1641 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1642 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1643 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1644 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1645 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1646 *lenp = 2;
1647 return ASCII_MORE_RESTRICT_PAT_MODS;
1648 }
1649 /* The NOT_REACHED; hides an assert() which has a rather complex
1650 * definition in perl.h. */
1651 NOT_REACHED; /* NOTREACHED */
1652 return "?"; /* Unknown */
1653}
1654
7cb3f959 1655/*
ed382232
TC
1656
1657Return false if any get magic is on the SV other than taint magic.
1658
1659*/
1660
1661PERL_STATIC_INLINE bool
1662S_sv_only_taint_gmagic(SV *sv) {
1663 MAGIC *mg = SvMAGIC(sv);
1664
1665 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1666
1667 while (mg) {
1668 if (mg->mg_type != PERL_MAGIC_taint
1669 && !(mg->mg_flags & MGf_GSKIP)
1670 && mg->mg_virtual->svt_get) {
1671 return FALSE;
1672 }
1673 mg = mg->mg_moremagic;
1674 }
1675
1676 return TRUE;
1677}
1678
ed8ff0f3
DM
1679/* ------------------ cop.h ------------------------------------------- */
1680
1681
1682/* Enter a block. Push a new base context and return its address. */
1683
1684PERL_STATIC_INLINE PERL_CONTEXT *
1685S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1686{
1687 PERL_CONTEXT * cx;
1688
1689 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1690
1691 CXINC;
1692 cx = CX_CUR();
1693 cx->cx_type = type;
1694 cx->blk_gimme = gimme;
1695 cx->blk_oldsaveix = saveix;
4caf7d8c 1696 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1697 cx->blk_oldcop = PL_curcop;
4caf7d8c 1698 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1699 cx->blk_oldscopesp = PL_scopestack_ix;
1700 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1701 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1702
1703 PL_tmps_floor = PL_tmps_ix;
1704 CX_DEBUG(cx, "PUSH");
1705 return cx;
1706}
1707
1708
1709/* Exit a block (RETURN and LAST). */
1710
1711PERL_STATIC_INLINE void
1712S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1713{
1714 PERL_ARGS_ASSERT_CX_POPBLOCK;
1715
1716 CX_DEBUG(cx, "POP");
1717 /* these 3 are common to cx_popblock and cx_topblock */
1718 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1719 PL_scopestack_ix = cx->blk_oldscopesp;
1720 PL_curpm = cx->blk_oldpm;
1721
1722 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1723 * and leaves a CX entry lying around for repeated use, so
1724 * skip for multicall */ \
1725 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1726 || PL_savestack_ix == cx->blk_oldsaveix);
1727 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1728 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1729}
1730
1731/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1732 * Whereas cx_popblock() restores the state to the point just before
1733 * cx_pushblock() was called, cx_topblock() restores it to the point just
1734 * *after* cx_pushblock() was called. */
1735
1736PERL_STATIC_INLINE void
1737S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1738{
1739 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1740
1741 CX_DEBUG(cx, "TOP");
1742 /* these 3 are common to cx_popblock and cx_topblock */
1743 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1744 PL_scopestack_ix = cx->blk_oldscopesp;
1745 PL_curpm = cx->blk_oldpm;
1746
1747 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1748}
1749
1750
a73d8813
DM
1751PERL_STATIC_INLINE void
1752S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1753{
1754 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1755
1756 PERL_ARGS_ASSERT_CX_PUSHSUB;
1757
3f6bd23a 1758 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1759 cx->blk_sub.cv = cv;
1760 cx->blk_sub.olddepth = CvDEPTH(cv);
1761 cx->blk_sub.prevcomppad = PL_comppad;
1762 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1763 cx->blk_sub.retop = retop;
1764 SvREFCNT_inc_simple_void_NN(cv);
1765 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1766}
1767
1768
1769/* subsets of cx_popsub() */
1770
1771PERL_STATIC_INLINE void
1772S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1773{
1774 CV *cv;
1775
1776 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1777 assert(CxTYPE(cx) == CXt_SUB);
1778
1779 PL_comppad = cx->blk_sub.prevcomppad;
1780 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1781 cv = cx->blk_sub.cv;
1782 CvDEPTH(cv) = cx->blk_sub.olddepth;
1783 cx->blk_sub.cv = NULL;
1784 SvREFCNT_dec(cv);
1785}
1786
1787
1788/* handle the @_ part of leaving a sub */
1789
1790PERL_STATIC_INLINE void
1791S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1792{
1793 AV *av;
1794
1795 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1796 assert(CxTYPE(cx) == CXt_SUB);
1797 assert(AvARRAY(MUTABLE_AV(
1798 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1799 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1800
1801 CX_POP_SAVEARRAY(cx);
1802 av = MUTABLE_AV(PAD_SVl(0));
1803 if (UNLIKELY(AvREAL(av)))
1804 /* abandon @_ if it got reified */
1805 clear_defarray(av, 0);
1806 else {
1807 CLEAR_ARGARRAY(av);
1808 }
1809}
1810
1811
1812PERL_STATIC_INLINE void
1813S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1814{
1815 PERL_ARGS_ASSERT_CX_POPSUB;
1816 assert(CxTYPE(cx) == CXt_SUB);
1817
3f6bd23a 1818 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1819
1820 if (CxHASARGS(cx))
1821 cx_popsub_args(cx);
1822 cx_popsub_common(cx);
1823}
1824
1825
6a7d52cc
DM
1826PERL_STATIC_INLINE void
1827S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1828{
1829 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1830
1831 cx->blk_format.cv = cv;
1832 cx->blk_format.retop = retop;
1833 cx->blk_format.gv = gv;
1834 cx->blk_format.dfoutgv = PL_defoutgv;
1835 cx->blk_format.prevcomppad = PL_comppad;
1836 cx->blk_u16 = 0;
1837
1838 SvREFCNT_inc_simple_void_NN(cv);
1839 CvDEPTH(cv)++;
1840 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1841}
1842
1843
1844PERL_STATIC_INLINE void
1845S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1846{
1847 CV *cv;
1848 GV *dfout;
1849
1850 PERL_ARGS_ASSERT_CX_POPFORMAT;
1851 assert(CxTYPE(cx) == CXt_FORMAT);
1852
1853 dfout = cx->blk_format.dfoutgv;
1854 setdefout(dfout);
1855 cx->blk_format.dfoutgv = NULL;
1856 SvREFCNT_dec_NN(dfout);
1857
1858 PL_comppad = cx->blk_format.prevcomppad;
1859 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1860 cv = cx->blk_format.cv;
1861 cx->blk_format.cv = NULL;
1862 --CvDEPTH(cv);
1863 SvREFCNT_dec_NN(cv);
1864}
1865
1866
13febba5
DM
1867PERL_STATIC_INLINE void
1868S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1869{
1870 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1871
1872 cx->blk_eval.retop = retop;
1873 cx->blk_eval.old_namesv = namesv;
1874 cx->blk_eval.old_eval_root = PL_eval_root;
1875 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1876 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1877 cx->blk_eval.cur_top_env = PL_top_env;
1878
4c57ced5 1879 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1880 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1881 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1882}
1883
1884
1885PERL_STATIC_INLINE void
1886S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1887{
1888 SV *sv;
1889
1890 PERL_ARGS_ASSERT_CX_POPEVAL;
1891 assert(CxTYPE(cx) == CXt_EVAL);
1892
1893 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1894 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1895 PL_eval_root = cx->blk_eval.old_eval_root;
1896 sv = cx->blk_eval.cur_text;
4c57ced5 1897 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1898 cx->blk_eval.cur_text = NULL;
1899 SvREFCNT_dec_NN(sv);
1900 }
1901
1902 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1903 if (sv) {
1904 cx->blk_eval.old_namesv = NULL;
1905 SvREFCNT_dec_NN(sv);
1906 }
13febba5 1907}
6a7d52cc 1908
a73d8813 1909
d1b6bf72
DM
1910/* push a plain loop, i.e.
1911 * { block }
1912 * while (cond) { block }
1913 * for (init;cond;continue) { block }
1914 * This loop can be last/redo'ed etc.
1915 */
1916
1917PERL_STATIC_INLINE void
1918S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1919{
1920 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1921 cx->blk_loop.my_op = cLOOP;
1922}
1923
1924
1925/* push a true for loop, i.e.
1926 * for var (list) { block }
1927 */
1928
1929PERL_STATIC_INLINE void
1930S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1931{
1932 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1933
1934 /* this one line is common with cx_pushloop_plain */
1935 cx->blk_loop.my_op = cLOOP;
1936
1937 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1938 cx->blk_loop.itersave = itersave;
1939#ifdef USE_ITHREADS
1940 cx->blk_loop.oldcomppad = PL_comppad;
1941#endif
1942}
1943
1944
1945/* pop all loop types, including plain */
1946
1947PERL_STATIC_INLINE void
1948S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1949{
1950 PERL_ARGS_ASSERT_CX_POPLOOP;
1951
1952 assert(CxTYPE_is_LOOP(cx));
1953 if ( CxTYPE(cx) == CXt_LOOP_ARY
1954 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1955 {
1956 /* Free ary or cur. This assumes that state_u.ary.ary
1957 * aligns with state_u.lazysv.cur. See cx_dup() */
1958 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1959 cx->blk_loop.state_u.lazysv.cur = NULL;
1960 SvREFCNT_dec_NN(sv);
1961 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1962 sv = cx->blk_loop.state_u.lazysv.end;
1963 cx->blk_loop.state_u.lazysv.end = NULL;
1964 SvREFCNT_dec_NN(sv);
1965 }
1966 }
1967 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1968 SV *cursv;
1969 SV **svp = (cx)->blk_loop.itervar_u.svp;
1970 if ((cx->cx_type & CXp_FOR_GV))
1971 svp = &GvSV((GV*)svp);
1972 cursv = *svp;
1973 *svp = cx->blk_loop.itersave;
1974 cx->blk_loop.itersave = NULL;
1975 SvREFCNT_dec(cursv);
1976 }
1977}
1978
2a7b7c61
DM
1979
1980PERL_STATIC_INLINE void
7896dde7 1981S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1982{
7896dde7 1983 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1984
7896dde7 1985 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1986}
1987
1988
1989PERL_STATIC_INLINE void
7896dde7 1990S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1991{
7896dde7
Z
1992 PERL_ARGS_ASSERT_CX_POPWHEN;
1993 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1994
1995 PERL_UNUSED_ARG(cx);
59a14f30 1996 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1997 /* currently NOOP */
1998}
1999
2000
7896dde7
Z
2001PERL_STATIC_INLINE void
2002S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
2003{
2004 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2005
2006 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2007 cx->blk_givwhen.defsv_save = orig_defsv;
2008}
2009
2010
2011PERL_STATIC_INLINE void
2012S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2013{
2014 SV *sv;
2015
2016 PERL_ARGS_ASSERT_CX_POPGIVEN;
2017 assert(CxTYPE(cx) == CXt_GIVEN);
2018
2019 sv = GvSV(PL_defgv);
2020 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2021 cx->blk_givwhen.defsv_save = NULL;
2022 SvREFCNT_dec(sv);
2023}
2024
ec2c235b
KW
2025/* ------------------ util.h ------------------------------------------- */
2026
2027/*
2028=head1 Miscellaneous Functions
2029
2030=for apidoc foldEQ
2031
2032Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2033same
2034case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2035match themselves and their opposite case counterparts. Non-cased and non-ASCII
2036range bytes match only themselves.
2037
2038=cut
2039*/
2040
2041PERL_STATIC_INLINE I32
2042Perl_foldEQ(const char *s1, const char *s2, I32 len)
2043{
2044 const U8 *a = (const U8 *)s1;
2045 const U8 *b = (const U8 *)s2;
2046
2047 PERL_ARGS_ASSERT_FOLDEQ;
2048
2049 assert(len >= 0);
2050
2051 while (len--) {
2052 if (*a != *b && *a != PL_fold[*b])
2053 return 0;
2054 a++,b++;
2055 }
2056 return 1;
2057}
2058
0f9cb40c 2059PERL_STATIC_INLINE I32
ec2c235b
KW
2060Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2061{
2062 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2063 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2064 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2065 * does it check that the strings each have at least 'len' characters */
2066
2067 const U8 *a = (const U8 *)s1;
2068 const U8 *b = (const U8 *)s2;
2069
2070 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2071
2072 assert(len >= 0);
2073
2074 while (len--) {
2075 if (*a != *b && *a != PL_fold_latin1[*b]) {
2076 return 0;
2077 }
2078 a++, b++;
2079 }
2080 return 1;
2081}
2082
2083/*
2084=for apidoc foldEQ_locale
2085
2086Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2087same case-insensitively in the current locale; false otherwise.
2088
2089=cut
2090*/
2091
0f9cb40c 2092PERL_STATIC_INLINE I32
ec2c235b
KW
2093Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2094{
2095 dVAR;
2096 const U8 *a = (const U8 *)s1;
2097 const U8 *b = (const U8 *)s2;
2098
2099 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2100
2101 assert(len >= 0);
2102
2103 while (len--) {
2104 if (*a != *b && *a != PL_fold_locale[*b])
2105 return 0;
2106 a++,b++;
2107 }
2108 return 1;
2109}
2110
6dba01e2
KW
2111#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2112
2113PERL_STATIC_INLINE void *
2114S_my_memrchr(const char * s, const char c, const STRLEN len)
2115{
2116 /* memrchr(), since many platforms lack it */
2117
2118 const char * t = s + len - 1;
2119
2120 PERL_ARGS_ASSERT_MY_MEMRCHR;
2121
2122 while (t >= s) {
2123 if (*t == c) {
2124 return (void *) t;
2125 }
2126 t--;
2127 }
2128
2129 return NULL;
2130}
2131
2132#endif
2133
ed382232 2134/*
c8028aa6
TC
2135 * ex: set ts=8 sts=4 sw=4 et:
2136 */