This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #132163] regex assertion failure
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
4ab2fd9b 385/* This looks like 0x010101... */
2c5c8af5 386# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
4ab2fd9b
KW
387
388/* This looks like 0x808080... */
2c5c8af5
KW
389# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
390# define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
391# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 392
099e59a4
KW
393/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
394 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
395 * optimized out completely on a 32-bit system, and its mask gets optimized out
396 * on a 64-bit system */
2c5c8af5 397# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
5eabe374
KW
398 | ( PTR2nat(x) >> 1) \
399 | ( ( (PTR2nat(x) \
400 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4 401
3f515a2e
KW
402#ifndef EBCDIC
403
099e59a4
KW
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
2c5c8af5
KW
441# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1d2af574
KW
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
2c5c8af5 449# else /* If weird byte order, drop into next loop to do byte-at-a-time
1d2af574
KW
450 checks. */
451
46bb68f6 452 break;
2c5c8af5 453# endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
0b08cab0 461#endif /* End of ! EBCDIC */
e17544a6
KW
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
23a7ee81
KW
479#ifndef EBCDIC
480
1d2af574
KW
481PERL_STATIC_INLINE unsigned int
482S__variant_byte_number(PERL_UINTMAX_T word)
483{
484
485 /* This returns the position in a word (0..7) of the first variant byte in
486 * it. This is a helper function. Note that there are no branches */
487
488 assert(word);
489
490 /* Get just the msb bits of each byte */
491 word &= PERL_VARIANTS_WORD_MASK;
492
597ee3f4
KW
493# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
494 easiest thing is to hide that from the callers */
495 {
496 unsigned int i;
497 const U8 * s = (U8 *) &word;
498 dTHX;
499
500 for (i = 0; i < sizeof(word); i++ ) {
501 if (s[i]) {
502 return i;
503 }
504 }
505
506 Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
507 __FILE__, __LINE__);
508 }
509
510# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
1d2af574
KW
511
512 /* Bytes are stored like
513 * Byte8 ... Byte2 Byte1
514 * 63..56...15...8 7...0
515 *
516 * Isolate the lsb;
517 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
518 *
519 * The word will look this this, with a rightmost set bit in position 's':
520 * ('x's are don't cares)
521 * s
522 * x..x100..0
523 * x..xx10..0 Right shift (rightmost 0 is shifted off)
524 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
525 * the 1 just to their left into a 0; the remainder is
526 * untouched
527 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
528 * bottom to all 1
529 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
530 *
531 * Another method is to do 'word &= -word'; but it generates a compiler
532 * message on some platforms about taking the negative of an unsigned */
533
534 word >>= 1;
535 word = 1 + (word ^ (word - 1));
536
537# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
538
539 /* Bytes are stored like
540 * Byte1 Byte2 ... Byte8
541 * 63..56 55..47 ... 7...0
542 *
543 * Isolate the msb; http://codeforces.com/blog/entry/10330
544 *
545 * Only the most significant set bit matters. Or'ing word with its right
546 * shift of 1 makes that bit and the next one to its right both 1. Then
547 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
548 * msb and all to the right being 1. */
549 word |= word >> 1;
550 word |= word >> 2;
551 word |= word >> 4;
552 word |= word >> 8;
553 word |= word >> 16;
554 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
555
556 /* Then subtracting the right shift by 1 clears all but the left-most of
557 * the 1 bits, which is our desired result */
558 word -= (word >> 1);
559
560# else
561# error Unexpected byte order
562# endif
563
7cf2d6c7
KW
564 /* Here 'word' has a single bit set: the msb of the first byte in which it
565 * is set. Calculate that position in the word. We can use this
1d2af574 566 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
567 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
568 * just get shifted off at compile time) */
569 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
570 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
571 | (39 << 24) | (47 << 16)
572 | (55 << 8) | (63 << 0));
1d2af574
KW
573 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
574
575 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
576 word = ((word + 1) >> 3) - 1;
577
578# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
579
580 /* And invert the result */
581 word = CHARBITS - word - 1;
582
583# endif
584
585 return (unsigned int) word;
586}
587
23a7ee81 588#endif
03c1e4ab
KW
589#if defined(PERL_CORE) || defined(PERL_EXT)
590
591/*
592=for apidoc variant_under_utf8_count
593
594This function looks at the sequence of bytes between C<s> and C<e>, which are
595assumed to be encoded in ASCII/Latin1, and returns how many of them would
596change should the string be translated into UTF-8. Due to the nature of UTF-8,
597each of these would occupy two bytes instead of the single one in the input
598string. Thus, this function returns the precise number of bytes the string
599would expand by when translated to UTF-8.
600
601Unlike most of the other functions that have C<utf8> in their name, the input
602to this function is NOT a UTF-8-encoded string. The function name is slightly
603I<odd> to emphasize this.
604
605This function is internal to Perl because khw thinks that any XS code that
606would want this is probably operating too close to the internals. Presenting a
607valid use case could change that.
608
609See also
610C<L<perlapi/is_utf8_invariant_string>>
611and
612C<L<perlapi/is_utf8_invariant_string_loc>>,
613
614=cut
615
616*/
617
618PERL_STATIC_INLINE Size_t
619S_variant_under_utf8_count(const U8* const s, const U8* const e)
620{
621 const U8* x = s;
622 Size_t count = 0;
623
624 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
625
626# ifndef EBCDIC
627
5d0379de
KW
628 /* Test if the string is long enough to use word-at-a-time. (Logic is the
629 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
630 if ((STRLEN) (e - x) >= PERL_WORDSIZE
631 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
632 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
633 {
634
635 /* Process per-byte until reach word boundary. XXX This loop could be
636 * eliminated if we knew that this platform had fast unaligned reads */
637 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
638 count += ! UTF8_IS_INVARIANT(*x++);
639 }
640
641 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
642 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
643 explanation of how this works */
03c1e4ab
KW
644 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
645 * PERL_COUNT_MULTIPLIER)
646 >> ((PERL_WORDSIZE - 1) * CHARBITS);
647 x += PERL_WORDSIZE;
648 } while (x + PERL_WORDSIZE <= e);
649 }
650
651# endif
652
653 /* Process per-byte */
654 while (x < e) {
655 if (! UTF8_IS_INVARIANT(*x)) {
656 count++;
657 }
658
659 x++;
660 }
661
662 return count;
663}
664
665#endif
666
aff4cafe
KW
667#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
668# undef PERL_WORDSIZE
669# undef PERL_COUNT_MULTIPLIER
670# undef PERL_WORD_BOUNDARY_MASK
671# undef PERL_VARIANTS_WORD_MASK
672#endif
03c1e4ab 673
7c93d8f0 674/*
5ff889fb
KW
675=for apidoc is_utf8_string
676
82c5d941
KW
677Returns TRUE if the first C<len> bytes of string C<s> form a valid
678Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
679be calculated using C<strlen(s)> (which means if you use this option, that C<s>
680can't have embedded C<NUL> characters and has to have a terminating C<NUL>
681byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
682
2717076a
KW
683This function considers Perl's extended UTF-8 to be valid. That means that
684code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
685considered valid by this function. Use C<L</is_strict_utf8_string>>,
686C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
687code points are considered valid.
5ff889fb 688
9f2abfde
KW
689See also
690C<L</is_utf8_invariant_string>>,
0cbf5865 691C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
692C<L</is_utf8_string_loc>>,
693C<L</is_utf8_string_loclen>>,
8bc127bf
KW
694C<L</is_utf8_fixed_width_buf_flags>>,
695C<L</is_utf8_fixed_width_buf_loc_flags>>,
696C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
697
698=cut
699*/
700
dd237e82 701#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 702
c9cd936b
KW
703#if defined(PERL_CORE) || defined (PERL_EXT)
704
705/*
706=for apidoc is_utf8_non_invariant_string
707
708Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
709C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
710UTF-8; otherwise returns FALSE.
711
712A TRUE return means that at least one code point represented by the sequence
713either is a wide character not representable as a single byte, or the
714representation differs depending on whether the sequence is encoded in UTF-8 or
715not.
716
717See also
718C<L<perlapi/is_utf8_invariant_string>>,
719C<L<perlapi/is_utf8_string>>
720
721=cut
722
723This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
724It generally needn't be if its string is entirely UTF-8 invariant, and it
725shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
726
727It is an internal function because khw thinks that XS code shouldn't be working
728at this low a level. A valid use case could change that.
729
730*/
731
732PERL_STATIC_INLINE bool
733S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
734{
735 const U8 * first_variant;
736
737 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
738
739 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
740 return FALSE;
741 }
742
743 return is_utf8_string(first_variant, len - (first_variant - s));
744}
745
746#endif
747
5ff889fb 748/*
9f2abfde
KW
749=for apidoc is_strict_utf8_string
750
751Returns TRUE if the first C<len> bytes of string C<s> form a valid
752UTF-8-encoded string that is fully interchangeable by any application using
753Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
754calculated using C<strlen(s)> (which means if you use this option, that C<s>
755can't have embedded C<NUL> characters and has to have a terminating C<NUL>
756byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
757
758This function returns FALSE for strings containing any
759code points above the Unicode max of 0x10FFFF, surrogate code points, or
760non-character code points.
761
762See also
763C<L</is_utf8_invariant_string>>,
0cbf5865 764C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
765C<L</is_utf8_string>>,
766C<L</is_utf8_string_flags>>,
767C<L</is_utf8_string_loc>>,
768C<L</is_utf8_string_loc_flags>>,
769C<L</is_utf8_string_loclen>>,
770C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
771C<L</is_utf8_fixed_width_buf_flags>>,
772C<L</is_utf8_fixed_width_buf_loc_flags>>,
773C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
774C<L</is_strict_utf8_string_loc>>,
775C<L</is_strict_utf8_string_loclen>>,
776C<L</is_c9strict_utf8_string>>,
777C<L</is_c9strict_utf8_string_loc>>,
778and
779C<L</is_c9strict_utf8_string_loclen>>.
780
781=cut
782*/
783
dd237e82 784#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
785
786/*
787=for apidoc is_c9strict_utf8_string
788
789Returns TRUE if the first C<len> bytes of string C<s> form a valid
790UTF-8-encoded string that conforms to
791L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
792otherwise it returns FALSE. If C<len> is 0, it will be calculated using
793C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
794C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
795characters being ASCII constitute 'a valid UTF-8 string'.
796
797This function returns FALSE for strings containing any code points above the
798Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
799code points per
800L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
801
802See also
803C<L</is_utf8_invariant_string>>,
0cbf5865 804C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
805C<L</is_utf8_string>>,
806C<L</is_utf8_string_flags>>,
807C<L</is_utf8_string_loc>>,
808C<L</is_utf8_string_loc_flags>>,
809C<L</is_utf8_string_loclen>>,
810C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
811C<L</is_utf8_fixed_width_buf_flags>>,
812C<L</is_utf8_fixed_width_buf_loc_flags>>,
813C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
814C<L</is_strict_utf8_string>>,
815C<L</is_strict_utf8_string_loc>>,
816C<L</is_strict_utf8_string_loclen>>,
817C<L</is_c9strict_utf8_string_loc>>,
818and
819C<L</is_c9strict_utf8_string_loclen>>.
820
821=cut
822*/
823
dd237e82 824#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
825
826/*
827=for apidoc is_utf8_string_flags
828
829Returns TRUE if the first C<len> bytes of string C<s> form a valid
830UTF-8 string, subject to the restrictions imposed by C<flags>;
831returns FALSE otherwise. If C<len> is 0, it will be calculated
832using C<strlen(s)> (which means if you use this option, that C<s> can't have
833embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
834that all characters being ASCII constitute 'a valid UTF-8 string'.
835
836If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
837C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
838as C<L</is_strict_utf8_string>>; and if C<flags> is
839C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
840C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
841combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
842C<L</utf8n_to_uvchr>>, with the same meanings.
843
844See also
845C<L</is_utf8_invariant_string>>,
0cbf5865 846C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
847C<L</is_utf8_string>>,
848C<L</is_utf8_string_loc>>,
849C<L</is_utf8_string_loc_flags>>,
850C<L</is_utf8_string_loclen>>,
851C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
852C<L</is_utf8_fixed_width_buf_flags>>,
853C<L</is_utf8_fixed_width_buf_loc_flags>>,
854C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
855C<L</is_strict_utf8_string>>,
856C<L</is_strict_utf8_string_loc>>,
857C<L</is_strict_utf8_string_loclen>>,
858C<L</is_c9strict_utf8_string>>,
859C<L</is_c9strict_utf8_string_loc>>,
860and
861C<L</is_c9strict_utf8_string_loclen>>.
862
863=cut
864*/
865
866PERL_STATIC_INLINE bool
f60f61fd 867S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 868{
33756530 869 const U8 * first_variant;
9f2abfde
KW
870
871 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
872 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 873 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 874
f60f61fd
KW
875 if (len == 0) {
876 len = strlen((const char *)s);
877 }
878
9f2abfde
KW
879 if (flags == 0) {
880 return is_utf8_string(s, len);
881 }
882
d044b7a7 883 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
884 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
885 {
886 return is_strict_utf8_string(s, len);
887 }
888
d044b7a7 889 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
890 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
891 {
892 return is_c9strict_utf8_string(s, len);
893 }
894
33756530
KW
895 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
896 const U8* const send = s + len;
897 const U8* x = first_variant;
898
a0d7f935
KW
899 while (x < send) {
900 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
901 if (UNLIKELY(! cur_len)) {
902 return FALSE;
903 }
904 x += cur_len;
9f2abfde 905 }
33756530 906 }
9f2abfde
KW
907
908 return TRUE;
909}
910
911/*
5ff889fb
KW
912
913=for apidoc is_utf8_string_loc
914
2717076a 915Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 916case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 917"utf8ness success") in the C<ep> pointer.
5ff889fb 918
2717076a 919See also C<L</is_utf8_string_loclen>>.
5ff889fb 920
3964c812
KW
921=cut
922*/
923
924#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
925
926/*
927
5ff889fb
KW
928=for apidoc is_utf8_string_loclen
929
2717076a 930Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 931case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 932"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 933encoded characters in the C<el> pointer.
5ff889fb 934
2717076a 935See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
936
937=cut
938*/
939
56e4cf64 940PERL_STATIC_INLINE bool
33756530 941Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 942{
33756530 943 const U8 * first_variant;
5ff889fb
KW
944
945 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
946
33756530
KW
947 if (len == 0) {
948 len = strlen((const char *) s);
949 }
950
951 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
952 if (el)
953 *el = len;
954
955 if (ep) {
956 *ep = s + len;
957 }
958
959 return TRUE;
960 }
961
962 {
963 const U8* const send = s + len;
964 const U8* x = first_variant;
965 STRLEN outlen = first_variant - s;
966
a0d7f935
KW
967 while (x < send) {
968 const STRLEN cur_len = isUTF8_CHAR(x, send);
969 if (UNLIKELY(! cur_len)) {
970 break;
971 }
972 x += cur_len;
973 outlen++;
5ff889fb 974 }
5ff889fb 975
a0d7f935
KW
976 if (el)
977 *el = outlen;
5ff889fb 978
a0d7f935
KW
979 if (ep) {
980 *ep = x;
981 }
5ff889fb 982
a0d7f935 983 return (x == send);
33756530 984 }
5ff889fb
KW
985}
986
987/*
9f2abfde
KW
988
989=for apidoc is_strict_utf8_string_loc
990
991Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
992case of "utf8ness failure") or the location C<s>+C<len> (in the case of
993"utf8ness success") in the C<ep> pointer.
994
995See also C<L</is_strict_utf8_string_loclen>>.
996
997=cut
998*/
999
1000#define is_strict_utf8_string_loc(s, len, ep) \
1001 is_strict_utf8_string_loclen(s, len, ep, 0)
1002
1003/*
1004
1005=for apidoc is_strict_utf8_string_loclen
1006
1007Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1008case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1009"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1010encoded characters in the C<el> pointer.
1011
1012See also C<L</is_strict_utf8_string_loc>>.
1013
1014=cut
1015*/
1016
1017PERL_STATIC_INLINE bool
33756530 1018S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1019{
33756530 1020 const U8 * first_variant;
9f2abfde
KW
1021
1022 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1023
33756530
KW
1024 if (len == 0) {
1025 len = strlen((const char *) s);
1026 }
1027
1028 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1029 if (el)
1030 *el = len;
1031
1032 if (ep) {
1033 *ep = s + len;
1034 }
1035
1036 return TRUE;
1037 }
1038
1039 {
1040 const U8* const send = s + len;
1041 const U8* x = first_variant;
1042 STRLEN outlen = first_variant - s;
1043
a0d7f935
KW
1044 while (x < send) {
1045 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1046 if (UNLIKELY(! cur_len)) {
1047 break;
1048 }
1049 x += cur_len;
1050 outlen++;
9f2abfde 1051 }
9f2abfde 1052
a0d7f935
KW
1053 if (el)
1054 *el = outlen;
9f2abfde 1055
a0d7f935
KW
1056 if (ep) {
1057 *ep = x;
1058 }
9f2abfde 1059
a0d7f935 1060 return (x == send);
33756530 1061 }
9f2abfde
KW
1062}
1063
1064/*
1065
1066=for apidoc is_c9strict_utf8_string_loc
1067
1068Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1069the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1070"utf8ness success") in the C<ep> pointer.
1071
1072See also C<L</is_c9strict_utf8_string_loclen>>.
1073
1074=cut
1075*/
1076
1077#define is_c9strict_utf8_string_loc(s, len, ep) \
1078 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1079
1080/*
1081
1082=for apidoc is_c9strict_utf8_string_loclen
1083
1084Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1085the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1086"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1087characters in the C<el> pointer.
1088
1089See also C<L</is_c9strict_utf8_string_loc>>.
1090
1091=cut
1092*/
1093
1094PERL_STATIC_INLINE bool
33756530 1095S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1096{
33756530 1097 const U8 * first_variant;
9f2abfde
KW
1098
1099 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1100
33756530
KW
1101 if (len == 0) {
1102 len = strlen((const char *) s);
1103 }
1104
1105 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1106 if (el)
1107 *el = len;
1108
1109 if (ep) {
1110 *ep = s + len;
1111 }
1112
1113 return TRUE;
1114 }
1115
1116 {
1117 const U8* const send = s + len;
1118 const U8* x = first_variant;
1119 STRLEN outlen = first_variant - s;
1120
a0d7f935
KW
1121 while (x < send) {
1122 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1123 if (UNLIKELY(! cur_len)) {
1124 break;
1125 }
1126 x += cur_len;
1127 outlen++;
9f2abfde 1128 }
9f2abfde 1129
a0d7f935
KW
1130 if (el)
1131 *el = outlen;
9f2abfde 1132
a0d7f935
KW
1133 if (ep) {
1134 *ep = x;
1135 }
9f2abfde 1136
a0d7f935 1137 return (x == send);
33756530 1138 }
9f2abfde
KW
1139}
1140
1141/*
1142
1143=for apidoc is_utf8_string_loc_flags
1144
1145Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1146case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1147"utf8ness success") in the C<ep> pointer.
1148
1149See also C<L</is_utf8_string_loclen_flags>>.
1150
1151=cut
1152*/
1153
1154#define is_utf8_string_loc_flags(s, len, ep, flags) \
1155 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1156
1157
1158/* The above 3 actual functions could have been moved into the more general one
1159 * just below, and made #defines that call it with the right 'flags'. They are
1160 * currently kept separate to increase their chances of getting inlined */
1161
1162/*
1163
1164=for apidoc is_utf8_string_loclen_flags
1165
1166Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1167case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1168"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1169encoded characters in the C<el> pointer.
1170
1171See also C<L</is_utf8_string_loc_flags>>.
1172
1173=cut
1174*/
1175
1176PERL_STATIC_INLINE bool
f60f61fd 1177S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1178{
33756530 1179 const U8 * first_variant;
9f2abfde
KW
1180
1181 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1182 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1183 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1184
f60f61fd 1185 if (len == 0) {
a0d7f935 1186 len = strlen((const char *) s);
f60f61fd
KW
1187 }
1188
9f2abfde
KW
1189 if (flags == 0) {
1190 return is_utf8_string_loclen(s, len, ep, el);
1191 }
1192
d044b7a7 1193 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1194 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1195 {
1196 return is_strict_utf8_string_loclen(s, len, ep, el);
1197 }
1198
d044b7a7 1199 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1200 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1201 {
1202 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1203 }
1204
33756530
KW
1205 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1206 if (el)
1207 *el = len;
1208
1209 if (ep) {
1210 *ep = s + len;
1211 }
1212
1213 return TRUE;
1214 }
1215
1216 {
1217 const U8* send = s + len;
1218 const U8* x = first_variant;
1219 STRLEN outlen = first_variant - s;
1220
a0d7f935
KW
1221 while (x < send) {
1222 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1223 if (UNLIKELY(! cur_len)) {
1224 break;
1225 }
1226 x += cur_len;
1227 outlen++;
9f2abfde 1228 }
9f2abfde 1229
a0d7f935
KW
1230 if (el)
1231 *el = outlen;
9f2abfde 1232
a0d7f935
KW
1233 if (ep) {
1234 *ep = x;
1235 }
9f2abfde 1236
a0d7f935 1237 return (x == send);
33756530 1238 }
9f2abfde
KW
1239}
1240
1241/*
7c93d8f0
KW
1242=for apidoc utf8_distance
1243
1244Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1245and C<b>.
1246
1247WARNING: use only if you *know* that the pointers point inside the
1248same UTF-8 buffer.
1249
1250=cut
1251*/
1252
1253PERL_STATIC_INLINE IV
1254Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1255{
1256 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1257
1258 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1259}
1260
1261/*
1262=for apidoc utf8_hop
1263
1264Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1265forward or backward.
1266
1267WARNING: do not use the following unless you *know* C<off> is within
1268the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1269on the first byte of character or just after the last byte of a character.
1270
1271=cut
1272*/
1273
1274PERL_STATIC_INLINE U8 *
1275Perl_utf8_hop(const U8 *s, SSize_t off)
1276{
1277 PERL_ARGS_ASSERT_UTF8_HOP;
1278
1279 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1280 * the bitops (especially ~) can create illegal UTF-8.
1281 * In other words: in Perl UTF-8 is not just for Unicode. */
1282
1283 if (off >= 0) {
1284 while (off--)
1285 s += UTF8SKIP(s);
1286 }
1287 else {
1288 while (off++) {
1289 s--;
1290 while (UTF8_IS_CONTINUATION(*s))
1291 s--;
1292 }
1293 }
7347ee54 1294 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1295 return (U8 *)s;
7347ee54 1296 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1297}
1298
4dab108f 1299/*
65df57a8
TC
1300=for apidoc utf8_hop_forward
1301
1302Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1303forward.
1304
1305C<off> must be non-negative.
1306
1307C<s> must be before or equal to C<end>.
1308
1309When moving forward it will not move beyond C<end>.
1310
1311Will not exceed this limit even if the string is not valid "UTF-8".
1312
1313=cut
1314*/
1315
1316PERL_STATIC_INLINE U8 *
1317Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1318{
1319 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1320
1321 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1322 * the bitops (especially ~) can create illegal UTF-8.
1323 * In other words: in Perl UTF-8 is not just for Unicode. */
1324
1325 assert(s <= end);
1326 assert(off >= 0);
1327
1328 while (off--) {
1329 STRLEN skip = UTF8SKIP(s);
de979548 1330 if ((STRLEN)(end - s) <= skip) {
7347ee54 1331 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1332 return (U8 *)end;
7347ee54 1333 GCC_DIAG_RESTORE_STMT;
de979548 1334 }
65df57a8
TC
1335 s += skip;
1336 }
1337
7347ee54 1338 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1339 return (U8 *)s;
7347ee54 1340 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1341}
1342
1343/*
1344=for apidoc utf8_hop_back
1345
1346Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1347backward.
1348
1349C<off> must be non-positive.
1350
1351C<s> must be after or equal to C<start>.
1352
1353When moving backward it will not move before C<start>.
1354
1355Will not exceed this limit even if the string is not valid "UTF-8".
1356
1357=cut
1358*/
1359
1360PERL_STATIC_INLINE U8 *
1361Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1362{
1363 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1364
1365 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1366 * the bitops (especially ~) can create illegal UTF-8.
1367 * In other words: in Perl UTF-8 is not just for Unicode. */
1368
1369 assert(start <= s);
1370 assert(off <= 0);
1371
1372 while (off++ && s > start) {
1373 s--;
1374 while (UTF8_IS_CONTINUATION(*s) && s > start)
1375 s--;
1376 }
1377
7347ee54 1378 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1379 return (U8 *)s;
7347ee54 1380 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1381}
1382
1383/*
1384=for apidoc utf8_hop_safe
1385
1386Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1387either forward or backward.
1388
1389When moving backward it will not move before C<start>.
1390
1391When moving forward it will not move beyond C<end>.
1392
1393Will not exceed those limits even if the string is not valid "UTF-8".
1394
1395=cut
1396*/
1397
1398PERL_STATIC_INLINE U8 *
1399Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1400{
1401 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1402
1403 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1404 * the bitops (especially ~) can create illegal UTF-8.
1405 * In other words: in Perl UTF-8 is not just for Unicode. */
1406
1407 assert(start <= s && s <= end);
1408
1409 if (off >= 0) {
1410 return utf8_hop_forward(s, off, end);
1411 }
1412 else {
1413 return utf8_hop_back(s, off, start);
1414 }
1415}
1416
1417/*
4dab108f
KW
1418
1419=for apidoc is_utf8_valid_partial_char
1420
6cbb9248
KW
1421Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1422S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1423points. Otherwise, it returns 1 if there exists at least one non-empty
1424sequence of bytes that when appended to sequence C<s>, starting at position
1425C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1426otherwise returns 0.
1427
1428In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1429point.
1430
1431This is useful when a fixed-length buffer is being tested for being well-formed
1432UTF-8, but the final few bytes in it don't comprise a full character; that is,
1433it is split somewhere in the middle of the final code point's UTF-8
1434representation. (Presumably when the buffer is refreshed with the next chunk
1435of data, the new first bytes will complete the partial code point.) This
1436function is used to verify that the final bytes in the current buffer are in
1437fact the legal beginning of some code point, so that if they aren't, the
1438failure can be signalled without having to wait for the next read.
4dab108f
KW
1439
1440=cut
1441*/
2717076a
KW
1442#define is_utf8_valid_partial_char(s, e) \
1443 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1444
1445/*
1446
1447=for apidoc is_utf8_valid_partial_char_flags
1448
1449Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1450or not the input is a valid UTF-8 encoded partial character, but it takes an
1451extra parameter, C<flags>, which can further restrict which code points are
1452considered valid.
1453
1454If C<flags> is 0, this behaves identically to
1455C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1456of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1457there is any sequence of bytes that can complete the input partial character in
1458such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1459TRUE; otherwise FALSE. Non character code points cannot be determined based on
1460partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1461determined from just the first one or two bytes.
1462
1463=cut
1464 */
1465
56e4cf64 1466PERL_STATIC_INLINE bool
f1c999a7 1467S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1468{
f1c999a7 1469 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1470
f1c999a7 1471 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1472 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1473
8875bd48 1474 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1475 return FALSE;
1476 }
1477
f1c999a7 1478 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1479}
1480
8bc127bf
KW
1481/*
1482
1483=for apidoc is_utf8_fixed_width_buf_flags
1484
1485Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1486is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1487otherwise it returns FALSE.
1488
1489If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1490without restriction. If the final few bytes of the buffer do not form a
1491complete code point, this will return TRUE anyway, provided that
1492C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1493
1494If C<flags> in non-zero, it can be any combination of the
1495C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1496same meanings.
1497
1498This function differs from C<L</is_utf8_string_flags>> only in that the latter
1499returns FALSE if the final few bytes of the string don't form a complete code
1500point.
1501
1502=cut
1503 */
1504#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1505 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1506
1507/*
1508
1509=for apidoc is_utf8_fixed_width_buf_loc_flags
1510
1511Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1512failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1513to the beginning of any partial character at the end of the buffer; if there is
1514no partial character C<*ep> will contain C<s>+C<len>.
1515
1516See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1517
1518=cut
1519*/
1520
1521#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1522 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1523
1524/*
1525
1526=for apidoc is_utf8_fixed_width_buf_loclen_flags
1527
1528Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1529complete, valid characters found in the C<el> pointer.
1530
1531=cut
1532*/
1533
1534PERL_STATIC_INLINE bool
1535S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1536 STRLEN len,
8bc127bf
KW
1537 const U8 **ep,
1538 STRLEN *el,
1539 const U32 flags)
1540{
1541 const U8 * maybe_partial;
1542
1543 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1544
1545 if (! ep) {
1546 ep = &maybe_partial;
1547 }
1548
1549 /* If it's entirely valid, return that; otherwise see if the only error is
1550 * that the final few bytes are for a partial character */
1551 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1552 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1553}
1554
c8028aa6
TC
1555/* ------------------------------- perl.h ----------------------------- */
1556
1557/*
dcccc8ff
KW
1558=head1 Miscellaneous Functions
1559
41188aa0 1560=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1561
6602b933 1562Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1563If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1564
1565Return TRUE if the name is safe.
1566
796b6530 1567Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1568
1569=cut
1570*/
1571
1572PERL_STATIC_INLINE bool
41188aa0 1573S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1574 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1575 * perl itself uses xce*() functions which accept 8-bit strings.
1576 */
1577
1578 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1579
6c4650b3 1580 if (len > 1) {
c8028aa6 1581 char *null_at;
41188aa0 1582 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1583 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1584 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1585 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1586 what, op_name, pv, null_at+1);
c8028aa6
TC
1587 return FALSE;
1588 }
1589 }
1590
1591 return TRUE;
1592}
1593
1594/*
7cb3f959
TC
1595
1596Return true if the supplied filename has a newline character
fa6c7d00 1597immediately before the first (hopefully only) NUL.
7cb3f959
TC
1598
1599My original look at this incorrectly used the len from SvPV(), but
1600that's incorrect, since we allow for a NUL in pv[len-1].
1601
1602So instead, strlen() and work from there.
1603
1604This allow for the user reading a filename, forgetting to chomp it,
1605then calling:
1606
1607 open my $foo, "$file\0";
1608
1609*/
1610
1611#ifdef PERL_CORE
1612
1613PERL_STATIC_INLINE bool
1614S_should_warn_nl(const char *pv) {
1615 STRLEN len;
1616
1617 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1618
1619 len = strlen(pv);
1620
1621 return len > 0 && pv[len-1] == '\n';
1622}
1623
1624#endif
1625
81d52ecd
JH
1626/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1627
1628#define MAX_CHARSET_NAME_LENGTH 2
1629
1630PERL_STATIC_INLINE const char *
1631get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1632{
1633 /* Returns a string that corresponds to the name of the regex character set
1634 * given by 'flags', and *lenp is set the length of that string, which
1635 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1636
1637 *lenp = 1;
1638 switch (get_regex_charset(flags)) {
1639 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1640 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1641 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1642 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1643 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1644 *lenp = 2;
1645 return ASCII_MORE_RESTRICT_PAT_MODS;
1646 }
1647 /* The NOT_REACHED; hides an assert() which has a rather complex
1648 * definition in perl.h. */
1649 NOT_REACHED; /* NOTREACHED */
1650 return "?"; /* Unknown */
1651}
1652
7cb3f959 1653/*
ed382232
TC
1654
1655Return false if any get magic is on the SV other than taint magic.
1656
1657*/
1658
1659PERL_STATIC_INLINE bool
1660S_sv_only_taint_gmagic(SV *sv) {
1661 MAGIC *mg = SvMAGIC(sv);
1662
1663 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1664
1665 while (mg) {
1666 if (mg->mg_type != PERL_MAGIC_taint
1667 && !(mg->mg_flags & MGf_GSKIP)
1668 && mg->mg_virtual->svt_get) {
1669 return FALSE;
1670 }
1671 mg = mg->mg_moremagic;
1672 }
1673
1674 return TRUE;
1675}
1676
ed8ff0f3
DM
1677/* ------------------ cop.h ------------------------------------------- */
1678
1679
1680/* Enter a block. Push a new base context and return its address. */
1681
1682PERL_STATIC_INLINE PERL_CONTEXT *
1683S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1684{
1685 PERL_CONTEXT * cx;
1686
1687 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1688
1689 CXINC;
1690 cx = CX_CUR();
1691 cx->cx_type = type;
1692 cx->blk_gimme = gimme;
1693 cx->blk_oldsaveix = saveix;
4caf7d8c 1694 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1695 cx->blk_oldcop = PL_curcop;
4caf7d8c 1696 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1697 cx->blk_oldscopesp = PL_scopestack_ix;
1698 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1699 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1700
1701 PL_tmps_floor = PL_tmps_ix;
1702 CX_DEBUG(cx, "PUSH");
1703 return cx;
1704}
1705
1706
1707/* Exit a block (RETURN and LAST). */
1708
1709PERL_STATIC_INLINE void
1710S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1711{
1712 PERL_ARGS_ASSERT_CX_POPBLOCK;
1713
1714 CX_DEBUG(cx, "POP");
1715 /* these 3 are common to cx_popblock and cx_topblock */
1716 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1717 PL_scopestack_ix = cx->blk_oldscopesp;
1718 PL_curpm = cx->blk_oldpm;
1719
1720 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1721 * and leaves a CX entry lying around for repeated use, so
1722 * skip for multicall */ \
1723 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1724 || PL_savestack_ix == cx->blk_oldsaveix);
1725 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1726 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1727}
1728
1729/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1730 * Whereas cx_popblock() restores the state to the point just before
1731 * cx_pushblock() was called, cx_topblock() restores it to the point just
1732 * *after* cx_pushblock() was called. */
1733
1734PERL_STATIC_INLINE void
1735S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1736{
1737 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1738
1739 CX_DEBUG(cx, "TOP");
1740 /* these 3 are common to cx_popblock and cx_topblock */
1741 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1742 PL_scopestack_ix = cx->blk_oldscopesp;
1743 PL_curpm = cx->blk_oldpm;
1744
1745 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1746}
1747
1748
a73d8813
DM
1749PERL_STATIC_INLINE void
1750S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1751{
1752 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1753
1754 PERL_ARGS_ASSERT_CX_PUSHSUB;
1755
3f6bd23a 1756 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1757 cx->blk_sub.cv = cv;
1758 cx->blk_sub.olddepth = CvDEPTH(cv);
1759 cx->blk_sub.prevcomppad = PL_comppad;
1760 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1761 cx->blk_sub.retop = retop;
1762 SvREFCNT_inc_simple_void_NN(cv);
1763 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1764}
1765
1766
1767/* subsets of cx_popsub() */
1768
1769PERL_STATIC_INLINE void
1770S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1771{
1772 CV *cv;
1773
1774 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1775 assert(CxTYPE(cx) == CXt_SUB);
1776
1777 PL_comppad = cx->blk_sub.prevcomppad;
1778 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1779 cv = cx->blk_sub.cv;
1780 CvDEPTH(cv) = cx->blk_sub.olddepth;
1781 cx->blk_sub.cv = NULL;
1782 SvREFCNT_dec(cv);
1783}
1784
1785
1786/* handle the @_ part of leaving a sub */
1787
1788PERL_STATIC_INLINE void
1789S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1790{
1791 AV *av;
1792
1793 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1794 assert(CxTYPE(cx) == CXt_SUB);
1795 assert(AvARRAY(MUTABLE_AV(
1796 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1797 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1798
1799 CX_POP_SAVEARRAY(cx);
1800 av = MUTABLE_AV(PAD_SVl(0));
1801 if (UNLIKELY(AvREAL(av)))
1802 /* abandon @_ if it got reified */
1803 clear_defarray(av, 0);
1804 else {
1805 CLEAR_ARGARRAY(av);
1806 }
1807}
1808
1809
1810PERL_STATIC_INLINE void
1811S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1812{
1813 PERL_ARGS_ASSERT_CX_POPSUB;
1814 assert(CxTYPE(cx) == CXt_SUB);
1815
3f6bd23a 1816 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1817
1818 if (CxHASARGS(cx))
1819 cx_popsub_args(cx);
1820 cx_popsub_common(cx);
1821}
1822
1823
6a7d52cc
DM
1824PERL_STATIC_INLINE void
1825S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1826{
1827 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1828
1829 cx->blk_format.cv = cv;
1830 cx->blk_format.retop = retop;
1831 cx->blk_format.gv = gv;
1832 cx->blk_format.dfoutgv = PL_defoutgv;
1833 cx->blk_format.prevcomppad = PL_comppad;
1834 cx->blk_u16 = 0;
1835
1836 SvREFCNT_inc_simple_void_NN(cv);
1837 CvDEPTH(cv)++;
1838 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1839}
1840
1841
1842PERL_STATIC_INLINE void
1843S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1844{
1845 CV *cv;
1846 GV *dfout;
1847
1848 PERL_ARGS_ASSERT_CX_POPFORMAT;
1849 assert(CxTYPE(cx) == CXt_FORMAT);
1850
1851 dfout = cx->blk_format.dfoutgv;
1852 setdefout(dfout);
1853 cx->blk_format.dfoutgv = NULL;
1854 SvREFCNT_dec_NN(dfout);
1855
1856 PL_comppad = cx->blk_format.prevcomppad;
1857 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1858 cv = cx->blk_format.cv;
1859 cx->blk_format.cv = NULL;
1860 --CvDEPTH(cv);
1861 SvREFCNT_dec_NN(cv);
1862}
1863
1864
13febba5
DM
1865PERL_STATIC_INLINE void
1866S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1867{
1868 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1869
1870 cx->blk_eval.retop = retop;
1871 cx->blk_eval.old_namesv = namesv;
1872 cx->blk_eval.old_eval_root = PL_eval_root;
1873 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1874 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1875 cx->blk_eval.cur_top_env = PL_top_env;
1876
4c57ced5 1877 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1878 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1879 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1880}
1881
1882
1883PERL_STATIC_INLINE void
1884S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1885{
1886 SV *sv;
1887
1888 PERL_ARGS_ASSERT_CX_POPEVAL;
1889 assert(CxTYPE(cx) == CXt_EVAL);
1890
1891 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1892 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1893 PL_eval_root = cx->blk_eval.old_eval_root;
1894 sv = cx->blk_eval.cur_text;
4c57ced5 1895 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1896 cx->blk_eval.cur_text = NULL;
1897 SvREFCNT_dec_NN(sv);
1898 }
1899
1900 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1901 if (sv) {
1902 cx->blk_eval.old_namesv = NULL;
1903 SvREFCNT_dec_NN(sv);
1904 }
13febba5 1905}
6a7d52cc 1906
a73d8813 1907
d1b6bf72
DM
1908/* push a plain loop, i.e.
1909 * { block }
1910 * while (cond) { block }
1911 * for (init;cond;continue) { block }
1912 * This loop can be last/redo'ed etc.
1913 */
1914
1915PERL_STATIC_INLINE void
1916S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1917{
1918 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1919 cx->blk_loop.my_op = cLOOP;
1920}
1921
1922
1923/* push a true for loop, i.e.
1924 * for var (list) { block }
1925 */
1926
1927PERL_STATIC_INLINE void
1928S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1929{
1930 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1931
1932 /* this one line is common with cx_pushloop_plain */
1933 cx->blk_loop.my_op = cLOOP;
1934
1935 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1936 cx->blk_loop.itersave = itersave;
1937#ifdef USE_ITHREADS
1938 cx->blk_loop.oldcomppad = PL_comppad;
1939#endif
1940}
1941
1942
1943/* pop all loop types, including plain */
1944
1945PERL_STATIC_INLINE void
1946S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1947{
1948 PERL_ARGS_ASSERT_CX_POPLOOP;
1949
1950 assert(CxTYPE_is_LOOP(cx));
1951 if ( CxTYPE(cx) == CXt_LOOP_ARY
1952 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1953 {
1954 /* Free ary or cur. This assumes that state_u.ary.ary
1955 * aligns with state_u.lazysv.cur. See cx_dup() */
1956 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1957 cx->blk_loop.state_u.lazysv.cur = NULL;
1958 SvREFCNT_dec_NN(sv);
1959 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1960 sv = cx->blk_loop.state_u.lazysv.end;
1961 cx->blk_loop.state_u.lazysv.end = NULL;
1962 SvREFCNT_dec_NN(sv);
1963 }
1964 }
1965 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1966 SV *cursv;
1967 SV **svp = (cx)->blk_loop.itervar_u.svp;
1968 if ((cx->cx_type & CXp_FOR_GV))
1969 svp = &GvSV((GV*)svp);
1970 cursv = *svp;
1971 *svp = cx->blk_loop.itersave;
1972 cx->blk_loop.itersave = NULL;
1973 SvREFCNT_dec(cursv);
1974 }
1975}
1976
2a7b7c61
DM
1977
1978PERL_STATIC_INLINE void
7896dde7 1979S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1980{
7896dde7 1981 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1982
7896dde7 1983 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1984}
1985
1986
1987PERL_STATIC_INLINE void
7896dde7 1988S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1989{
7896dde7
Z
1990 PERL_ARGS_ASSERT_CX_POPWHEN;
1991 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1992
1993 PERL_UNUSED_ARG(cx);
59a14f30 1994 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1995 /* currently NOOP */
1996}
1997
1998
7896dde7
Z
1999PERL_STATIC_INLINE void
2000S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
2001{
2002 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2003
2004 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2005 cx->blk_givwhen.defsv_save = orig_defsv;
2006}
2007
2008
2009PERL_STATIC_INLINE void
2010S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2011{
2012 SV *sv;
2013
2014 PERL_ARGS_ASSERT_CX_POPGIVEN;
2015 assert(CxTYPE(cx) == CXt_GIVEN);
2016
2017 sv = GvSV(PL_defgv);
2018 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2019 cx->blk_givwhen.defsv_save = NULL;
2020 SvREFCNT_dec(sv);
2021}
2022
ec2c235b
KW
2023/* ------------------ util.h ------------------------------------------- */
2024
2025/*
2026=head1 Miscellaneous Functions
2027
2028=for apidoc foldEQ
2029
2030Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2031same
2032case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2033match themselves and their opposite case counterparts. Non-cased and non-ASCII
2034range bytes match only themselves.
2035
2036=cut
2037*/
2038
2039PERL_STATIC_INLINE I32
2040Perl_foldEQ(const char *s1, const char *s2, I32 len)
2041{
2042 const U8 *a = (const U8 *)s1;
2043 const U8 *b = (const U8 *)s2;
2044
2045 PERL_ARGS_ASSERT_FOLDEQ;
2046
2047 assert(len >= 0);
2048
2049 while (len--) {
2050 if (*a != *b && *a != PL_fold[*b])
2051 return 0;
2052 a++,b++;
2053 }
2054 return 1;
2055}
2056
0f9cb40c 2057PERL_STATIC_INLINE I32
ec2c235b
KW
2058Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2059{
2060 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2061 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2062 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2063 * does it check that the strings each have at least 'len' characters */
2064
2065 const U8 *a = (const U8 *)s1;
2066 const U8 *b = (const U8 *)s2;
2067
2068 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2069
2070 assert(len >= 0);
2071
2072 while (len--) {
2073 if (*a != *b && *a != PL_fold_latin1[*b]) {
2074 return 0;
2075 }
2076 a++, b++;
2077 }
2078 return 1;
2079}
2080
2081/*
2082=for apidoc foldEQ_locale
2083
2084Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2085same case-insensitively in the current locale; false otherwise.
2086
2087=cut
2088*/
2089
0f9cb40c 2090PERL_STATIC_INLINE I32
ec2c235b
KW
2091Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2092{
2093 dVAR;
2094 const U8 *a = (const U8 *)s1;
2095 const U8 *b = (const U8 *)s2;
2096
2097 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2098
2099 assert(len >= 0);
2100
2101 while (len--) {
2102 if (*a != *b && *a != PL_fold_locale[*b])
2103 return 0;
2104 a++,b++;
2105 }
2106 return 1;
2107}
2108
6dba01e2
KW
2109#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2110
2111PERL_STATIC_INLINE void *
2112S_my_memrchr(const char * s, const char c, const STRLEN len)
2113{
2114 /* memrchr(), since many platforms lack it */
2115
2116 const char * t = s + len - 1;
2117
2118 PERL_ARGS_ASSERT_MY_MEMRCHR;
2119
2120 while (t >= s) {
2121 if (*t == c) {
2122 return (void *) t;
2123 }
2124 t--;
2125 }
2126
2127 return NULL;
2128}
2129
2130#endif
2131
ed382232 2132/*
c8028aa6
TC
2133 * ex: set ts=8 sts=4 sw=4 et:
2134 */