This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regexec.c: White-space only
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
2c5c8af5 388# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
4ab2fd9b
KW
389
390/* This looks like 0x808080... */
2c5c8af5
KW
391# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392# define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
2c5c8af5 399# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
5eabe374
KW
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4
KW
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
2c5c8af5
KW
441# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
1d2af574
KW
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
2c5c8af5 449# else /* If weird byte order, drop into next loop to do byte-at-a-time
1d2af574
KW
450 checks. */
451
46bb68f6 452 break;
2c5c8af5 453# endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
0b08cab0 461#endif /* End of ! EBCDIC */
e17544a6
KW
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
1d2af574
KW
479PERL_STATIC_INLINE unsigned int
480S__variant_byte_number(PERL_UINTMAX_T word)
481{
482
483 /* This returns the position in a word (0..7) of the first variant byte in
484 * it. This is a helper function. Note that there are no branches */
485
486 assert(word);
487
488 /* Get just the msb bits of each byte */
489 word &= PERL_VARIANTS_WORD_MASK;
490
597ee3f4
KW
491# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
492 easiest thing is to hide that from the callers */
493 {
494 unsigned int i;
495 const U8 * s = (U8 *) &word;
496 dTHX;
497
498 for (i = 0; i < sizeof(word); i++ ) {
499 if (s[i]) {
500 return i;
501 }
502 }
503
504 Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
505 __FILE__, __LINE__);
506 }
507
508# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
1d2af574
KW
509
510 /* Bytes are stored like
511 * Byte8 ... Byte2 Byte1
512 * 63..56...15...8 7...0
513 *
514 * Isolate the lsb;
515 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
516 *
517 * The word will look this this, with a rightmost set bit in position 's':
518 * ('x's are don't cares)
519 * s
520 * x..x100..0
521 * x..xx10..0 Right shift (rightmost 0 is shifted off)
522 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
523 * the 1 just to their left into a 0; the remainder is
524 * untouched
525 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
526 * bottom to all 1
527 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
528 *
529 * Another method is to do 'word &= -word'; but it generates a compiler
530 * message on some platforms about taking the negative of an unsigned */
531
532 word >>= 1;
533 word = 1 + (word ^ (word - 1));
534
535# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
536
537 /* Bytes are stored like
538 * Byte1 Byte2 ... Byte8
539 * 63..56 55..47 ... 7...0
540 *
541 * Isolate the msb; http://codeforces.com/blog/entry/10330
542 *
543 * Only the most significant set bit matters. Or'ing word with its right
544 * shift of 1 makes that bit and the next one to its right both 1. Then
545 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
546 * msb and all to the right being 1. */
547 word |= word >> 1;
548 word |= word >> 2;
549 word |= word >> 4;
550 word |= word >> 8;
551 word |= word >> 16;
552 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
553
554 /* Then subtracting the right shift by 1 clears all but the left-most of
555 * the 1 bits, which is our desired result */
556 word -= (word >> 1);
557
558# else
559# error Unexpected byte order
560# endif
561
7cf2d6c7
KW
562 /* Here 'word' has a single bit set: the msb of the first byte in which it
563 * is set. Calculate that position in the word. We can use this
1d2af574 564 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
67e12c5c
KW
565 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
566 * just get shifted off at compile time) */
567 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
568 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
569 | (39 << 24) | (47 << 16)
570 | (55 << 8) | (63 << 0));
1d2af574
KW
571 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
572
573 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
574 word = ((word + 1) >> 3) - 1;
575
576# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
577
578 /* And invert the result */
579 word = CHARBITS - word - 1;
580
581# endif
582
583 return (unsigned int) word;
584}
585
03c1e4ab
KW
586#if defined(PERL_CORE) || defined(PERL_EXT)
587
588/*
589=for apidoc variant_under_utf8_count
590
591This function looks at the sequence of bytes between C<s> and C<e>, which are
592assumed to be encoded in ASCII/Latin1, and returns how many of them would
593change should the string be translated into UTF-8. Due to the nature of UTF-8,
594each of these would occupy two bytes instead of the single one in the input
595string. Thus, this function returns the precise number of bytes the string
596would expand by when translated to UTF-8.
597
598Unlike most of the other functions that have C<utf8> in their name, the input
599to this function is NOT a UTF-8-encoded string. The function name is slightly
600I<odd> to emphasize this.
601
602This function is internal to Perl because khw thinks that any XS code that
603would want this is probably operating too close to the internals. Presenting a
604valid use case could change that.
605
606See also
607C<L<perlapi/is_utf8_invariant_string>>
608and
609C<L<perlapi/is_utf8_invariant_string_loc>>,
610
611=cut
612
613*/
614
615PERL_STATIC_INLINE Size_t
616S_variant_under_utf8_count(const U8* const s, const U8* const e)
617{
618 const U8* x = s;
619 Size_t count = 0;
620
621 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
622
623# ifndef EBCDIC
624
5d0379de
KW
625 /* Test if the string is long enough to use word-at-a-time. (Logic is the
626 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
627 if ((STRLEN) (e - x) >= PERL_WORDSIZE
628 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
629 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
630 {
631
632 /* Process per-byte until reach word boundary. XXX This loop could be
633 * eliminated if we knew that this platform had fast unaligned reads */
634 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
635 count += ! UTF8_IS_INVARIANT(*x++);
636 }
637
638 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
639 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
640 explanation of how this works */
03c1e4ab
KW
641 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
642 * PERL_COUNT_MULTIPLIER)
643 >> ((PERL_WORDSIZE - 1) * CHARBITS);
644 x += PERL_WORDSIZE;
645 } while (x + PERL_WORDSIZE <= e);
646 }
647
648# endif
649
650 /* Process per-byte */
651 while (x < e) {
652 if (! UTF8_IS_INVARIANT(*x)) {
653 count++;
654 }
655
656 x++;
657 }
658
659 return count;
660}
661
662#endif
663
aff4cafe
KW
664#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
665# undef PERL_WORDSIZE
666# undef PERL_COUNT_MULTIPLIER
667# undef PERL_WORD_BOUNDARY_MASK
668# undef PERL_VARIANTS_WORD_MASK
669#endif
03c1e4ab 670
7c93d8f0 671/*
5ff889fb
KW
672=for apidoc is_utf8_string
673
82c5d941
KW
674Returns TRUE if the first C<len> bytes of string C<s> form a valid
675Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
676be calculated using C<strlen(s)> (which means if you use this option, that C<s>
677can't have embedded C<NUL> characters and has to have a terminating C<NUL>
678byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
679
2717076a
KW
680This function considers Perl's extended UTF-8 to be valid. That means that
681code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
682considered valid by this function. Use C<L</is_strict_utf8_string>>,
683C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
684code points are considered valid.
5ff889fb 685
9f2abfde
KW
686See also
687C<L</is_utf8_invariant_string>>,
0cbf5865 688C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
689C<L</is_utf8_string_loc>>,
690C<L</is_utf8_string_loclen>>,
8bc127bf
KW
691C<L</is_utf8_fixed_width_buf_flags>>,
692C<L</is_utf8_fixed_width_buf_loc_flags>>,
693C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
694
695=cut
696*/
697
dd237e82 698#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 699
c9cd936b
KW
700#if defined(PERL_CORE) || defined (PERL_EXT)
701
702/*
703=for apidoc is_utf8_non_invariant_string
704
705Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
706C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
707UTF-8; otherwise returns FALSE.
708
709A TRUE return means that at least one code point represented by the sequence
710either is a wide character not representable as a single byte, or the
711representation differs depending on whether the sequence is encoded in UTF-8 or
712not.
713
714See also
715C<L<perlapi/is_utf8_invariant_string>>,
716C<L<perlapi/is_utf8_string>>
717
718=cut
719
720This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
721It generally needn't be if its string is entirely UTF-8 invariant, and it
722shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
723
724It is an internal function because khw thinks that XS code shouldn't be working
725at this low a level. A valid use case could change that.
726
727*/
728
729PERL_STATIC_INLINE bool
730S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
731{
732 const U8 * first_variant;
733
734 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
735
736 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
737 return FALSE;
738 }
739
740 return is_utf8_string(first_variant, len - (first_variant - s));
741}
742
743#endif
744
5ff889fb 745/*
9f2abfde
KW
746=for apidoc is_strict_utf8_string
747
748Returns TRUE if the first C<len> bytes of string C<s> form a valid
749UTF-8-encoded string that is fully interchangeable by any application using
750Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
751calculated using C<strlen(s)> (which means if you use this option, that C<s>
752can't have embedded C<NUL> characters and has to have a terminating C<NUL>
753byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
754
755This function returns FALSE for strings containing any
756code points above the Unicode max of 0x10FFFF, surrogate code points, or
757non-character code points.
758
759See also
760C<L</is_utf8_invariant_string>>,
0cbf5865 761C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
762C<L</is_utf8_string>>,
763C<L</is_utf8_string_flags>>,
764C<L</is_utf8_string_loc>>,
765C<L</is_utf8_string_loc_flags>>,
766C<L</is_utf8_string_loclen>>,
767C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
768C<L</is_utf8_fixed_width_buf_flags>>,
769C<L</is_utf8_fixed_width_buf_loc_flags>>,
770C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
771C<L</is_strict_utf8_string_loc>>,
772C<L</is_strict_utf8_string_loclen>>,
773C<L</is_c9strict_utf8_string>>,
774C<L</is_c9strict_utf8_string_loc>>,
775and
776C<L</is_c9strict_utf8_string_loclen>>.
777
778=cut
779*/
780
dd237e82 781#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
782
783/*
784=for apidoc is_c9strict_utf8_string
785
786Returns TRUE if the first C<len> bytes of string C<s> form a valid
787UTF-8-encoded string that conforms to
788L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
789otherwise it returns FALSE. If C<len> is 0, it will be calculated using
790C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
791C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
792characters being ASCII constitute 'a valid UTF-8 string'.
793
794This function returns FALSE for strings containing any code points above the
795Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
796code points per
797L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
798
799See also
800C<L</is_utf8_invariant_string>>,
0cbf5865 801C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
802C<L</is_utf8_string>>,
803C<L</is_utf8_string_flags>>,
804C<L</is_utf8_string_loc>>,
805C<L</is_utf8_string_loc_flags>>,
806C<L</is_utf8_string_loclen>>,
807C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
808C<L</is_utf8_fixed_width_buf_flags>>,
809C<L</is_utf8_fixed_width_buf_loc_flags>>,
810C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
811C<L</is_strict_utf8_string>>,
812C<L</is_strict_utf8_string_loc>>,
813C<L</is_strict_utf8_string_loclen>>,
814C<L</is_c9strict_utf8_string_loc>>,
815and
816C<L</is_c9strict_utf8_string_loclen>>.
817
818=cut
819*/
820
dd237e82 821#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
822
823/*
824=for apidoc is_utf8_string_flags
825
826Returns TRUE if the first C<len> bytes of string C<s> form a valid
827UTF-8 string, subject to the restrictions imposed by C<flags>;
828returns FALSE otherwise. If C<len> is 0, it will be calculated
829using C<strlen(s)> (which means if you use this option, that C<s> can't have
830embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
831that all characters being ASCII constitute 'a valid UTF-8 string'.
832
833If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
834C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
835as C<L</is_strict_utf8_string>>; and if C<flags> is
836C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
837C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
838combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
839C<L</utf8n_to_uvchr>>, with the same meanings.
840
841See also
842C<L</is_utf8_invariant_string>>,
0cbf5865 843C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
844C<L</is_utf8_string>>,
845C<L</is_utf8_string_loc>>,
846C<L</is_utf8_string_loc_flags>>,
847C<L</is_utf8_string_loclen>>,
848C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
849C<L</is_utf8_fixed_width_buf_flags>>,
850C<L</is_utf8_fixed_width_buf_loc_flags>>,
851C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
852C<L</is_strict_utf8_string>>,
853C<L</is_strict_utf8_string_loc>>,
854C<L</is_strict_utf8_string_loclen>>,
855C<L</is_c9strict_utf8_string>>,
856C<L</is_c9strict_utf8_string_loc>>,
857and
858C<L</is_c9strict_utf8_string_loclen>>.
859
860=cut
861*/
862
863PERL_STATIC_INLINE bool
f60f61fd 864S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 865{
33756530 866 const U8 * first_variant;
9f2abfde
KW
867
868 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
869 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 870 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 871
f60f61fd
KW
872 if (len == 0) {
873 len = strlen((const char *)s);
874 }
875
9f2abfde
KW
876 if (flags == 0) {
877 return is_utf8_string(s, len);
878 }
879
d044b7a7 880 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
881 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
882 {
883 return is_strict_utf8_string(s, len);
884 }
885
d044b7a7 886 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
887 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
888 {
889 return is_c9strict_utf8_string(s, len);
890 }
891
33756530
KW
892 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
893 const U8* const send = s + len;
894 const U8* x = first_variant;
895
a0d7f935
KW
896 while (x < send) {
897 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
898 if (UNLIKELY(! cur_len)) {
899 return FALSE;
900 }
901 x += cur_len;
9f2abfde 902 }
33756530 903 }
9f2abfde
KW
904
905 return TRUE;
906}
907
908/*
5ff889fb
KW
909
910=for apidoc is_utf8_string_loc
911
2717076a 912Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 913case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 914"utf8ness success") in the C<ep> pointer.
5ff889fb 915
2717076a 916See also C<L</is_utf8_string_loclen>>.
5ff889fb 917
3964c812
KW
918=cut
919*/
920
921#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
922
923/*
924
5ff889fb
KW
925=for apidoc is_utf8_string_loclen
926
2717076a 927Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 928case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 929"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 930encoded characters in the C<el> pointer.
5ff889fb 931
2717076a 932See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
933
934=cut
935*/
936
56e4cf64 937PERL_STATIC_INLINE bool
33756530 938Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 939{
33756530 940 const U8 * first_variant;
5ff889fb
KW
941
942 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
943
33756530
KW
944 if (len == 0) {
945 len = strlen((const char *) s);
946 }
947
948 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
949 if (el)
950 *el = len;
951
952 if (ep) {
953 *ep = s + len;
954 }
955
956 return TRUE;
957 }
958
959 {
960 const U8* const send = s + len;
961 const U8* x = first_variant;
962 STRLEN outlen = first_variant - s;
963
a0d7f935
KW
964 while (x < send) {
965 const STRLEN cur_len = isUTF8_CHAR(x, send);
966 if (UNLIKELY(! cur_len)) {
967 break;
968 }
969 x += cur_len;
970 outlen++;
5ff889fb 971 }
5ff889fb 972
a0d7f935
KW
973 if (el)
974 *el = outlen;
5ff889fb 975
a0d7f935
KW
976 if (ep) {
977 *ep = x;
978 }
5ff889fb 979
a0d7f935 980 return (x == send);
33756530 981 }
5ff889fb
KW
982}
983
984/*
9f2abfde
KW
985
986=for apidoc is_strict_utf8_string_loc
987
988Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
989case of "utf8ness failure") or the location C<s>+C<len> (in the case of
990"utf8ness success") in the C<ep> pointer.
991
992See also C<L</is_strict_utf8_string_loclen>>.
993
994=cut
995*/
996
997#define is_strict_utf8_string_loc(s, len, ep) \
998 is_strict_utf8_string_loclen(s, len, ep, 0)
999
1000/*
1001
1002=for apidoc is_strict_utf8_string_loclen
1003
1004Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1005case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1006"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1007encoded characters in the C<el> pointer.
1008
1009See also C<L</is_strict_utf8_string_loc>>.
1010
1011=cut
1012*/
1013
1014PERL_STATIC_INLINE bool
33756530 1015S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1016{
33756530 1017 const U8 * first_variant;
9f2abfde
KW
1018
1019 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1020
33756530
KW
1021 if (len == 0) {
1022 len = strlen((const char *) s);
1023 }
1024
1025 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1026 if (el)
1027 *el = len;
1028
1029 if (ep) {
1030 *ep = s + len;
1031 }
1032
1033 return TRUE;
1034 }
1035
1036 {
1037 const U8* const send = s + len;
1038 const U8* x = first_variant;
1039 STRLEN outlen = first_variant - s;
1040
a0d7f935
KW
1041 while (x < send) {
1042 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1043 if (UNLIKELY(! cur_len)) {
1044 break;
1045 }
1046 x += cur_len;
1047 outlen++;
9f2abfde 1048 }
9f2abfde 1049
a0d7f935
KW
1050 if (el)
1051 *el = outlen;
9f2abfde 1052
a0d7f935
KW
1053 if (ep) {
1054 *ep = x;
1055 }
9f2abfde 1056
a0d7f935 1057 return (x == send);
33756530 1058 }
9f2abfde
KW
1059}
1060
1061/*
1062
1063=for apidoc is_c9strict_utf8_string_loc
1064
1065Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1066the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1067"utf8ness success") in the C<ep> pointer.
1068
1069See also C<L</is_c9strict_utf8_string_loclen>>.
1070
1071=cut
1072*/
1073
1074#define is_c9strict_utf8_string_loc(s, len, ep) \
1075 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1076
1077/*
1078
1079=for apidoc is_c9strict_utf8_string_loclen
1080
1081Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1082the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1083"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1084characters in the C<el> pointer.
1085
1086See also C<L</is_c9strict_utf8_string_loc>>.
1087
1088=cut
1089*/
1090
1091PERL_STATIC_INLINE bool
33756530 1092S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1093{
33756530 1094 const U8 * first_variant;
9f2abfde
KW
1095
1096 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1097
33756530
KW
1098 if (len == 0) {
1099 len = strlen((const char *) s);
1100 }
1101
1102 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1103 if (el)
1104 *el = len;
1105
1106 if (ep) {
1107 *ep = s + len;
1108 }
1109
1110 return TRUE;
1111 }
1112
1113 {
1114 const U8* const send = s + len;
1115 const U8* x = first_variant;
1116 STRLEN outlen = first_variant - s;
1117
a0d7f935
KW
1118 while (x < send) {
1119 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1120 if (UNLIKELY(! cur_len)) {
1121 break;
1122 }
1123 x += cur_len;
1124 outlen++;
9f2abfde 1125 }
9f2abfde 1126
a0d7f935
KW
1127 if (el)
1128 *el = outlen;
9f2abfde 1129
a0d7f935
KW
1130 if (ep) {
1131 *ep = x;
1132 }
9f2abfde 1133
a0d7f935 1134 return (x == send);
33756530 1135 }
9f2abfde
KW
1136}
1137
1138/*
1139
1140=for apidoc is_utf8_string_loc_flags
1141
1142Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1143case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1144"utf8ness success") in the C<ep> pointer.
1145
1146See also C<L</is_utf8_string_loclen_flags>>.
1147
1148=cut
1149*/
1150
1151#define is_utf8_string_loc_flags(s, len, ep, flags) \
1152 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1153
1154
1155/* The above 3 actual functions could have been moved into the more general one
1156 * just below, and made #defines that call it with the right 'flags'. They are
1157 * currently kept separate to increase their chances of getting inlined */
1158
1159/*
1160
1161=for apidoc is_utf8_string_loclen_flags
1162
1163Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1164case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1165"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1166encoded characters in the C<el> pointer.
1167
1168See also C<L</is_utf8_string_loc_flags>>.
1169
1170=cut
1171*/
1172
1173PERL_STATIC_INLINE bool
f60f61fd 1174S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1175{
33756530 1176 const U8 * first_variant;
9f2abfde
KW
1177
1178 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1179 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1180 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1181
f60f61fd 1182 if (len == 0) {
a0d7f935 1183 len = strlen((const char *) s);
f60f61fd
KW
1184 }
1185
9f2abfde
KW
1186 if (flags == 0) {
1187 return is_utf8_string_loclen(s, len, ep, el);
1188 }
1189
d044b7a7 1190 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1191 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1192 {
1193 return is_strict_utf8_string_loclen(s, len, ep, el);
1194 }
1195
d044b7a7 1196 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1197 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1198 {
1199 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1200 }
1201
33756530
KW
1202 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1203 if (el)
1204 *el = len;
1205
1206 if (ep) {
1207 *ep = s + len;
1208 }
1209
1210 return TRUE;
1211 }
1212
1213 {
1214 const U8* send = s + len;
1215 const U8* x = first_variant;
1216 STRLEN outlen = first_variant - s;
1217
a0d7f935
KW
1218 while (x < send) {
1219 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1220 if (UNLIKELY(! cur_len)) {
1221 break;
1222 }
1223 x += cur_len;
1224 outlen++;
9f2abfde 1225 }
9f2abfde 1226
a0d7f935
KW
1227 if (el)
1228 *el = outlen;
9f2abfde 1229
a0d7f935
KW
1230 if (ep) {
1231 *ep = x;
1232 }
9f2abfde 1233
a0d7f935 1234 return (x == send);
33756530 1235 }
9f2abfde
KW
1236}
1237
1238/*
7c93d8f0
KW
1239=for apidoc utf8_distance
1240
1241Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1242and C<b>.
1243
1244WARNING: use only if you *know* that the pointers point inside the
1245same UTF-8 buffer.
1246
1247=cut
1248*/
1249
1250PERL_STATIC_INLINE IV
1251Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1252{
1253 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1254
1255 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1256}
1257
1258/*
1259=for apidoc utf8_hop
1260
1261Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1262forward or backward.
1263
1264WARNING: do not use the following unless you *know* C<off> is within
1265the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1266on the first byte of character or just after the last byte of a character.
1267
1268=cut
1269*/
1270
1271PERL_STATIC_INLINE U8 *
1272Perl_utf8_hop(const U8 *s, SSize_t off)
1273{
1274 PERL_ARGS_ASSERT_UTF8_HOP;
1275
1276 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1277 * the bitops (especially ~) can create illegal UTF-8.
1278 * In other words: in Perl UTF-8 is not just for Unicode. */
1279
1280 if (off >= 0) {
1281 while (off--)
1282 s += UTF8SKIP(s);
1283 }
1284 else {
1285 while (off++) {
1286 s--;
1287 while (UTF8_IS_CONTINUATION(*s))
1288 s--;
1289 }
1290 }
7347ee54 1291 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1292 return (U8 *)s;
7347ee54 1293 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1294}
1295
4dab108f 1296/*
65df57a8
TC
1297=for apidoc utf8_hop_forward
1298
1299Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1300forward.
1301
1302C<off> must be non-negative.
1303
1304C<s> must be before or equal to C<end>.
1305
1306When moving forward it will not move beyond C<end>.
1307
1308Will not exceed this limit even if the string is not valid "UTF-8".
1309
1310=cut
1311*/
1312
1313PERL_STATIC_INLINE U8 *
1314Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1315{
1316 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1317
1318 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1319 * the bitops (especially ~) can create illegal UTF-8.
1320 * In other words: in Perl UTF-8 is not just for Unicode. */
1321
1322 assert(s <= end);
1323 assert(off >= 0);
1324
1325 while (off--) {
1326 STRLEN skip = UTF8SKIP(s);
de979548 1327 if ((STRLEN)(end - s) <= skip) {
7347ee54 1328 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1329 return (U8 *)end;
7347ee54 1330 GCC_DIAG_RESTORE_STMT;
de979548 1331 }
65df57a8
TC
1332 s += skip;
1333 }
1334
7347ee54 1335 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1336 return (U8 *)s;
7347ee54 1337 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1338}
1339
1340/*
1341=for apidoc utf8_hop_back
1342
1343Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1344backward.
1345
1346C<off> must be non-positive.
1347
1348C<s> must be after or equal to C<start>.
1349
1350When moving backward it will not move before C<start>.
1351
1352Will not exceed this limit even if the string is not valid "UTF-8".
1353
1354=cut
1355*/
1356
1357PERL_STATIC_INLINE U8 *
1358Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1359{
1360 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1361
1362 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1363 * the bitops (especially ~) can create illegal UTF-8.
1364 * In other words: in Perl UTF-8 is not just for Unicode. */
1365
1366 assert(start <= s);
1367 assert(off <= 0);
1368
1369 while (off++ && s > start) {
1370 s--;
1371 while (UTF8_IS_CONTINUATION(*s) && s > start)
1372 s--;
1373 }
1374
7347ee54 1375 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1376 return (U8 *)s;
7347ee54 1377 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1378}
1379
1380/*
1381=for apidoc utf8_hop_safe
1382
1383Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1384either forward or backward.
1385
1386When moving backward it will not move before C<start>.
1387
1388When moving forward it will not move beyond C<end>.
1389
1390Will not exceed those limits even if the string is not valid "UTF-8".
1391
1392=cut
1393*/
1394
1395PERL_STATIC_INLINE U8 *
1396Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1397{
1398 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1399
1400 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1401 * the bitops (especially ~) can create illegal UTF-8.
1402 * In other words: in Perl UTF-8 is not just for Unicode. */
1403
1404 assert(start <= s && s <= end);
1405
1406 if (off >= 0) {
1407 return utf8_hop_forward(s, off, end);
1408 }
1409 else {
1410 return utf8_hop_back(s, off, start);
1411 }
1412}
1413
1414/*
4dab108f
KW
1415
1416=for apidoc is_utf8_valid_partial_char
1417
6cbb9248
KW
1418Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1419S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1420points. Otherwise, it returns 1 if there exists at least one non-empty
1421sequence of bytes that when appended to sequence C<s>, starting at position
1422C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1423otherwise returns 0.
1424
1425In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1426point.
1427
1428This is useful when a fixed-length buffer is being tested for being well-formed
1429UTF-8, but the final few bytes in it don't comprise a full character; that is,
1430it is split somewhere in the middle of the final code point's UTF-8
1431representation. (Presumably when the buffer is refreshed with the next chunk
1432of data, the new first bytes will complete the partial code point.) This
1433function is used to verify that the final bytes in the current buffer are in
1434fact the legal beginning of some code point, so that if they aren't, the
1435failure can be signalled without having to wait for the next read.
4dab108f
KW
1436
1437=cut
1438*/
2717076a
KW
1439#define is_utf8_valid_partial_char(s, e) \
1440 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1441
1442/*
1443
1444=for apidoc is_utf8_valid_partial_char_flags
1445
1446Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1447or not the input is a valid UTF-8 encoded partial character, but it takes an
1448extra parameter, C<flags>, which can further restrict which code points are
1449considered valid.
1450
1451If C<flags> is 0, this behaves identically to
1452C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1453of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1454there is any sequence of bytes that can complete the input partial character in
1455such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1456TRUE; otherwise FALSE. Non character code points cannot be determined based on
1457partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1458determined from just the first one or two bytes.
1459
1460=cut
1461 */
1462
56e4cf64 1463PERL_STATIC_INLINE bool
f1c999a7 1464S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1465{
f1c999a7 1466 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1467
f1c999a7 1468 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1469 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1470
8875bd48 1471 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1472 return FALSE;
1473 }
1474
f1c999a7 1475 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1476}
1477
8bc127bf
KW
1478/*
1479
1480=for apidoc is_utf8_fixed_width_buf_flags
1481
1482Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1483is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1484otherwise it returns FALSE.
1485
1486If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1487without restriction. If the final few bytes of the buffer do not form a
1488complete code point, this will return TRUE anyway, provided that
1489C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1490
1491If C<flags> in non-zero, it can be any combination of the
1492C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1493same meanings.
1494
1495This function differs from C<L</is_utf8_string_flags>> only in that the latter
1496returns FALSE if the final few bytes of the string don't form a complete code
1497point.
1498
1499=cut
1500 */
1501#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1502 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1503
1504/*
1505
1506=for apidoc is_utf8_fixed_width_buf_loc_flags
1507
1508Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1509failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1510to the beginning of any partial character at the end of the buffer; if there is
1511no partial character C<*ep> will contain C<s>+C<len>.
1512
1513See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1514
1515=cut
1516*/
1517
1518#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1519 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1520
1521/*
1522
1523=for apidoc is_utf8_fixed_width_buf_loclen_flags
1524
1525Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1526complete, valid characters found in the C<el> pointer.
1527
1528=cut
1529*/
1530
1531PERL_STATIC_INLINE bool
1532S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1533 STRLEN len,
8bc127bf
KW
1534 const U8 **ep,
1535 STRLEN *el,
1536 const U32 flags)
1537{
1538 const U8 * maybe_partial;
1539
1540 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1541
1542 if (! ep) {
1543 ep = &maybe_partial;
1544 }
1545
1546 /* If it's entirely valid, return that; otherwise see if the only error is
1547 * that the final few bytes are for a partial character */
1548 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1549 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1550}
1551
c8028aa6
TC
1552/* ------------------------------- perl.h ----------------------------- */
1553
1554/*
dcccc8ff
KW
1555=head1 Miscellaneous Functions
1556
41188aa0 1557=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1558
6602b933 1559Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1560If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1561
1562Return TRUE if the name is safe.
1563
796b6530 1564Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1565
1566=cut
1567*/
1568
1569PERL_STATIC_INLINE bool
41188aa0 1570S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1571 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1572 * perl itself uses xce*() functions which accept 8-bit strings.
1573 */
1574
1575 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1576
6c4650b3 1577 if (len > 1) {
c8028aa6 1578 char *null_at;
41188aa0 1579 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1580 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1581 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1582 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1583 what, op_name, pv, null_at+1);
c8028aa6
TC
1584 return FALSE;
1585 }
1586 }
1587
1588 return TRUE;
1589}
1590
1591/*
7cb3f959
TC
1592
1593Return true if the supplied filename has a newline character
fa6c7d00 1594immediately before the first (hopefully only) NUL.
7cb3f959
TC
1595
1596My original look at this incorrectly used the len from SvPV(), but
1597that's incorrect, since we allow for a NUL in pv[len-1].
1598
1599So instead, strlen() and work from there.
1600
1601This allow for the user reading a filename, forgetting to chomp it,
1602then calling:
1603
1604 open my $foo, "$file\0";
1605
1606*/
1607
1608#ifdef PERL_CORE
1609
1610PERL_STATIC_INLINE bool
1611S_should_warn_nl(const char *pv) {
1612 STRLEN len;
1613
1614 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1615
1616 len = strlen(pv);
1617
1618 return len > 0 && pv[len-1] == '\n';
1619}
1620
1621#endif
1622
81d52ecd
JH
1623/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1624
1625#define MAX_CHARSET_NAME_LENGTH 2
1626
1627PERL_STATIC_INLINE const char *
1628get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1629{
1630 /* Returns a string that corresponds to the name of the regex character set
1631 * given by 'flags', and *lenp is set the length of that string, which
1632 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1633
1634 *lenp = 1;
1635 switch (get_regex_charset(flags)) {
1636 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1637 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1638 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1639 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1640 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1641 *lenp = 2;
1642 return ASCII_MORE_RESTRICT_PAT_MODS;
1643 }
1644 /* The NOT_REACHED; hides an assert() which has a rather complex
1645 * definition in perl.h. */
1646 NOT_REACHED; /* NOTREACHED */
1647 return "?"; /* Unknown */
1648}
1649
7cb3f959 1650/*
ed382232
TC
1651
1652Return false if any get magic is on the SV other than taint magic.
1653
1654*/
1655
1656PERL_STATIC_INLINE bool
1657S_sv_only_taint_gmagic(SV *sv) {
1658 MAGIC *mg = SvMAGIC(sv);
1659
1660 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1661
1662 while (mg) {
1663 if (mg->mg_type != PERL_MAGIC_taint
1664 && !(mg->mg_flags & MGf_GSKIP)
1665 && mg->mg_virtual->svt_get) {
1666 return FALSE;
1667 }
1668 mg = mg->mg_moremagic;
1669 }
1670
1671 return TRUE;
1672}
1673
ed8ff0f3
DM
1674/* ------------------ cop.h ------------------------------------------- */
1675
1676
1677/* Enter a block. Push a new base context and return its address. */
1678
1679PERL_STATIC_INLINE PERL_CONTEXT *
1680S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1681{
1682 PERL_CONTEXT * cx;
1683
1684 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1685
1686 CXINC;
1687 cx = CX_CUR();
1688 cx->cx_type = type;
1689 cx->blk_gimme = gimme;
1690 cx->blk_oldsaveix = saveix;
4caf7d8c 1691 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1692 cx->blk_oldcop = PL_curcop;
4caf7d8c 1693 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1694 cx->blk_oldscopesp = PL_scopestack_ix;
1695 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1696 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1697
1698 PL_tmps_floor = PL_tmps_ix;
1699 CX_DEBUG(cx, "PUSH");
1700 return cx;
1701}
1702
1703
1704/* Exit a block (RETURN and LAST). */
1705
1706PERL_STATIC_INLINE void
1707S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1708{
1709 PERL_ARGS_ASSERT_CX_POPBLOCK;
1710
1711 CX_DEBUG(cx, "POP");
1712 /* these 3 are common to cx_popblock and cx_topblock */
1713 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1714 PL_scopestack_ix = cx->blk_oldscopesp;
1715 PL_curpm = cx->blk_oldpm;
1716
1717 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1718 * and leaves a CX entry lying around for repeated use, so
1719 * skip for multicall */ \
1720 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1721 || PL_savestack_ix == cx->blk_oldsaveix);
1722 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1723 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1724}
1725
1726/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1727 * Whereas cx_popblock() restores the state to the point just before
1728 * cx_pushblock() was called, cx_topblock() restores it to the point just
1729 * *after* cx_pushblock() was called. */
1730
1731PERL_STATIC_INLINE void
1732S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1733{
1734 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1735
1736 CX_DEBUG(cx, "TOP");
1737 /* these 3 are common to cx_popblock and cx_topblock */
1738 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1739 PL_scopestack_ix = cx->blk_oldscopesp;
1740 PL_curpm = cx->blk_oldpm;
1741
1742 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1743}
1744
1745
a73d8813
DM
1746PERL_STATIC_INLINE void
1747S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1748{
1749 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1750
1751 PERL_ARGS_ASSERT_CX_PUSHSUB;
1752
3f6bd23a 1753 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1754 cx->blk_sub.cv = cv;
1755 cx->blk_sub.olddepth = CvDEPTH(cv);
1756 cx->blk_sub.prevcomppad = PL_comppad;
1757 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1758 cx->blk_sub.retop = retop;
1759 SvREFCNT_inc_simple_void_NN(cv);
1760 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1761}
1762
1763
1764/* subsets of cx_popsub() */
1765
1766PERL_STATIC_INLINE void
1767S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1768{
1769 CV *cv;
1770
1771 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1772 assert(CxTYPE(cx) == CXt_SUB);
1773
1774 PL_comppad = cx->blk_sub.prevcomppad;
1775 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1776 cv = cx->blk_sub.cv;
1777 CvDEPTH(cv) = cx->blk_sub.olddepth;
1778 cx->blk_sub.cv = NULL;
1779 SvREFCNT_dec(cv);
1780}
1781
1782
1783/* handle the @_ part of leaving a sub */
1784
1785PERL_STATIC_INLINE void
1786S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1787{
1788 AV *av;
1789
1790 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1791 assert(CxTYPE(cx) == CXt_SUB);
1792 assert(AvARRAY(MUTABLE_AV(
1793 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1794 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1795
1796 CX_POP_SAVEARRAY(cx);
1797 av = MUTABLE_AV(PAD_SVl(0));
1798 if (UNLIKELY(AvREAL(av)))
1799 /* abandon @_ if it got reified */
1800 clear_defarray(av, 0);
1801 else {
1802 CLEAR_ARGARRAY(av);
1803 }
1804}
1805
1806
1807PERL_STATIC_INLINE void
1808S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1809{
1810 PERL_ARGS_ASSERT_CX_POPSUB;
1811 assert(CxTYPE(cx) == CXt_SUB);
1812
3f6bd23a 1813 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1814
1815 if (CxHASARGS(cx))
1816 cx_popsub_args(cx);
1817 cx_popsub_common(cx);
1818}
1819
1820
6a7d52cc
DM
1821PERL_STATIC_INLINE void
1822S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1823{
1824 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1825
1826 cx->blk_format.cv = cv;
1827 cx->blk_format.retop = retop;
1828 cx->blk_format.gv = gv;
1829 cx->blk_format.dfoutgv = PL_defoutgv;
1830 cx->blk_format.prevcomppad = PL_comppad;
1831 cx->blk_u16 = 0;
1832
1833 SvREFCNT_inc_simple_void_NN(cv);
1834 CvDEPTH(cv)++;
1835 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1836}
1837
1838
1839PERL_STATIC_INLINE void
1840S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1841{
1842 CV *cv;
1843 GV *dfout;
1844
1845 PERL_ARGS_ASSERT_CX_POPFORMAT;
1846 assert(CxTYPE(cx) == CXt_FORMAT);
1847
1848 dfout = cx->blk_format.dfoutgv;
1849 setdefout(dfout);
1850 cx->blk_format.dfoutgv = NULL;
1851 SvREFCNT_dec_NN(dfout);
1852
1853 PL_comppad = cx->blk_format.prevcomppad;
1854 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1855 cv = cx->blk_format.cv;
1856 cx->blk_format.cv = NULL;
1857 --CvDEPTH(cv);
1858 SvREFCNT_dec_NN(cv);
1859}
1860
1861
13febba5
DM
1862PERL_STATIC_INLINE void
1863S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1864{
1865 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1866
1867 cx->blk_eval.retop = retop;
1868 cx->blk_eval.old_namesv = namesv;
1869 cx->blk_eval.old_eval_root = PL_eval_root;
1870 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1871 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1872 cx->blk_eval.cur_top_env = PL_top_env;
1873
4c57ced5 1874 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1875 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1876 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1877}
1878
1879
1880PERL_STATIC_INLINE void
1881S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1882{
1883 SV *sv;
1884
1885 PERL_ARGS_ASSERT_CX_POPEVAL;
1886 assert(CxTYPE(cx) == CXt_EVAL);
1887
1888 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1889 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1890 PL_eval_root = cx->blk_eval.old_eval_root;
1891 sv = cx->blk_eval.cur_text;
4c57ced5 1892 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1893 cx->blk_eval.cur_text = NULL;
1894 SvREFCNT_dec_NN(sv);
1895 }
1896
1897 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1898 if (sv) {
1899 cx->blk_eval.old_namesv = NULL;
1900 SvREFCNT_dec_NN(sv);
1901 }
13febba5 1902}
6a7d52cc 1903
a73d8813 1904
d1b6bf72
DM
1905/* push a plain loop, i.e.
1906 * { block }
1907 * while (cond) { block }
1908 * for (init;cond;continue) { block }
1909 * This loop can be last/redo'ed etc.
1910 */
1911
1912PERL_STATIC_INLINE void
1913S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1914{
1915 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1916 cx->blk_loop.my_op = cLOOP;
1917}
1918
1919
1920/* push a true for loop, i.e.
1921 * for var (list) { block }
1922 */
1923
1924PERL_STATIC_INLINE void
1925S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1926{
1927 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1928
1929 /* this one line is common with cx_pushloop_plain */
1930 cx->blk_loop.my_op = cLOOP;
1931
1932 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1933 cx->blk_loop.itersave = itersave;
1934#ifdef USE_ITHREADS
1935 cx->blk_loop.oldcomppad = PL_comppad;
1936#endif
1937}
1938
1939
1940/* pop all loop types, including plain */
1941
1942PERL_STATIC_INLINE void
1943S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1944{
1945 PERL_ARGS_ASSERT_CX_POPLOOP;
1946
1947 assert(CxTYPE_is_LOOP(cx));
1948 if ( CxTYPE(cx) == CXt_LOOP_ARY
1949 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1950 {
1951 /* Free ary or cur. This assumes that state_u.ary.ary
1952 * aligns with state_u.lazysv.cur. See cx_dup() */
1953 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1954 cx->blk_loop.state_u.lazysv.cur = NULL;
1955 SvREFCNT_dec_NN(sv);
1956 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1957 sv = cx->blk_loop.state_u.lazysv.end;
1958 cx->blk_loop.state_u.lazysv.end = NULL;
1959 SvREFCNT_dec_NN(sv);
1960 }
1961 }
1962 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1963 SV *cursv;
1964 SV **svp = (cx)->blk_loop.itervar_u.svp;
1965 if ((cx->cx_type & CXp_FOR_GV))
1966 svp = &GvSV((GV*)svp);
1967 cursv = *svp;
1968 *svp = cx->blk_loop.itersave;
1969 cx->blk_loop.itersave = NULL;
1970 SvREFCNT_dec(cursv);
1971 }
1972}
1973
2a7b7c61
DM
1974
1975PERL_STATIC_INLINE void
7896dde7 1976S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1977{
7896dde7 1978 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1979
7896dde7 1980 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1981}
1982
1983
1984PERL_STATIC_INLINE void
7896dde7 1985S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1986{
7896dde7
Z
1987 PERL_ARGS_ASSERT_CX_POPWHEN;
1988 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1989
1990 PERL_UNUSED_ARG(cx);
59a14f30 1991 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1992 /* currently NOOP */
1993}
1994
1995
7896dde7
Z
1996PERL_STATIC_INLINE void
1997S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1998{
1999 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2000
2001 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2002 cx->blk_givwhen.defsv_save = orig_defsv;
2003}
2004
2005
2006PERL_STATIC_INLINE void
2007S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2008{
2009 SV *sv;
2010
2011 PERL_ARGS_ASSERT_CX_POPGIVEN;
2012 assert(CxTYPE(cx) == CXt_GIVEN);
2013
2014 sv = GvSV(PL_defgv);
2015 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2016 cx->blk_givwhen.defsv_save = NULL;
2017 SvREFCNT_dec(sv);
2018}
2019
ec2c235b
KW
2020/* ------------------ util.h ------------------------------------------- */
2021
2022/*
2023=head1 Miscellaneous Functions
2024
2025=for apidoc foldEQ
2026
2027Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2028same
2029case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2030match themselves and their opposite case counterparts. Non-cased and non-ASCII
2031range bytes match only themselves.
2032
2033=cut
2034*/
2035
2036PERL_STATIC_INLINE I32
2037Perl_foldEQ(const char *s1, const char *s2, I32 len)
2038{
2039 const U8 *a = (const U8 *)s1;
2040 const U8 *b = (const U8 *)s2;
2041
2042 PERL_ARGS_ASSERT_FOLDEQ;
2043
2044 assert(len >= 0);
2045
2046 while (len--) {
2047 if (*a != *b && *a != PL_fold[*b])
2048 return 0;
2049 a++,b++;
2050 }
2051 return 1;
2052}
2053
0f9cb40c 2054PERL_STATIC_INLINE I32
ec2c235b
KW
2055Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2056{
2057 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2058 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2059 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2060 * does it check that the strings each have at least 'len' characters */
2061
2062 const U8 *a = (const U8 *)s1;
2063 const U8 *b = (const U8 *)s2;
2064
2065 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2066
2067 assert(len >= 0);
2068
2069 while (len--) {
2070 if (*a != *b && *a != PL_fold_latin1[*b]) {
2071 return 0;
2072 }
2073 a++, b++;
2074 }
2075 return 1;
2076}
2077
2078/*
2079=for apidoc foldEQ_locale
2080
2081Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2082same case-insensitively in the current locale; false otherwise.
2083
2084=cut
2085*/
2086
0f9cb40c 2087PERL_STATIC_INLINE I32
ec2c235b
KW
2088Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2089{
2090 dVAR;
2091 const U8 *a = (const U8 *)s1;
2092 const U8 *b = (const U8 *)s2;
2093
2094 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2095
2096 assert(len >= 0);
2097
2098 while (len--) {
2099 if (*a != *b && *a != PL_fold_locale[*b])
2100 return 0;
2101 a++,b++;
2102 }
2103 return 1;
2104}
2105
6dba01e2
KW
2106#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2107
2108PERL_STATIC_INLINE void *
2109S_my_memrchr(const char * s, const char c, const STRLEN len)
2110{
2111 /* memrchr(), since many platforms lack it */
2112
2113 const char * t = s + len - 1;
2114
2115 PERL_ARGS_ASSERT_MY_MEMRCHR;
2116
2117 while (t >= s) {
2118 if (*t == c) {
2119 return (void *) t;
2120 }
2121 t--;
2122 }
2123
2124 return NULL;
2125}
2126
2127#endif
2128
ed382232 2129/*
c8028aa6
TC
2130 * ex: set ts=8 sts=4 sw=4 et:
2131 */