This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix bug in new [[:ascii:]] nodes
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
5eabe374
KW
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4
KW
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
1d2af574
KW
441#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
449#else /* If weird byte order, drop into next loop to do byte-at-a-time
450 checks. */
451
46bb68f6 452 break;
1d2af574 453#endif
46bb68f6 454 }
1d2af574 455
46bb68f6 456 x += PERL_WORDSIZE;
1d2af574 457
099e59a4 458 } while (x + PERL_WORDSIZE <= send);
b40579ff 459 }
e17544a6 460
e17544a6
KW
461#endif
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
0cbf5865 469
e17544a6 470 return FALSE;
0cbf5865 471 }
1e599354 472
e17544a6 473 x++;
1e599354
KW
474 }
475
476 return TRUE;
477}
478
1d2af574
KW
479#ifndef EBCDIC
480
481PERL_STATIC_INLINE unsigned int
482S__variant_byte_number(PERL_UINTMAX_T word)
483{
484
485 /* This returns the position in a word (0..7) of the first variant byte in
486 * it. This is a helper function. Note that there are no branches */
487
488 assert(word);
489
490 /* Get just the msb bits of each byte */
491 word &= PERL_VARIANTS_WORD_MASK;
492
493# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
494
495 /* Bytes are stored like
496 * Byte8 ... Byte2 Byte1
497 * 63..56...15...8 7...0
498 *
499 * Isolate the lsb;
500 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
501 *
502 * The word will look this this, with a rightmost set bit in position 's':
503 * ('x's are don't cares)
504 * s
505 * x..x100..0
506 * x..xx10..0 Right shift (rightmost 0 is shifted off)
507 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
508 * the 1 just to their left into a 0; the remainder is
509 * untouched
510 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
511 * bottom to all 1
512 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
513 *
514 * Another method is to do 'word &= -word'; but it generates a compiler
515 * message on some platforms about taking the negative of an unsigned */
516
517 word >>= 1;
518 word = 1 + (word ^ (word - 1));
519
520# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
521
522 /* Bytes are stored like
523 * Byte1 Byte2 ... Byte8
524 * 63..56 55..47 ... 7...0
525 *
526 * Isolate the msb; http://codeforces.com/blog/entry/10330
527 *
528 * Only the most significant set bit matters. Or'ing word with its right
529 * shift of 1 makes that bit and the next one to its right both 1. Then
530 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
531 * msb and all to the right being 1. */
532 word |= word >> 1;
533 word |= word >> 2;
534 word |= word >> 4;
535 word |= word >> 8;
536 word |= word >> 16;
537 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
538
539 /* Then subtracting the right shift by 1 clears all but the left-most of
540 * the 1 bits, which is our desired result */
541 word -= (word >> 1);
542
543# else
544# error Unexpected byte order
545# endif
546
547 /* Here 'word' has a single bit set, the msb is of the first byte which
548 * has it set. Calculate that position in the word. We can use this
549 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
550 * assumes an 8-bit byte */
551 word = (word >> 7) * (( 7ULL << 56) | (15ULL << 48) | (23ULL << 40)
552 | (31ULL << 32) | (39ULL << 24) | (47ULL << 16)
553 | (55ULL << 8) | (63ULL << 0));
554 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
555
556 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
557 word = ((word + 1) >> 3) - 1;
558
559# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
560
561 /* And invert the result */
562 word = CHARBITS - word - 1;
563
564# endif
565
566 return (unsigned int) word;
567}
568
569#endif /* ! EBCDIC */
03c1e4ab
KW
570#if defined(PERL_CORE) || defined(PERL_EXT)
571
572/*
573=for apidoc variant_under_utf8_count
574
575This function looks at the sequence of bytes between C<s> and C<e>, which are
576assumed to be encoded in ASCII/Latin1, and returns how many of them would
577change should the string be translated into UTF-8. Due to the nature of UTF-8,
578each of these would occupy two bytes instead of the single one in the input
579string. Thus, this function returns the precise number of bytes the string
580would expand by when translated to UTF-8.
581
582Unlike most of the other functions that have C<utf8> in their name, the input
583to this function is NOT a UTF-8-encoded string. The function name is slightly
584I<odd> to emphasize this.
585
586This function is internal to Perl because khw thinks that any XS code that
587would want this is probably operating too close to the internals. Presenting a
588valid use case could change that.
589
590See also
591C<L<perlapi/is_utf8_invariant_string>>
592and
593C<L<perlapi/is_utf8_invariant_string_loc>>,
594
595=cut
596
597*/
598
599PERL_STATIC_INLINE Size_t
600S_variant_under_utf8_count(const U8* const s, const U8* const e)
601{
602 const U8* x = s;
603 Size_t count = 0;
604
605 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
606
607# ifndef EBCDIC
608
5d0379de
KW
609 /* Test if the string is long enough to use word-at-a-time. (Logic is the
610 * same as for is_utf8_invariant_string()) */
03c1e4ab
KW
611 if ((STRLEN) (e - x) >= PERL_WORDSIZE
612 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
613 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
614 {
615
616 /* Process per-byte until reach word boundary. XXX This loop could be
617 * eliminated if we knew that this platform had fast unaligned reads */
618 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
619 count += ! UTF8_IS_INVARIANT(*x++);
620 }
621
622 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
623 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
624 explanation of how this works */
03c1e4ab
KW
625 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
626 * PERL_COUNT_MULTIPLIER)
627 >> ((PERL_WORDSIZE - 1) * CHARBITS);
628 x += PERL_WORDSIZE;
629 } while (x + PERL_WORDSIZE <= e);
630 }
631
632# endif
633
634 /* Process per-byte */
635 while (x < e) {
636 if (! UTF8_IS_INVARIANT(*x)) {
637 count++;
638 }
639
640 x++;
641 }
642
643 return count;
644}
645
646#endif
647
aff4cafe
KW
648#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
649# undef PERL_WORDSIZE
650# undef PERL_COUNT_MULTIPLIER
651# undef PERL_WORD_BOUNDARY_MASK
652# undef PERL_VARIANTS_WORD_MASK
653#endif
03c1e4ab 654
7c93d8f0 655/*
5ff889fb
KW
656=for apidoc is_utf8_string
657
82c5d941
KW
658Returns TRUE if the first C<len> bytes of string C<s> form a valid
659Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
660be calculated using C<strlen(s)> (which means if you use this option, that C<s>
661can't have embedded C<NUL> characters and has to have a terminating C<NUL>
662byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
663
2717076a
KW
664This function considers Perl's extended UTF-8 to be valid. That means that
665code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
666considered valid by this function. Use C<L</is_strict_utf8_string>>,
667C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
668code points are considered valid.
5ff889fb 669
9f2abfde
KW
670See also
671C<L</is_utf8_invariant_string>>,
0cbf5865 672C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
673C<L</is_utf8_string_loc>>,
674C<L</is_utf8_string_loclen>>,
8bc127bf
KW
675C<L</is_utf8_fixed_width_buf_flags>>,
676C<L</is_utf8_fixed_width_buf_loc_flags>>,
677C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
678
679=cut
680*/
681
dd237e82 682#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 683
c9cd936b
KW
684#if defined(PERL_CORE) || defined (PERL_EXT)
685
686/*
687=for apidoc is_utf8_non_invariant_string
688
689Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
690C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
691UTF-8; otherwise returns FALSE.
692
693A TRUE return means that at least one code point represented by the sequence
694either is a wide character not representable as a single byte, or the
695representation differs depending on whether the sequence is encoded in UTF-8 or
696not.
697
698See also
699C<L<perlapi/is_utf8_invariant_string>>,
700C<L<perlapi/is_utf8_string>>
701
702=cut
703
704This is commonly used to determine if a SV's UTF-8 flag should be turned on.
b3b93dfe
KW
705It generally needn't be if its string is entirely UTF-8 invariant, and it
706shouldn't be if it otherwise contains invalid UTF-8.
c9cd936b
KW
707
708It is an internal function because khw thinks that XS code shouldn't be working
709at this low a level. A valid use case could change that.
710
711*/
712
713PERL_STATIC_INLINE bool
714S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
715{
716 const U8 * first_variant;
717
718 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
719
720 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
721 return FALSE;
722 }
723
724 return is_utf8_string(first_variant, len - (first_variant - s));
725}
726
727#endif
728
5ff889fb 729/*
9f2abfde
KW
730=for apidoc is_strict_utf8_string
731
732Returns TRUE if the first C<len> bytes of string C<s> form a valid
733UTF-8-encoded string that is fully interchangeable by any application using
734Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
735calculated using C<strlen(s)> (which means if you use this option, that C<s>
736can't have embedded C<NUL> characters and has to have a terminating C<NUL>
737byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
738
739This function returns FALSE for strings containing any
740code points above the Unicode max of 0x10FFFF, surrogate code points, or
741non-character code points.
742
743See also
744C<L</is_utf8_invariant_string>>,
0cbf5865 745C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
746C<L</is_utf8_string>>,
747C<L</is_utf8_string_flags>>,
748C<L</is_utf8_string_loc>>,
749C<L</is_utf8_string_loc_flags>>,
750C<L</is_utf8_string_loclen>>,
751C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
752C<L</is_utf8_fixed_width_buf_flags>>,
753C<L</is_utf8_fixed_width_buf_loc_flags>>,
754C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
755C<L</is_strict_utf8_string_loc>>,
756C<L</is_strict_utf8_string_loclen>>,
757C<L</is_c9strict_utf8_string>>,
758C<L</is_c9strict_utf8_string_loc>>,
759and
760C<L</is_c9strict_utf8_string_loclen>>.
761
762=cut
763*/
764
dd237e82 765#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
766
767/*
768=for apidoc is_c9strict_utf8_string
769
770Returns TRUE if the first C<len> bytes of string C<s> form a valid
771UTF-8-encoded string that conforms to
772L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
773otherwise it returns FALSE. If C<len> is 0, it will be calculated using
774C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
775C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
776characters being ASCII constitute 'a valid UTF-8 string'.
777
778This function returns FALSE for strings containing any code points above the
779Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
780code points per
781L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
782
783See also
784C<L</is_utf8_invariant_string>>,
0cbf5865 785C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
786C<L</is_utf8_string>>,
787C<L</is_utf8_string_flags>>,
788C<L</is_utf8_string_loc>>,
789C<L</is_utf8_string_loc_flags>>,
790C<L</is_utf8_string_loclen>>,
791C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
792C<L</is_utf8_fixed_width_buf_flags>>,
793C<L</is_utf8_fixed_width_buf_loc_flags>>,
794C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
795C<L</is_strict_utf8_string>>,
796C<L</is_strict_utf8_string_loc>>,
797C<L</is_strict_utf8_string_loclen>>,
798C<L</is_c9strict_utf8_string_loc>>,
799and
800C<L</is_c9strict_utf8_string_loclen>>.
801
802=cut
803*/
804
dd237e82 805#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
806
807/*
808=for apidoc is_utf8_string_flags
809
810Returns TRUE if the first C<len> bytes of string C<s> form a valid
811UTF-8 string, subject to the restrictions imposed by C<flags>;
812returns FALSE otherwise. If C<len> is 0, it will be calculated
813using C<strlen(s)> (which means if you use this option, that C<s> can't have
814embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
815that all characters being ASCII constitute 'a valid UTF-8 string'.
816
817If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
818C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
819as C<L</is_strict_utf8_string>>; and if C<flags> is
820C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
821C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
822combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
823C<L</utf8n_to_uvchr>>, with the same meanings.
824
825See also
826C<L</is_utf8_invariant_string>>,
0cbf5865 827C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
828C<L</is_utf8_string>>,
829C<L</is_utf8_string_loc>>,
830C<L</is_utf8_string_loc_flags>>,
831C<L</is_utf8_string_loclen>>,
832C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
833C<L</is_utf8_fixed_width_buf_flags>>,
834C<L</is_utf8_fixed_width_buf_loc_flags>>,
835C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
836C<L</is_strict_utf8_string>>,
837C<L</is_strict_utf8_string_loc>>,
838C<L</is_strict_utf8_string_loclen>>,
839C<L</is_c9strict_utf8_string>>,
840C<L</is_c9strict_utf8_string_loc>>,
841and
842C<L</is_c9strict_utf8_string_loclen>>.
843
844=cut
845*/
846
847PERL_STATIC_INLINE bool
f60f61fd 848S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 849{
33756530 850 const U8 * first_variant;
9f2abfde
KW
851
852 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
853 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 854 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 855
f60f61fd
KW
856 if (len == 0) {
857 len = strlen((const char *)s);
858 }
859
9f2abfde
KW
860 if (flags == 0) {
861 return is_utf8_string(s, len);
862 }
863
d044b7a7 864 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
865 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
866 {
867 return is_strict_utf8_string(s, len);
868 }
869
d044b7a7 870 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
871 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
872 {
873 return is_c9strict_utf8_string(s, len);
874 }
875
33756530
KW
876 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
877 const U8* const send = s + len;
878 const U8* x = first_variant;
879
a0d7f935
KW
880 while (x < send) {
881 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
882 if (UNLIKELY(! cur_len)) {
883 return FALSE;
884 }
885 x += cur_len;
9f2abfde 886 }
33756530 887 }
9f2abfde
KW
888
889 return TRUE;
890}
891
892/*
5ff889fb
KW
893
894=for apidoc is_utf8_string_loc
895
2717076a 896Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 897case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 898"utf8ness success") in the C<ep> pointer.
5ff889fb 899
2717076a 900See also C<L</is_utf8_string_loclen>>.
5ff889fb 901
3964c812
KW
902=cut
903*/
904
905#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
906
907/*
908
5ff889fb
KW
909=for apidoc is_utf8_string_loclen
910
2717076a 911Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 912case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 913"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 914encoded characters in the C<el> pointer.
5ff889fb 915
2717076a 916See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
917
918=cut
919*/
920
56e4cf64 921PERL_STATIC_INLINE bool
33756530 922Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 923{
33756530 924 const U8 * first_variant;
5ff889fb
KW
925
926 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
927
33756530
KW
928 if (len == 0) {
929 len = strlen((const char *) s);
930 }
931
932 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
933 if (el)
934 *el = len;
935
936 if (ep) {
937 *ep = s + len;
938 }
939
940 return TRUE;
941 }
942
943 {
944 const U8* const send = s + len;
945 const U8* x = first_variant;
946 STRLEN outlen = first_variant - s;
947
a0d7f935
KW
948 while (x < send) {
949 const STRLEN cur_len = isUTF8_CHAR(x, send);
950 if (UNLIKELY(! cur_len)) {
951 break;
952 }
953 x += cur_len;
954 outlen++;
5ff889fb 955 }
5ff889fb 956
a0d7f935
KW
957 if (el)
958 *el = outlen;
5ff889fb 959
a0d7f935
KW
960 if (ep) {
961 *ep = x;
962 }
5ff889fb 963
a0d7f935 964 return (x == send);
33756530 965 }
5ff889fb
KW
966}
967
968/*
9f2abfde
KW
969
970=for apidoc is_strict_utf8_string_loc
971
972Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
973case of "utf8ness failure") or the location C<s>+C<len> (in the case of
974"utf8ness success") in the C<ep> pointer.
975
976See also C<L</is_strict_utf8_string_loclen>>.
977
978=cut
979*/
980
981#define is_strict_utf8_string_loc(s, len, ep) \
982 is_strict_utf8_string_loclen(s, len, ep, 0)
983
984/*
985
986=for apidoc is_strict_utf8_string_loclen
987
988Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
989case of "utf8ness failure") or the location C<s>+C<len> (in the case of
990"utf8ness success") in the C<ep> pointer, and the number of UTF-8
991encoded characters in the C<el> pointer.
992
993See also C<L</is_strict_utf8_string_loc>>.
994
995=cut
996*/
997
998PERL_STATIC_INLINE bool
33756530 999S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1000{
33756530 1001 const U8 * first_variant;
9f2abfde
KW
1002
1003 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1004
33756530
KW
1005 if (len == 0) {
1006 len = strlen((const char *) s);
1007 }
1008
1009 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1010 if (el)
1011 *el = len;
1012
1013 if (ep) {
1014 *ep = s + len;
1015 }
1016
1017 return TRUE;
1018 }
1019
1020 {
1021 const U8* const send = s + len;
1022 const U8* x = first_variant;
1023 STRLEN outlen = first_variant - s;
1024
a0d7f935
KW
1025 while (x < send) {
1026 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1027 if (UNLIKELY(! cur_len)) {
1028 break;
1029 }
1030 x += cur_len;
1031 outlen++;
9f2abfde 1032 }
9f2abfde 1033
a0d7f935
KW
1034 if (el)
1035 *el = outlen;
9f2abfde 1036
a0d7f935
KW
1037 if (ep) {
1038 *ep = x;
1039 }
9f2abfde 1040
a0d7f935 1041 return (x == send);
33756530 1042 }
9f2abfde
KW
1043}
1044
1045/*
1046
1047=for apidoc is_c9strict_utf8_string_loc
1048
1049Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1050the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1051"utf8ness success") in the C<ep> pointer.
1052
1053See also C<L</is_c9strict_utf8_string_loclen>>.
1054
1055=cut
1056*/
1057
1058#define is_c9strict_utf8_string_loc(s, len, ep) \
1059 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1060
1061/*
1062
1063=for apidoc is_c9strict_utf8_string_loclen
1064
1065Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1066the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1067"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1068characters in the C<el> pointer.
1069
1070See also C<L</is_c9strict_utf8_string_loc>>.
1071
1072=cut
1073*/
1074
1075PERL_STATIC_INLINE bool
33756530 1076S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 1077{
33756530 1078 const U8 * first_variant;
9f2abfde
KW
1079
1080 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1081
33756530
KW
1082 if (len == 0) {
1083 len = strlen((const char *) s);
1084 }
1085
1086 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1087 if (el)
1088 *el = len;
1089
1090 if (ep) {
1091 *ep = s + len;
1092 }
1093
1094 return TRUE;
1095 }
1096
1097 {
1098 const U8* const send = s + len;
1099 const U8* x = first_variant;
1100 STRLEN outlen = first_variant - s;
1101
a0d7f935
KW
1102 while (x < send) {
1103 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1104 if (UNLIKELY(! cur_len)) {
1105 break;
1106 }
1107 x += cur_len;
1108 outlen++;
9f2abfde 1109 }
9f2abfde 1110
a0d7f935
KW
1111 if (el)
1112 *el = outlen;
9f2abfde 1113
a0d7f935
KW
1114 if (ep) {
1115 *ep = x;
1116 }
9f2abfde 1117
a0d7f935 1118 return (x == send);
33756530 1119 }
9f2abfde
KW
1120}
1121
1122/*
1123
1124=for apidoc is_utf8_string_loc_flags
1125
1126Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1127case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1128"utf8ness success") in the C<ep> pointer.
1129
1130See also C<L</is_utf8_string_loclen_flags>>.
1131
1132=cut
1133*/
1134
1135#define is_utf8_string_loc_flags(s, len, ep, flags) \
1136 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1137
1138
1139/* The above 3 actual functions could have been moved into the more general one
1140 * just below, and made #defines that call it with the right 'flags'. They are
1141 * currently kept separate to increase their chances of getting inlined */
1142
1143/*
1144
1145=for apidoc is_utf8_string_loclen_flags
1146
1147Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1148case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1149"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1150encoded characters in the C<el> pointer.
1151
1152See also C<L</is_utf8_string_loc_flags>>.
1153
1154=cut
1155*/
1156
1157PERL_STATIC_INLINE bool
f60f61fd 1158S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1159{
33756530 1160 const U8 * first_variant;
9f2abfde
KW
1161
1162 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1163 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1164 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1165
f60f61fd 1166 if (len == 0) {
a0d7f935 1167 len = strlen((const char *) s);
f60f61fd
KW
1168 }
1169
9f2abfde
KW
1170 if (flags == 0) {
1171 return is_utf8_string_loclen(s, len, ep, el);
1172 }
1173
d044b7a7 1174 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1175 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1176 {
1177 return is_strict_utf8_string_loclen(s, len, ep, el);
1178 }
1179
d044b7a7 1180 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1181 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1182 {
1183 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1184 }
1185
33756530
KW
1186 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1187 if (el)
1188 *el = len;
1189
1190 if (ep) {
1191 *ep = s + len;
1192 }
1193
1194 return TRUE;
1195 }
1196
1197 {
1198 const U8* send = s + len;
1199 const U8* x = first_variant;
1200 STRLEN outlen = first_variant - s;
1201
a0d7f935
KW
1202 while (x < send) {
1203 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1204 if (UNLIKELY(! cur_len)) {
1205 break;
1206 }
1207 x += cur_len;
1208 outlen++;
9f2abfde 1209 }
9f2abfde 1210
a0d7f935
KW
1211 if (el)
1212 *el = outlen;
9f2abfde 1213
a0d7f935
KW
1214 if (ep) {
1215 *ep = x;
1216 }
9f2abfde 1217
a0d7f935 1218 return (x == send);
33756530 1219 }
9f2abfde
KW
1220}
1221
1222/*
7c93d8f0
KW
1223=for apidoc utf8_distance
1224
1225Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1226and C<b>.
1227
1228WARNING: use only if you *know* that the pointers point inside the
1229same UTF-8 buffer.
1230
1231=cut
1232*/
1233
1234PERL_STATIC_INLINE IV
1235Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1236{
1237 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1238
1239 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1240}
1241
1242/*
1243=for apidoc utf8_hop
1244
1245Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1246forward or backward.
1247
1248WARNING: do not use the following unless you *know* C<off> is within
1249the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1250on the first byte of character or just after the last byte of a character.
1251
1252=cut
1253*/
1254
1255PERL_STATIC_INLINE U8 *
1256Perl_utf8_hop(const U8 *s, SSize_t off)
1257{
1258 PERL_ARGS_ASSERT_UTF8_HOP;
1259
1260 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1261 * the bitops (especially ~) can create illegal UTF-8.
1262 * In other words: in Perl UTF-8 is not just for Unicode. */
1263
1264 if (off >= 0) {
1265 while (off--)
1266 s += UTF8SKIP(s);
1267 }
1268 else {
1269 while (off++) {
1270 s--;
1271 while (UTF8_IS_CONTINUATION(*s))
1272 s--;
1273 }
1274 }
7347ee54 1275 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1276 return (U8 *)s;
7347ee54 1277 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1278}
1279
4dab108f 1280/*
65df57a8
TC
1281=for apidoc utf8_hop_forward
1282
1283Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1284forward.
1285
1286C<off> must be non-negative.
1287
1288C<s> must be before or equal to C<end>.
1289
1290When moving forward it will not move beyond C<end>.
1291
1292Will not exceed this limit even if the string is not valid "UTF-8".
1293
1294=cut
1295*/
1296
1297PERL_STATIC_INLINE U8 *
1298Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1299{
1300 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1301
1302 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1303 * the bitops (especially ~) can create illegal UTF-8.
1304 * In other words: in Perl UTF-8 is not just for Unicode. */
1305
1306 assert(s <= end);
1307 assert(off >= 0);
1308
1309 while (off--) {
1310 STRLEN skip = UTF8SKIP(s);
de979548 1311 if ((STRLEN)(end - s) <= skip) {
7347ee54 1312 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1313 return (U8 *)end;
7347ee54 1314 GCC_DIAG_RESTORE_STMT;
de979548 1315 }
65df57a8
TC
1316 s += skip;
1317 }
1318
7347ee54 1319 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1320 return (U8 *)s;
7347ee54 1321 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1322}
1323
1324/*
1325=for apidoc utf8_hop_back
1326
1327Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1328backward.
1329
1330C<off> must be non-positive.
1331
1332C<s> must be after or equal to C<start>.
1333
1334When moving backward it will not move before C<start>.
1335
1336Will not exceed this limit even if the string is not valid "UTF-8".
1337
1338=cut
1339*/
1340
1341PERL_STATIC_INLINE U8 *
1342Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1343{
1344 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1345
1346 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1347 * the bitops (especially ~) can create illegal UTF-8.
1348 * In other words: in Perl UTF-8 is not just for Unicode. */
1349
1350 assert(start <= s);
1351 assert(off <= 0);
1352
1353 while (off++ && s > start) {
1354 s--;
1355 while (UTF8_IS_CONTINUATION(*s) && s > start)
1356 s--;
1357 }
1358
7347ee54 1359 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1360 return (U8 *)s;
7347ee54 1361 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1362}
1363
1364/*
1365=for apidoc utf8_hop_safe
1366
1367Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1368either forward or backward.
1369
1370When moving backward it will not move before C<start>.
1371
1372When moving forward it will not move beyond C<end>.
1373
1374Will not exceed those limits even if the string is not valid "UTF-8".
1375
1376=cut
1377*/
1378
1379PERL_STATIC_INLINE U8 *
1380Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1381{
1382 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1383
1384 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1385 * the bitops (especially ~) can create illegal UTF-8.
1386 * In other words: in Perl UTF-8 is not just for Unicode. */
1387
1388 assert(start <= s && s <= end);
1389
1390 if (off >= 0) {
1391 return utf8_hop_forward(s, off, end);
1392 }
1393 else {
1394 return utf8_hop_back(s, off, start);
1395 }
1396}
1397
1398/*
4dab108f
KW
1399
1400=for apidoc is_utf8_valid_partial_char
1401
6cbb9248
KW
1402Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1403S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1404points. Otherwise, it returns 1 if there exists at least one non-empty
1405sequence of bytes that when appended to sequence C<s>, starting at position
1406C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1407otherwise returns 0.
1408
1409In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1410point.
1411
1412This is useful when a fixed-length buffer is being tested for being well-formed
1413UTF-8, but the final few bytes in it don't comprise a full character; that is,
1414it is split somewhere in the middle of the final code point's UTF-8
1415representation. (Presumably when the buffer is refreshed with the next chunk
1416of data, the new first bytes will complete the partial code point.) This
1417function is used to verify that the final bytes in the current buffer are in
1418fact the legal beginning of some code point, so that if they aren't, the
1419failure can be signalled without having to wait for the next read.
4dab108f
KW
1420
1421=cut
1422*/
2717076a
KW
1423#define is_utf8_valid_partial_char(s, e) \
1424 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1425
1426/*
1427
1428=for apidoc is_utf8_valid_partial_char_flags
1429
1430Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1431or not the input is a valid UTF-8 encoded partial character, but it takes an
1432extra parameter, C<flags>, which can further restrict which code points are
1433considered valid.
1434
1435If C<flags> is 0, this behaves identically to
1436C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1437of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1438there is any sequence of bytes that can complete the input partial character in
1439such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1440TRUE; otherwise FALSE. Non character code points cannot be determined based on
1441partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1442determined from just the first one or two bytes.
1443
1444=cut
1445 */
1446
56e4cf64 1447PERL_STATIC_INLINE bool
f1c999a7 1448S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1449{
f1c999a7 1450 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1451
f1c999a7 1452 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1453 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1454
8875bd48 1455 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1456 return FALSE;
1457 }
1458
f1c999a7 1459 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1460}
1461
8bc127bf
KW
1462/*
1463
1464=for apidoc is_utf8_fixed_width_buf_flags
1465
1466Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1467is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1468otherwise it returns FALSE.
1469
1470If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1471without restriction. If the final few bytes of the buffer do not form a
1472complete code point, this will return TRUE anyway, provided that
1473C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1474
1475If C<flags> in non-zero, it can be any combination of the
1476C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1477same meanings.
1478
1479This function differs from C<L</is_utf8_string_flags>> only in that the latter
1480returns FALSE if the final few bytes of the string don't form a complete code
1481point.
1482
1483=cut
1484 */
1485#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1486 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1487
1488/*
1489
1490=for apidoc is_utf8_fixed_width_buf_loc_flags
1491
1492Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1493failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1494to the beginning of any partial character at the end of the buffer; if there is
1495no partial character C<*ep> will contain C<s>+C<len>.
1496
1497See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1498
1499=cut
1500*/
1501
1502#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1503 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1504
1505/*
1506
1507=for apidoc is_utf8_fixed_width_buf_loclen_flags
1508
1509Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1510complete, valid characters found in the C<el> pointer.
1511
1512=cut
1513*/
1514
1515PERL_STATIC_INLINE bool
1516S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1517 STRLEN len,
8bc127bf
KW
1518 const U8 **ep,
1519 STRLEN *el,
1520 const U32 flags)
1521{
1522 const U8 * maybe_partial;
1523
1524 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1525
1526 if (! ep) {
1527 ep = &maybe_partial;
1528 }
1529
1530 /* If it's entirely valid, return that; otherwise see if the only error is
1531 * that the final few bytes are for a partial character */
1532 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1533 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1534}
1535
c8028aa6
TC
1536/* ------------------------------- perl.h ----------------------------- */
1537
1538/*
dcccc8ff
KW
1539=head1 Miscellaneous Functions
1540
41188aa0 1541=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1542
6602b933 1543Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1544If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1545
1546Return TRUE if the name is safe.
1547
796b6530 1548Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1549
1550=cut
1551*/
1552
1553PERL_STATIC_INLINE bool
41188aa0 1554S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1555 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1556 * perl itself uses xce*() functions which accept 8-bit strings.
1557 */
1558
1559 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1560
6c4650b3 1561 if (len > 1) {
c8028aa6 1562 char *null_at;
41188aa0 1563 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1564 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1565 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1566 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1567 what, op_name, pv, null_at+1);
c8028aa6
TC
1568 return FALSE;
1569 }
1570 }
1571
1572 return TRUE;
1573}
1574
1575/*
7cb3f959
TC
1576
1577Return true if the supplied filename has a newline character
fa6c7d00 1578immediately before the first (hopefully only) NUL.
7cb3f959
TC
1579
1580My original look at this incorrectly used the len from SvPV(), but
1581that's incorrect, since we allow for a NUL in pv[len-1].
1582
1583So instead, strlen() and work from there.
1584
1585This allow for the user reading a filename, forgetting to chomp it,
1586then calling:
1587
1588 open my $foo, "$file\0";
1589
1590*/
1591
1592#ifdef PERL_CORE
1593
1594PERL_STATIC_INLINE bool
1595S_should_warn_nl(const char *pv) {
1596 STRLEN len;
1597
1598 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1599
1600 len = strlen(pv);
1601
1602 return len > 0 && pv[len-1] == '\n';
1603}
1604
1605#endif
1606
81d52ecd
JH
1607/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1608
1609#define MAX_CHARSET_NAME_LENGTH 2
1610
1611PERL_STATIC_INLINE const char *
1612get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1613{
1614 /* Returns a string that corresponds to the name of the regex character set
1615 * given by 'flags', and *lenp is set the length of that string, which
1616 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1617
1618 *lenp = 1;
1619 switch (get_regex_charset(flags)) {
1620 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1621 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1622 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1623 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1624 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1625 *lenp = 2;
1626 return ASCII_MORE_RESTRICT_PAT_MODS;
1627 }
1628 /* The NOT_REACHED; hides an assert() which has a rather complex
1629 * definition in perl.h. */
1630 NOT_REACHED; /* NOTREACHED */
1631 return "?"; /* Unknown */
1632}
1633
7cb3f959 1634/*
ed382232
TC
1635
1636Return false if any get magic is on the SV other than taint magic.
1637
1638*/
1639
1640PERL_STATIC_INLINE bool
1641S_sv_only_taint_gmagic(SV *sv) {
1642 MAGIC *mg = SvMAGIC(sv);
1643
1644 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1645
1646 while (mg) {
1647 if (mg->mg_type != PERL_MAGIC_taint
1648 && !(mg->mg_flags & MGf_GSKIP)
1649 && mg->mg_virtual->svt_get) {
1650 return FALSE;
1651 }
1652 mg = mg->mg_moremagic;
1653 }
1654
1655 return TRUE;
1656}
1657
ed8ff0f3
DM
1658/* ------------------ cop.h ------------------------------------------- */
1659
1660
1661/* Enter a block. Push a new base context and return its address. */
1662
1663PERL_STATIC_INLINE PERL_CONTEXT *
1664S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1665{
1666 PERL_CONTEXT * cx;
1667
1668 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1669
1670 CXINC;
1671 cx = CX_CUR();
1672 cx->cx_type = type;
1673 cx->blk_gimme = gimme;
1674 cx->blk_oldsaveix = saveix;
4caf7d8c 1675 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1676 cx->blk_oldcop = PL_curcop;
4caf7d8c 1677 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1678 cx->blk_oldscopesp = PL_scopestack_ix;
1679 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1680 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1681
1682 PL_tmps_floor = PL_tmps_ix;
1683 CX_DEBUG(cx, "PUSH");
1684 return cx;
1685}
1686
1687
1688/* Exit a block (RETURN and LAST). */
1689
1690PERL_STATIC_INLINE void
1691S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1692{
1693 PERL_ARGS_ASSERT_CX_POPBLOCK;
1694
1695 CX_DEBUG(cx, "POP");
1696 /* these 3 are common to cx_popblock and cx_topblock */
1697 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1698 PL_scopestack_ix = cx->blk_oldscopesp;
1699 PL_curpm = cx->blk_oldpm;
1700
1701 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1702 * and leaves a CX entry lying around for repeated use, so
1703 * skip for multicall */ \
1704 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1705 || PL_savestack_ix == cx->blk_oldsaveix);
1706 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1707 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1708}
1709
1710/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1711 * Whereas cx_popblock() restores the state to the point just before
1712 * cx_pushblock() was called, cx_topblock() restores it to the point just
1713 * *after* cx_pushblock() was called. */
1714
1715PERL_STATIC_INLINE void
1716S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1717{
1718 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1719
1720 CX_DEBUG(cx, "TOP");
1721 /* these 3 are common to cx_popblock and cx_topblock */
1722 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1723 PL_scopestack_ix = cx->blk_oldscopesp;
1724 PL_curpm = cx->blk_oldpm;
1725
1726 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1727}
1728
1729
a73d8813
DM
1730PERL_STATIC_INLINE void
1731S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1732{
1733 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1734
1735 PERL_ARGS_ASSERT_CX_PUSHSUB;
1736
3f6bd23a 1737 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1738 cx->blk_sub.cv = cv;
1739 cx->blk_sub.olddepth = CvDEPTH(cv);
1740 cx->blk_sub.prevcomppad = PL_comppad;
1741 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1742 cx->blk_sub.retop = retop;
1743 SvREFCNT_inc_simple_void_NN(cv);
1744 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1745}
1746
1747
1748/* subsets of cx_popsub() */
1749
1750PERL_STATIC_INLINE void
1751S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1752{
1753 CV *cv;
1754
1755 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1756 assert(CxTYPE(cx) == CXt_SUB);
1757
1758 PL_comppad = cx->blk_sub.prevcomppad;
1759 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1760 cv = cx->blk_sub.cv;
1761 CvDEPTH(cv) = cx->blk_sub.olddepth;
1762 cx->blk_sub.cv = NULL;
1763 SvREFCNT_dec(cv);
1764}
1765
1766
1767/* handle the @_ part of leaving a sub */
1768
1769PERL_STATIC_INLINE void
1770S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1771{
1772 AV *av;
1773
1774 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1775 assert(CxTYPE(cx) == CXt_SUB);
1776 assert(AvARRAY(MUTABLE_AV(
1777 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1778 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1779
1780 CX_POP_SAVEARRAY(cx);
1781 av = MUTABLE_AV(PAD_SVl(0));
1782 if (UNLIKELY(AvREAL(av)))
1783 /* abandon @_ if it got reified */
1784 clear_defarray(av, 0);
1785 else {
1786 CLEAR_ARGARRAY(av);
1787 }
1788}
1789
1790
1791PERL_STATIC_INLINE void
1792S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1793{
1794 PERL_ARGS_ASSERT_CX_POPSUB;
1795 assert(CxTYPE(cx) == CXt_SUB);
1796
3f6bd23a 1797 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1798
1799 if (CxHASARGS(cx))
1800 cx_popsub_args(cx);
1801 cx_popsub_common(cx);
1802}
1803
1804
6a7d52cc
DM
1805PERL_STATIC_INLINE void
1806S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1807{
1808 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1809
1810 cx->blk_format.cv = cv;
1811 cx->blk_format.retop = retop;
1812 cx->blk_format.gv = gv;
1813 cx->blk_format.dfoutgv = PL_defoutgv;
1814 cx->blk_format.prevcomppad = PL_comppad;
1815 cx->blk_u16 = 0;
1816
1817 SvREFCNT_inc_simple_void_NN(cv);
1818 CvDEPTH(cv)++;
1819 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1820}
1821
1822
1823PERL_STATIC_INLINE void
1824S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1825{
1826 CV *cv;
1827 GV *dfout;
1828
1829 PERL_ARGS_ASSERT_CX_POPFORMAT;
1830 assert(CxTYPE(cx) == CXt_FORMAT);
1831
1832 dfout = cx->blk_format.dfoutgv;
1833 setdefout(dfout);
1834 cx->blk_format.dfoutgv = NULL;
1835 SvREFCNT_dec_NN(dfout);
1836
1837 PL_comppad = cx->blk_format.prevcomppad;
1838 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1839 cv = cx->blk_format.cv;
1840 cx->blk_format.cv = NULL;
1841 --CvDEPTH(cv);
1842 SvREFCNT_dec_NN(cv);
1843}
1844
1845
13febba5
DM
1846PERL_STATIC_INLINE void
1847S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1848{
1849 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1850
1851 cx->blk_eval.retop = retop;
1852 cx->blk_eval.old_namesv = namesv;
1853 cx->blk_eval.old_eval_root = PL_eval_root;
1854 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1855 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1856 cx->blk_eval.cur_top_env = PL_top_env;
1857
4c57ced5 1858 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1859 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1860 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1861}
1862
1863
1864PERL_STATIC_INLINE void
1865S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1866{
1867 SV *sv;
1868
1869 PERL_ARGS_ASSERT_CX_POPEVAL;
1870 assert(CxTYPE(cx) == CXt_EVAL);
1871
1872 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1873 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1874 PL_eval_root = cx->blk_eval.old_eval_root;
1875 sv = cx->blk_eval.cur_text;
4c57ced5 1876 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1877 cx->blk_eval.cur_text = NULL;
1878 SvREFCNT_dec_NN(sv);
1879 }
1880
1881 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1882 if (sv) {
1883 cx->blk_eval.old_namesv = NULL;
1884 SvREFCNT_dec_NN(sv);
1885 }
13febba5 1886}
6a7d52cc 1887
a73d8813 1888
d1b6bf72
DM
1889/* push a plain loop, i.e.
1890 * { block }
1891 * while (cond) { block }
1892 * for (init;cond;continue) { block }
1893 * This loop can be last/redo'ed etc.
1894 */
1895
1896PERL_STATIC_INLINE void
1897S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1898{
1899 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1900 cx->blk_loop.my_op = cLOOP;
1901}
1902
1903
1904/* push a true for loop, i.e.
1905 * for var (list) { block }
1906 */
1907
1908PERL_STATIC_INLINE void
1909S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1910{
1911 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1912
1913 /* this one line is common with cx_pushloop_plain */
1914 cx->blk_loop.my_op = cLOOP;
1915
1916 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1917 cx->blk_loop.itersave = itersave;
1918#ifdef USE_ITHREADS
1919 cx->blk_loop.oldcomppad = PL_comppad;
1920#endif
1921}
1922
1923
1924/* pop all loop types, including plain */
1925
1926PERL_STATIC_INLINE void
1927S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1928{
1929 PERL_ARGS_ASSERT_CX_POPLOOP;
1930
1931 assert(CxTYPE_is_LOOP(cx));
1932 if ( CxTYPE(cx) == CXt_LOOP_ARY
1933 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1934 {
1935 /* Free ary or cur. This assumes that state_u.ary.ary
1936 * aligns with state_u.lazysv.cur. See cx_dup() */
1937 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1938 cx->blk_loop.state_u.lazysv.cur = NULL;
1939 SvREFCNT_dec_NN(sv);
1940 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1941 sv = cx->blk_loop.state_u.lazysv.end;
1942 cx->blk_loop.state_u.lazysv.end = NULL;
1943 SvREFCNT_dec_NN(sv);
1944 }
1945 }
1946 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1947 SV *cursv;
1948 SV **svp = (cx)->blk_loop.itervar_u.svp;
1949 if ((cx->cx_type & CXp_FOR_GV))
1950 svp = &GvSV((GV*)svp);
1951 cursv = *svp;
1952 *svp = cx->blk_loop.itersave;
1953 cx->blk_loop.itersave = NULL;
1954 SvREFCNT_dec(cursv);
1955 }
1956}
1957
2a7b7c61
DM
1958
1959PERL_STATIC_INLINE void
7896dde7 1960S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1961{
7896dde7 1962 PERL_ARGS_ASSERT_CX_PUSHWHEN;
2a7b7c61 1963
7896dde7 1964 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1965}
1966
1967
1968PERL_STATIC_INLINE void
7896dde7 1969S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1970{
7896dde7
Z
1971 PERL_ARGS_ASSERT_CX_POPWHEN;
1972 assert(CxTYPE(cx) == CXt_WHEN);
2a7b7c61
DM
1973
1974 PERL_UNUSED_ARG(cx);
59a14f30 1975 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1976 /* currently NOOP */
1977}
1978
1979
7896dde7
Z
1980PERL_STATIC_INLINE void
1981S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1982{
1983 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1984
1985 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1986 cx->blk_givwhen.defsv_save = orig_defsv;
1987}
1988
1989
1990PERL_STATIC_INLINE void
1991S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1992{
1993 SV *sv;
1994
1995 PERL_ARGS_ASSERT_CX_POPGIVEN;
1996 assert(CxTYPE(cx) == CXt_GIVEN);
1997
1998 sv = GvSV(PL_defgv);
1999 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2000 cx->blk_givwhen.defsv_save = NULL;
2001 SvREFCNT_dec(sv);
2002}
2003
ec2c235b
KW
2004/* ------------------ util.h ------------------------------------------- */
2005
2006/*
2007=head1 Miscellaneous Functions
2008
2009=for apidoc foldEQ
2010
2011Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2012same
2013case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2014match themselves and their opposite case counterparts. Non-cased and non-ASCII
2015range bytes match only themselves.
2016
2017=cut
2018*/
2019
2020PERL_STATIC_INLINE I32
2021Perl_foldEQ(const char *s1, const char *s2, I32 len)
2022{
2023 const U8 *a = (const U8 *)s1;
2024 const U8 *b = (const U8 *)s2;
2025
2026 PERL_ARGS_ASSERT_FOLDEQ;
2027
2028 assert(len >= 0);
2029
2030 while (len--) {
2031 if (*a != *b && *a != PL_fold[*b])
2032 return 0;
2033 a++,b++;
2034 }
2035 return 1;
2036}
2037
0f9cb40c 2038PERL_STATIC_INLINE I32
ec2c235b
KW
2039Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2040{
2041 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2042 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2043 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2044 * does it check that the strings each have at least 'len' characters */
2045
2046 const U8 *a = (const U8 *)s1;
2047 const U8 *b = (const U8 *)s2;
2048
2049 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2050
2051 assert(len >= 0);
2052
2053 while (len--) {
2054 if (*a != *b && *a != PL_fold_latin1[*b]) {
2055 return 0;
2056 }
2057 a++, b++;
2058 }
2059 return 1;
2060}
2061
2062/*
2063=for apidoc foldEQ_locale
2064
2065Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2066same case-insensitively in the current locale; false otherwise.
2067
2068=cut
2069*/
2070
0f9cb40c 2071PERL_STATIC_INLINE I32
ec2c235b
KW
2072Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2073{
2074 dVAR;
2075 const U8 *a = (const U8 *)s1;
2076 const U8 *b = (const U8 *)s2;
2077
2078 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2079
2080 assert(len >= 0);
2081
2082 while (len--) {
2083 if (*a != *b && *a != PL_fold_locale[*b])
2084 return 0;
2085 a++,b++;
2086 }
2087 return 1;
2088}
2089
6dba01e2
KW
2090#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2091
2092PERL_STATIC_INLINE void *
2093S_my_memrchr(const char * s, const char c, const STRLEN len)
2094{
2095 /* memrchr(), since many platforms lack it */
2096
2097 const char * t = s + len - 1;
2098
2099 PERL_ARGS_ASSERT_MY_MEMRCHR;
2100
2101 while (t >= s) {
2102 if (*t == c) {
2103 return (void *) t;
2104 }
2105 t--;
2106 }
2107
2108 return NULL;
2109}
2110
2111#endif
2112
ed382232 2113/*
c8028aa6
TC
2114 * ex: set ts=8 sts=4 sw=4 et:
2115 */